Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,5 @@ cython_debug/
reports/
output
test_results

.DS_Store
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Change log

### 0.3.2
- Fix duplicate polygon `_id` generation in OSM→OSW export by assigning sequential IDs per feature type.
- Remap edge `_u_id`/`_v_id` and zone `_w_id` references to exported node IDs so references stay consistent after ID normalization.
- Harden OSM ID remapping in normalizer output with deterministic per-type counters and relation-member type-aware reference rewrites.

### 0.3.1
- Preserve custom `ext:*` features across all geometries: ext-only points keep numeric IDs (no `p` prefix), ext-only lines/polygons are retained, and custom attributes are emitted in the appropriate GeoJSON file.
- Add schema-safe handling for ext-only geometries during construction to avoid missing-ref crashes.
Expand Down
23 changes: 21 additions & 2 deletions src/example.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import asyncio
from osm_osw_reformatter import Formatter
import argparse

ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

Expand All @@ -27,6 +28,24 @@ def osw_convert():
# f.cleanup()


# if __name__ == '__main__':
# asyncio.run(osm_convert())
# osw_convert()

def main():
    """Run an OSM <-> OSW conversion driven by command-line arguments.

    Three required options are read from the command line:

    * ``-i`` / ``--input``  -- path of the file to convert.
    * ``-o`` / ``--output`` -- directory used as the formatter's workdir;
      it is created when it does not exist yet.
    * ``-s`` / ``--mode``   -- either ``OSW2OSM`` or ``OSM2OSW``.

    ``Formatter.osm2osw`` is driven through ``asyncio.run``; ``Formatter.osw2osm``
    is called directly.
    """
    cli = argparse.ArgumentParser(description='Convert between OSM and OSW')
    cli.add_argument(
        '-i', '--input',
        required=True,
        help='input file path',
    )
    cli.add_argument(
        '-o', '--output',
        required=True,
        help='output directory',
    )
    cli.add_argument(
        '-s', '--mode',
        required=True,
        choices=['OSW2OSM', 'OSM2OSW'],
        help='conversion mode',
    )
    options = cli.parse_args()

    # The output directory doubles as the formatter's working directory.
    os.makedirs(options.output, exist_ok=True)
    formatter = Formatter(workdir=options.output, file_path=options.input)

    if options.mode != 'OSM2OSW':
        # argparse restricts the choices, so anything else is OSW2OSM.
        formatter.osw2osm()
        return

    asyncio.run(formatter.osm2osw())

if __name__ == '__main__':
asyncio.run(osm_convert())
osw_convert()
main()
102 changes: 74 additions & 28 deletions src/osm_osw_reformatter/serializer/osm/osm_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,43 +614,49 @@ def to_geojson(self, *args) -> None:
zones_path = args[4]
polygons_path = args[5]

_id = 1
edge_features = []
for u, v, d in self.G.edges(data=True):
d_copy = {**d}
d_copy['_id'] = str(_id)
_id += 1
d_copy['_u_id'] = str(u)
d_copy['_v_id'] = str(v)

d_copy['ext:osm_id'] = str(d['osm_id'])

if 'osm_id' in d_copy:
d_copy.pop('osm_id')

if 'segment' in d_copy:
d_copy.pop('segment')

geometry = mapping(d_copy.pop('geometry'))

edge_features.append(
{'type': 'Feature', 'geometry': geometry, 'properties': d_copy}
)
edges_fc = {**OSW_JSON_HEADER, **{"features": edge_features}}
edge_id_counter = 1
node_id_counter = 1
point_id_counter = 1
line_id_counter = 1
zone_id_counter = 1
polygon_id_counter = 1

def _source_id(node_key):
id_str = str(node_key)
if isinstance(node_key, str) and id_str[:1] in {"p", "l", "z", "g"}:
return id_str[1:]
return id_str

def _assign_ids(properties, new_id, source_id):
properties["_id"] = str(new_id)
if "ext:osm_id" not in properties:
properties["ext:osm_id"] = str(properties.get("osm_id", source_id))
properties.pop("osm_id", None)

def _remap_node_ref(ref, node_id_map):
if ref in node_id_map:
return node_id_map[ref]
try:
ref_int = int(ref)
except (TypeError, ValueError):
ref_int = None
if ref_int is not None and ref_int in node_id_map:
return node_id_map[ref_int]
return str(ref)

node_features = []
point_features = []
line_features = []
zone_features = []
polygon_features = []
node_id_map = {}
for n, d in self.G.nodes(data=True):
d_copy = {**d}
id_str = str(n)
trimmed_id = id_str[1:] if isinstance(n, str) else id_str
d_copy["_id"] = trimmed_id
d_copy['ext:osm_id'] = str(d_copy.get('osm_id', d_copy["_id"]))
source_id = _source_id(n)

if OSWPointNormalizer.osw_point_filter(d):
_assign_ids(d_copy, point_id_counter, source_id)
point_id_counter += 1
geometry = mapping(d_copy.pop("geometry"))

if "lon" in d_copy:
Expand All @@ -663,25 +669,33 @@ def to_geojson(self, *args) -> None:
{"type": "Feature", "geometry": geometry, "properties": d_copy}
)
elif OSWLineNormalizer.osw_line_filter(d):
_assign_ids(d_copy, line_id_counter, source_id)
line_id_counter += 1
geometry = mapping(d_copy.pop("geometry"))

line_features.append(
{"type": "Feature", "geometry": geometry, "properties": d_copy}
)
elif OSWZoneNormalizer.osw_zone_filter(d):
_assign_ids(d_copy, zone_id_counter, source_id)
zone_id_counter += 1
geometry = mapping(d_copy.pop("geometry"))

zone_features.append(
{"type": "Feature", "geometry": geometry, "properties": d_copy}
)
elif OSWPolygonNormalizer.osw_polygon_filter(d):
_assign_ids(d_copy, polygon_id_counter, source_id)
polygon_id_counter += 1
geometry = mapping(d_copy.pop("geometry"))

polygon_features.append(
{"type": "Feature", "geometry": geometry, "properties": d_copy}
)
else:
d_copy['_id'] = str(n)
_assign_ids(d_copy, node_id_counter, source_id)
node_id_map[n] = d_copy["_id"]
node_id_counter += 1

geometry = mapping(d_copy.pop('geometry'))

Expand All @@ -694,6 +708,38 @@ def to_geojson(self, *args) -> None:
node_features.append(
{'type': 'Feature', 'geometry': geometry, 'properties': d_copy}
)

for zone_feature in zone_features:
props = zone_feature.get("properties", {})
w_ids = props.get("_w_id")
if isinstance(w_ids, list):
props["_w_id"] = [str(_remap_node_ref(ref, node_id_map)) for ref in w_ids]
elif w_ids is not None:
props["_w_id"] = str(_remap_node_ref(w_ids, node_id_map))

edge_features = []
for u, v, d in self.G.edges(data=True):
d_copy = {**d}
d_copy['_id'] = str(edge_id_counter)
edge_id_counter += 1
d_copy['_u_id'] = str(node_id_map.get(u, u))
d_copy['_v_id'] = str(node_id_map.get(v, v))

d_copy['ext:osm_id'] = str(d['osm_id'])

if 'osm_id' in d_copy:
d_copy.pop('osm_id')

if 'segment' in d_copy:
d_copy.pop('segment')

geometry = mapping(d_copy.pop('geometry'))

edge_features.append(
{'type': 'Feature', 'geometry': geometry, 'properties': d_copy}
)
edges_fc = {**OSW_JSON_HEADER, **{"features": edge_features}}

nodes_fc = {**OSW_JSON_HEADER, **{"features": node_features}}
points_fc = {**OSW_JSON_HEADER, **{"features": point_features}}
lines_fc = {**OSW_JSON_HEADER, **{"features": line_features}}
Expand Down
91 changes: 75 additions & 16 deletions src/osm_osw_reformatter/serializer/osm/osm_normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ def process_output(self, osmnodes, osmways, osmrelations):
node_id_map = {}
way_id_map = {}
rel_id_map = {}
next_node_id = 1
next_way_id = 1
next_rel_id = 1

def _set_id_tag(osm_obj, new_id):
tags = getattr(osm_obj, "tags", None)
Expand All @@ -260,22 +263,34 @@ def _set_id_tag(osm_obj, new_id):
else:
tags["_id"] = value

def _member_type(member):
for attr in ("type", "member_type", "objtype", "element_type"):
value = getattr(member, attr, None)
if isinstance(value, str):
normalized = value.lower()
if normalized in ("node", "way", "relation"):
return normalized
return None

# Remap node IDs sequentially starting at 1
for node in osmnodes:
old_id = getattr(node, "id", None)
if old_id is None:
continue
new_id = len(node_id_map) + 1
node_id_map[old_id] = new_id
new_id = next_node_id
next_node_id += 1
# Keep first mapping for refs that still point to source IDs.
node_id_map.setdefault(old_id, new_id)
node.id = new_id
_set_id_tag(node, new_id)

# Remap way IDs and rewrite node refs
for way in osmways:
old_id = getattr(way, "id", None)
if old_id is not None:
new_id = len(way_id_map) + 1
way_id_map[old_id] = new_id
new_id = next_way_id
next_way_id += 1
way_id_map.setdefault(old_id, new_id)
way.id = new_id
_set_id_tag(way, new_id)

Expand All @@ -285,12 +300,14 @@ def _set_id_tag(osm_obj, new_id):
for ref in node_refs:
if isinstance(ref, int):
if ref not in node_id_map:
new_id = len(node_id_map) + 1
new_id = next_node_id
next_node_id += 1
node_id_map[ref] = new_id
new_refs.append(node_id_map.get(ref, ref))
elif hasattr(ref, "id"):
if ref.id not in node_id_map:
new_id = len(node_id_map) + 1
new_id = next_node_id
next_node_id += 1
node_id_map[ref.id] = new_id
ref.id = node_id_map.get(ref.id, ref.id)
new_refs.append(ref)
Expand All @@ -310,8 +327,9 @@ def _set_id_tag(osm_obj, new_id):
for rel in osmrelations:
old_id = getattr(rel, "id", None)
if old_id is not None:
new_id = len(rel_id_map) + 1
rel_id_map[old_id] = new_id
new_id = next_rel_id
next_rel_id += 1
rel_id_map.setdefault(old_id, new_id)
rel.id = new_id
_set_id_tag(rel, new_id)

Expand All @@ -320,16 +338,57 @@ def _set_id_tag(osm_obj, new_id):
if not hasattr(member, "ref"):
continue
ref = member.ref
m_type = _member_type(member)
if isinstance(ref, int):
if ref not in node_id_map and ref not in way_id_map and ref not in rel_id_map:
new_id = len(rel_id_map) + 1
rel_id_map[ref] = new_id
member.ref = node_id_map.get(ref, way_id_map.get(ref, rel_id_map.get(ref, ref)))
if m_type == "node":
if ref not in node_id_map:
new_id = next_node_id
next_node_id += 1
node_id_map[ref] = new_id
member.ref = node_id_map.get(ref, ref)
elif m_type == "way":
if ref not in way_id_map:
new_id = next_way_id
next_way_id += 1
way_id_map[ref] = new_id
member.ref = way_id_map.get(ref, ref)
elif m_type == "relation":
if ref not in rel_id_map:
new_id = next_rel_id
next_rel_id += 1
rel_id_map[ref] = new_id
member.ref = rel_id_map.get(ref, ref)
else:
if ref not in node_id_map and ref not in way_id_map and ref not in rel_id_map:
new_id = next_rel_id
next_rel_id += 1
rel_id_map[ref] = new_id
member.ref = node_id_map.get(ref, way_id_map.get(ref, rel_id_map.get(ref, ref)))
elif hasattr(ref, "id"):
if ref.id not in node_id_map and ref.id not in way_id_map and ref.id not in rel_id_map:
new_id = len(rel_id_map) + 1
rel_id_map[ref.id] = new_id
ref.id = node_id_map.get(ref.id, way_id_map.get(ref.id, rel_id_map.get(ref.id, ref.id)))
if m_type == "node":
if ref.id not in node_id_map:
new_id = next_node_id
next_node_id += 1
node_id_map[ref.id] = new_id
ref.id = node_id_map.get(ref.id, ref.id)
elif m_type == "way":
if ref.id not in way_id_map:
new_id = next_way_id
next_way_id += 1
way_id_map[ref.id] = new_id
ref.id = way_id_map.get(ref.id, ref.id)
elif m_type == "relation":
if ref.id not in rel_id_map:
new_id = next_rel_id
next_rel_id += 1
rel_id_map[ref.id] = new_id
ref.id = rel_id_map.get(ref.id, ref.id)
else:
if ref.id not in node_id_map and ref.id not in way_id_map and ref.id not in rel_id_map:
new_id = next_rel_id
next_rel_id += 1
rel_id_map[ref.id] = new_id
ref.id = node_id_map.get(ref.id, way_id_map.get(ref.id, rel_id_map.get(ref.id, ref.id)))

# Ensure deterministic ordering now that IDs have been remapped
if hasattr(osmnodes, "sort"):
Expand Down
2 changes: 1 addition & 1 deletion src/osm_osw_reformatter/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.3.1'
__version__ = '0.3.2'
37 changes: 37 additions & 0 deletions tests/unit_tests/test_files/bug_2942.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?><osm version="0.6" generator="TDEI exporter" upload="false">
<node visible="true" version="1" id="1" lat="47.6436499" lon="-122.1429456"/>
<node visible="true" version="1" id="2" lat="47.6436495" lon="-122.1430253"><tag k="ext:osm_id" v="14"/></node>
<node visible="true" version="1" id="3" lat="47.6435058" lon="-122.1433361"/>
<node visible="true" version="1" id="4" lat="47.6433557" lon="-122.1433316"/>
<node visible="true" version="1" id="5" lat="47.6433557" lon="-122.1432618"/>
<node visible="true" version="1" id="6" lat="47.6435058" lon="-122.1432618"/>
<node visible="true" version="1" id="7" lat="47.6436499" lon="-122.1429282"><tag k="ext:osm_id" v="25"/><tag k="ext:test_field" v="custom_value"/></node>
<node visible="true" version="1" id="8" lat="47.6435058" lon="-122.1433361"/>
<node visible="true" version="1" id="9" lat="47.6437429" lon="-122.1429105"><tag k="ext:osm_id" v="28"/><tag k="ext:test_field" v="custom_value"/></node>
<node visible="true" version="1" id="10" lat="47.6437636" lon="-122.1429432"><tag k="ext:osm_id" v="3"/><tag k="ext:test_field" v="custom_value"/></node>
<node visible="true" version="1" id="11" lat="47.6435058" lon="-122.1433361"/>
<node visible="true" version="1" id="12" lat="47.6433557" lon="-122.1433316"/>
<node visible="true" version="1" id="13" lat="47.6433557" lon="-122.1432618"/>
<node visible="true" version="1" id="14" lat="47.6435058" lon="-122.1432618"/>
<node visible="true" version="1" id="15" lat="47.6435058" lon="-122.1433361"/>
<node visible="true" version="1" id="16" lat="47.6437325" lon="-122.1429432"/>
<node visible="true" version="1" id="17" lat="47.6438124" lon="-122.1429647"/>
<node visible="true" version="1" id="18" lat="47.6437338" lon="-122.1430452"/>
<node visible="true" version="1" id="19" lat="47.6438124" lon="-122.1429446"/>
<node visible="true" version="1" id="20" lat="47.6437896" lon="-122.1430443"/>
<node visible="true" version="1" id="21" lat="47.6438272" lon="-122.1429446"/>
<node visible="true" version="1" id="22" lat="47.6437896" lon="-122.1429422"/>
<node visible="true" version="1" id="23" lat="47.6438272" lon="-122.1429647"/>
<node visible="true" version="1" id="24" lat="47.6437338" lon="-122.1429422"/>
<node visible="true" version="1" id="25" lat="47.6438124" lon="-122.1429647"/>
<node visible="true" version="1" id="26" lat="47.6439343" lon="-122.1429531"/>
<node visible="true" version="1" id="27" lat="47.6438391" lon="-122.1429522"/>
<node visible="true" version="1" id="28" lat="47.6437345" lon="-122.1429278"><tag k="ext:osm_id" v="29"/><tag k="ext:man_made" v="manhole"/></node>
<node visible="true" version="1" id="29" lat="47.643988" lon="-122.1429563"><tag k="natural" v="tree"/><tag k="leaf_type" v="broadleaved"/><tag k="ext:osm_id" v="30"/><tag k="leaf_cycle" v="deciduous"/></node>
<way visible="true" version="1" id="1"><nd ref="3"/><nd ref="4"/><nd ref="5"/><nd ref="6"/><nd ref="8"/><tag k="ext:osm_id" v="2"/><tag k="ext:building" v="yes"/></way>
<way visible="true" version="1" id="2"><nd ref="11"/><nd ref="12"/><nd ref="13"/><nd ref="14"/><nd ref="15"/><tag k="ext:osm_id" v="4"/><tag k="ext:building" v="yes"/></way>
<way visible="true" version="1" id="3"><nd ref="17"/><nd ref="19"/><nd ref="21"/><nd ref="23"/><nd ref="25"/><tag k="natural" v="wood"/><tag k="leaf_type" v="broadleaved"/><tag k="ext:osm_id" v="6"/><tag k="leaf_cycle" v="deciduous"/></way>
<way visible="true" version="1" id="4"><nd ref="2"/><nd ref="1"/><tag k="footway" v="traffic_island"/><tag k="highway" v="footway"/><tag k="incline" v="-0.013"/><tag k="surface" v="asphalt"/><tag k="ext:osm_id" v="5"/></way>
<way visible="true" version="1" id="5"><nd ref="16"/><nd ref="18"/><nd ref="20"/><nd ref="22"/><nd ref="24"/><tag k="ext:length" v="16.1"/><tag k="ext:osm_id" v="6"/><tag k="ext:barrier" v="fence"/></way>
<way visible="true" version="1" id="6"><nd ref="26"/><nd ref="27"/><tag k="natural" v="tree_row"/><tag k="leaf_type" v="broadleaved"/><tag k="ext:osm_id" v="7"/><tag k="leaf_cycle" v="deciduous"/></way>
</osm>
Loading