diff --git a/.gitignore b/.gitignore index a64789c..b6f4430 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ osm2ma/.idea *.pyc *.coverage *.py,cover -osm2ma/testfiles/output_shp/ \ No newline at end of file +osm2ma/testfiles/output_shp/ +geofabrik/ \ No newline at end of file diff --git a/osm2ma/configengine.py b/osm2ma/configengine.py index 0cb8e41..671a5db 100644 --- a/osm2ma/configengine.py +++ b/osm2ma/configengine.py @@ -71,9 +71,9 @@ def _init_db_tables(self): osm_key_value text, element_icon text, comment text, - useful text, + useful text COLLATE NOCASE, data_category text, - cat_value text, + cat_value text COLLATE NOCASE, data_theme text, theme_value text, conforms_to_hierarchy text, @@ -91,7 +91,8 @@ def _init_db_tables(self): cat_value text, theme_value text, osm_element, - geom_type text + geom_type text, + osm_metadata boolean ); create table shpf_list ( @@ -137,6 +138,13 @@ def _populate_scratch_table(self): """ Duplicate config table which has been normalised wrt geometry type. + TODO: amend this to properly handle the generic OSM tags + TODO: update fixtures for this. 
+ + 1st; Run existing insert query but EXCLUDING cat_value = 'osms' + 2nd; For each unique cat_value, theme_value pair now in scratch, insert from config where cat_value = 'osms', but + replacing with the loop's current values of cat_value, theme_value + :return: None """ @@ -150,7 +158,8 @@ def _populate_scratch_table(self): cat_value, theme_value, osm_element, - geom_type + geom_type, + osm_metadata ) select osm_key_name, @@ -158,12 +167,54 @@ def _populate_scratch_table(self): cat_value, theme_value, osm_element, - '{geom}' + '{geom}', + 'FALSE' from config where config.geom_str like '%{geom}%' + and config.useful = 'y' + and config.cat_value != 'osms' '''.format(geom=geom) cur.execute(u_sql) + u_sql = ''' + select + cat_value, + theme_value, + geom_type + from scratch + group by + cat_value, + theme_value, + geom_type + ''' + + for cat, theme, geom in cur.execute(u_sql).fetchall(): + #print 'cat = {}, theme = {}, geom = {}'.format(cat, theme, geom) + u_sql = ''' + INSERT INTO scratch ( + osm_key_name, + osm_key_value, + cat_value, + theme_value, + osm_element, + geom_type, + osm_metadata + ) + select + osm_key_name, + osm_key_value, + '{cat_value}', + '{theme_value}', + osm_element, + '{geom}', + 'TRUE' + from config where + config.geom_str like '%{geom}%' + and config.useful = 'y' + and config.cat_value == 'osms' + '''.format(cat_value=cat, theme_value=theme, geom=geom) + cur.execute(u_sql) + def _populate_shpfile_table(self, geo_extd, scale): """ Fill list of shapefile, aggregating details of individual attributes. 
@@ -191,8 +242,8 @@ def _populate_shpfile_table(self, geo_extd, scale): ), scratch.cat_value, scratch.geom_type, - attriblist(scratch.osm_key_name), - condition_clause(scratch.osm_key_name, scratch.osm_key_value) + attriblist(scratch.osm_key_name, scratch.osm_metadata), + condition_clause(scratch.osm_key_name, scratch.osm_key_value, scratch.osm_metadata) FROM scratch GROUP BY shpf_name( @@ -231,8 +282,8 @@ def _init_db_funcs(self): :return: None """ self.db.create_function("shpf_name", 5, ConfigXWalk._create_shpf_name) - self.db.create_aggregate("attriblist", 1, _AttribList) - self.db.create_aggregate("condition_clause", 2, _SelectClause) + self.db.create_aggregate("attriblist", 2, _AttribList) + self.db.create_aggregate("condition_clause", 3, _SelectClause) def get_xwalk(self): """ @@ -295,13 +346,19 @@ class _AttribList: """ def __init__(self): self.set_attribs = set() + self.set_meta = set() - def step(self, value): + def step(self, value, meta): if len(value) > 0: - self.set_attribs.add(value) + if 'true' == meta.lower(): + self.set_meta.add(value) + else: + self.set_attribs.add(value) def finalize(self): - return ", ".join(sorted(self.set_attribs)) + attribs = ", ".join(sorted(self.set_attribs)) + meta = ", ".join(sorted(self.set_meta)) + return "{}, {}".format(attribs, meta) class _SelectClause: @@ -317,20 +374,21 @@ def __init__(self): self.query_args = dict() self.exclude_keys = set() - def step(self, osm_key, osm_value): - if (type(osm_value) == unicode) and ( - osm_value.lower() in {u'*', u'user defined', u'number', u'url or article title'}): - self.exclude_keys.add(osm_key) - else: - for val in osm_value.split(u'/'): - # TODO: This might be dangerous, because you can't be - # sure that the val or key won't contain a quote, for - # example. - # FIXME: Use params to execute? 
- unique_clause = u"'{key}'='{val}'".format( - key=osm_key, val=val.strip() - ) - self.query_args[unique_clause] = osm_key + def step(self, osm_key, osm_value, osm_meta): + if u'false' == osm_meta.lower(): + if (type(osm_value) == unicode) and ( + osm_value.lower() in {u'*', u'user defined', u'number', u'url or article title'}): + self.exclude_keys.add(osm_key) + else: + for val in osm_value.split(u'/'): + # TODO: This might be dangerous, because you can't be + # sure that the val or key won't contain a quote, for + # example. + # FIXME: Use params to execute? + unique_clause = u"'{key}'='{val}'".format( + key=osm_key, val=val.strip() + ) + self.query_args[unique_clause] = osm_key def finalize(self): cleaned_pairs = set() diff --git a/osm2ma/fixtures.py b/osm2ma/fixtures.py index aa70e7e..18bdec7 100644 --- a/osm2ma/fixtures.py +++ b/osm2ma/fixtures.py @@ -15,6 +15,11 @@ def path_to_fixtures_xls(): test_script_path = os.path.abspath(os.path.dirname(__file__)) return os.path.join(test_script_path, r"testfiles", r"fixtures.xls") +example_pbf = os.path.join(os.path.abspath(os.path.dirname(__file__)), + r"testfiles", + r"oxfordshire-latest.osm.pbf" + ) + _workbook = xlrd.open_workbook(os.path.realpath(path_to_fixtures_xls())) rawconf_good = _workbook.name_map.get("rawconf_good")[0].area2d(clipped=True) rawconf_invalid_heirarchy = _workbook.name_map.get("rawconf_invalid_heirarchy")[0].area2d(clipped=True) @@ -23,104 +28,189 @@ def path_to_fixtures_xls(): rawconf_wrong_column_order = _workbook.name_map.get("rawconf_wrong_column_order")[0].area2d(clipped=True) scratch_table_good = [ - (u'aeroway', u'aerodrome', u'tran', u'air', u'Node Area', u'pt'), - (u'aeroway',u'User defined', u'tran', u'air', u'Node Way', u'pt'), - (u'military', u'airfield', u'tran', u'air', u'Node Area', u'pt'), - (u'iata', u'User Defined', u'tran', u'air', u'Node Way Area', u'pt'), - (u'icao', u'User Defined', u'tran', u'air', u'Node Way Area', u'pt'), - (u'boundary', u'user defined', u'admn', 
u'ad', u'Node Way', u'pt'), - (u'fixme', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'pt'), - (u'source', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'pt'), - (u'source', u'historical', u'osms', u'met', u'Node Way Area Relation', u'pt'), - (u'source:name', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'pt'), - (u'wikipedia', u'URL or article title', u'osms', u'met', u'Node Way Area Relation', u'pt'), - (u'aeroway', u'runway', u'tran', u'air', u'Way Area', u'ln'), - (u'aeroway', u'User defined', u'tran', u'air', u'Node Way', u'ln'), - (u'iata', u'User Defined', u'tran', u'air', u'Node Way Area', u'ln'), - (u'icao', u'User Defined', u'tran', u'air', u'Node Way Area', u'ln'), - (u'border_type', u'*',u'admn', u'ad', u'Way Area', u'ln'), - (u'boundary', u'user defined', u'admn', u'ad', u'Node Way', u'ln'), - (u'fixme', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'ln'), - (u'source', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'ln'), - (u'source', u'historical', u'osms', u'met', u'Node Way Area Relation', u'ln'), - (u'source:name', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'ln'), - (u'wikipedia', u'URL or article title', u'osms',u'met', u'Node Way Area Relation', u'ln'), - (u'aeroway', u'aerodrome', u'tran', u'air', u'Node Area', u'py'), - (u'aeroway', u'runway', u'tran', u'air', u'Way Area', u'py'), - (u'military', u'airfield', u'tran', u'air', u'Node Area', u'py'), - (u'iata', u'User Defined', u'tran', u'air', u'Node Way Area', u'py'), - (u'icao', u'User Defined', u'tran', u'air', u'Node Way Area', u'py'), - (u'boundary', u'administrative', u'admn', u'ad', u'Area', u'py'), - (u'boundary', u'maritime', u'admn', u'ad', u'Area', u'py'), - (u'boundary', u'political', u'admn', u'ad', u'Area', u'py'), - (u'border_type', u'*', u'admn', u'ad', u'Way Area', u'py'), - (u'admin_level', u'Number', u'admn', u'ad', u'Area', u'py'), - (u'fixme', u'User defined', u'osms', 
u'met', u'Node Way Area Relation', u'py'), - (u'source', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'py'), - (u'source', u'historical', u'osms', u'met', u'Node Way Area Relation', u'py'), - (u'source:name', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'py'), - (u'wikipedia', u'URL or article title', u'osms', u'met', u'Node Way Area Relation', u'py'), - (u'fixme', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'rel'), - (u'source', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'rel'), - (u'source', u'historical', u'osms', u'met', u'Node Way Area Relation', u'rel'), - (u'source:name', u'User defined', u'osms', u'met', u'Node Way Area Relation', u'rel'), - (u'wikipedia', u'URL or article title', u'osms', u'met', u'Node Way Area Relation', u'rel') + (u'aeroway', u'aerodrome', u'tran', u'air', u'Node Area', u'pt', u'FALSE'), + (u'aeroway',u'User defined', u'tran', u'air', u'Node Way', u'pt', u'FALSE'), + (u'military', u'airfield', u'tran', u'air', u'Node Area', u'pt', u'FALSE'), + (u'iata', u'User Defined', u'tran', u'air', u'Node Way Area', u'pt', u'FALSE'), + (u'icao', u'User Defined', u'tran', u'air', u'Node Way Area', u'pt', u'FALSE'), + (u'boundary', u'user defined', u'admn', u'ad', u'Node Way', u'pt', u'FALSE'), + (u'aeroway', u'runway', u'tran', u'air', u'Way Area', u'ln', u'FALSE'), + (u'aeroway', u'User defined', u'tran', u'air', u'Node Way', u'ln', u'FALSE'), + (u'iata', u'User Defined', u'tran', u'air', u'Node Way Area', u'ln', u'FALSE'), + (u'icao', u'User Defined', u'tran', u'air', u'Node Way Area', u'ln', u'FALSE'), + (u'border_type', u'*',u'admn', u'ad', u'Way Area', u'ln', u'FALSE'), + (u'boundary', u'user defined', u'admn', u'ad', u'Node Way', u'ln', u'FALSE'), + (u'aeroway', u'aerodrome', u'tran', u'air', u'Node Area', u'py', u'FALSE'), + (u'aeroway', u'runway', u'tran', u'air', u'Way Area', u'py', u'FALSE'), + (u'military', u'airfield', u'tran', u'air', u'Node Area', u'py', u'FALSE'), 
+ (u'iata', u'User Defined', u'tran', u'air', u'Node Way Area', u'py', u'FALSE'), + (u'icao', u'User Defined', u'tran', u'air', u'Node Way Area', u'py', u'FALSE'), + (u'boundary', u'administrative', u'admn', u'ad', u'Area', u'py', u'FALSE'), + (u'boundary', u'political', u'admn', u'ad', u'Area', u'py', u'FALSE'), + (u'border_type', u'*', u'admn', u'ad', u'Way Area', u'py', u'FALSE'), + (u'admin_level', u'Number', u'admn', u'ad', u'Area', u'py', u'FALSE'), + (u'fixme', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'historical', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source:name', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'wikipedia', u'URL or article title', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'fixme', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', u'historical', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source:name', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'wikipedia', u'URL or article title', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'fixme', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'historical', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source:name', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'wikipedia', u'URL or article title', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'fixme', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'User defined', u'tran', 
u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'historical', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source:name', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'wikipedia', u'URL or article title', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'fixme', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', u'historical', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source:name', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'wikipedia', u'URL or article title', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'fixme', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'historical', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source:name', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'wikipedia', u'URL or article title', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE') +] + +temp_scratch_table_osm_only = [ + (u'fixme', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'historical', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source:name', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'wikipedia', u'URL or article title', u'admn', u'ad', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'fixme', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', 
u'historical', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source:name', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'wikipedia', u'URL or article title', u'admn', u'ad', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'fixme', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'historical', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source:name', u'User defined', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'wikipedia', u'URL or article title', u'admn', u'ad', u'Node Way Area Relation', u'py', u'TRUE'), + (u'fixme', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source', u'historical', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'source:name', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'wikipedia', u'URL or article title', u'tran', u'air', u'Node Way Area Relation', u'ln', u'TRUE'), + (u'fixme', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source', u'historical', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'source:name', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'wikipedia', u'URL or article title', u'tran', u'air', u'Node Way Area Relation', u'pt', u'TRUE'), + (u'fixme', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'User defined', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source', u'historical', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'source:name', u'User defined', 
u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), + (u'wikipedia', u'URL or article title', u'tran', u'air', u'Node Way Area Relation', u'py', u'TRUE'), ] shpf_list_table_good = [ - (u'wrl_admn_ad_ln_su_osm_pp.shp', u'admn', u'ln', u'border_type, boundary', u"'border_type' IS NOT null or 'boundary' IS NOT null"), - (u'wrl_admn_ad_pt_su_osm_pp.shp', u'admn', u'pt', u'boundary', u"'boundary' IS NOT null"), - (u'wrl_admn_ad_py_su_osm_pp.shp', u'admn', u'py', u'admin_level, border_type, boundary', u"'admin_level' IS NOT null or 'border_type' IS NOT null or 'boundary'='administrative' or 'boundary'='maritime' or 'boundary'='political'"), - (u'wrl_osms_met_ln_su_osm_pp.shp', u'osms', u'ln', u'fixme, source, source:name, wikipedia', u"'fixme' IS NOT null or 'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), - (u'wrl_osms_met_pt_su_osm_pp.shp', u'osms', u'pt', u'fixme, source, source:name, wikipedia', u"'fixme' IS NOT null or 'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), - (u'wrl_osms_met_py_su_osm_pp.shp', u'osms', u'py', u'fixme, source, source:name, wikipedia', u"'fixme' IS NOT null or 'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), - (u'wrl_osms_met_rel_su_osm_pp.shp', u'osms', u'rel', u'fixme, source, source:name, wikipedia', u"'fixme' IS NOT null or 'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), - (u'wrl_tran_air_ln_su_osm_pp.shp', u'tran', u'ln', u'aeroway, iata, icao', u"'aeroway' IS NOT null or 'iata' IS NOT null or 'icao' IS NOT null"), - (u'wrl_tran_air_pt_su_osm_pp.shp', u'tran', u'pt', u'aeroway, iata, icao, military', u"'aeroway' IS NOT null or 'iata' IS NOT null or 'icao' IS NOT null or 'military'='airfield'"), - (u'wrl_tran_air_py_su_osm_pp.shp', u'tran', u'py', u'aeroway, iata, icao, military', u"'aeroway'='aerodrome' or 'aeroway'='runway' or 'iata' IS NOT null or 'icao' IS NOT null or 'military'='airfield'") + 
(u'wrl_admn_ad_ln_su_osm_pp.shp', u'admn', u'ln', + u'border_type, boundary, fixme, source, source:name, wikipedia', + u"'border_type' IS NOT null or 'boundary' IS NOT null"), + (u'wrl_admn_ad_pt_su_osm_pp.shp', u'admn', u'pt', + u'boundary, fixme, source, source:name, wikipedia', + u"'boundary' IS NOT null"), + (u'wrl_admn_ad_py_su_osm_pp.shp', u'admn', u'py', + u'admin_level, border_type, boundary, fixme, source, source:name, wikipedia', + u"'admin_level' IS NOT null or 'border_type' IS NOT null or 'boundary'='administrative' or 'boundary'='political'"), + (u'wrl_tran_air_ln_su_osm_pp.shp', u'tran', u'ln', + u'aeroway, iata, icao, fixme, source, source:name, wikipedia', + u"'aeroway' IS NOT null or 'iata' IS NOT null or 'icao' IS NOT null"), + (u'wrl_tran_air_pt_su_osm_pp.shp', u'tran', u'pt', + u'aeroway, iata, icao, military, fixme, source, source:name, wikipedia', + u"'aeroway' IS NOT null or 'iata' IS NOT null or 'icao' IS NOT null or 'military'='airfield'"), + (u'wrl_tran_air_py_su_osm_pp.shp', u'tran', u'py', + u'aeroway, iata, icao, military, fixme, source, source:name, wikipedia', + u"'aeroway'='aerodrome' or 'aeroway'='runway' or 'iata' IS NOT null or 'icao' IS NOT null or 'military'='airfield'") +] + +temp_shpf_list_table_osm_only = [ + (u'wrl_osms_met_ln_su_osm_pp.shp', u'osms', u'ln', u'fixme, source, source:name, wikipedia', + u"'fixme' IS NOT null or 'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), + (u'wrl_osms_met_pt_su_osm_pp.shp', u'osms', u'pt', u'fixme, source, source:name, wikipedia', + u"'fixme' IS NOT null or 'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), + (u'wrl_osms_met_py_su_osm_pp.shp', u'osms', u'py', u'fixme, source, source:name, wikipedia', + u"'fixme' IS NOT null or 'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), + (u'wrl_osms_met_rel_su_osm_pp.shp', u'osms', u'rel', u'fixme, source, source:name, wikipedia', + u"'fixme' IS NOT null or 
'source' IS NOT null or 'source:name' IS NOT null or 'wikipedia' IS NOT null"), + ] """ A list of which are unsorted, contain duplicates and contains blank strings. """ -attrib_list_args = ['f', 'a', 'b', 'c', 'd', '', 'e', 'a', 'a'] -attrib_list_result = 'a, b, c, d, e, f' +attrib_list_args = [('ffff', 'FALSE'), + ('aaaa', 'FALSE'), + ('bbbb', 'TRUE' ), + ('cccc', 'FALSE'), + ('dddd', 'FALSE'), + ('', 'FALSE'), + ('eeee', 'FALSE'), + ('aaaa', 'FALSE'), + ('aaaa', 'FALSE')] +attrib_list_result = 'aaaa, cccc, dddd, eeee, ffff, bbbb' """ A list of tuples. Each tuple has as its first item -- A list of tuples of string pairs, representing OSM key value pairs +- A list of tuples of string pairs, representing OSM key value pairs and a "meta" field indicating whether + the key/value pair represents feature level metadata. - A string of the resulting selct clause + +If the meta field is True for all instances of a particular attribute then the attribute should be +excluded from the attribute list. EG in this case there are two entries for attribute 'a'. In one case the +meta field False therefore 'a' is included. 
'z' only occurs with meta value True, therefore is excluded """ select_clause_args_and_result_pairs = [ ( [ - (u"border_type", u"*") + (u"border_type", u"*", u'FALSE') ], u"'border_type' IS NOT null" ), ( [ - (u"boundary", u"administrative"), - (u"boundary", u"maritime"), - (u"boundary", u"political"), - (u"boundary", u"user defined"), - (u"border_type", u"*") + (u"boundary", u"administrative", u'FALSE'), + (u"boundary", u"maritime", u'FALSE'), + (u"boundary", u"political", u'FALSE'), + (u"boundary", u"user defined", u'FALSE'), + (u"border_type", u"*", u'FALSE') ], u"'border_type' IS NOT null or 'boundary' IS NOT null" ), ( [ - (u"boundary", u"administrative"), - (u"boundary", u"maritime"), - (u"boundary", u"political") + (u"boundary", u"administrative", u'FALSE'), + (u"boundary", u"maritime", u'FALSE'), + (u"boundary", u"political", u'FALSE') ], u"'boundary'='administrative' or 'boundary'='maritime' or 'boundary'='political'" ), ( [ - (u"admin_level", u"Number"), + (u"boundary", u"administrative", u'FALSE'), + (u"boundary", u"maritime", u'FALSE'), + (u"boundary", u"political", u'FALSE'), + (u"source", u"historical", u'TRUE'), + (u"wikipedia", u"URL", u'TRUE') + ], + u"'boundary'='administrative' or 'boundary'='maritime' or 'boundary'='political'" + ), + ( + [ + (u"admin_level", u"Number", u'FALSE'), ], u"'admin_level' IS NOT null" ), @@ -129,3 +219,31 @@ def path_to_fixtures_xls(): u'' ) ] + +output_dir_listing = [ + [ + ".", + ['admn', 'tran'], + [] + ], + [ + "admn", + [], + ['wrl_admn_ad_ln_su_osm_pp.dbf', 'wrl_admn_ad_ln_su_osm_pp.prj', + 'wrl_admn_ad_ln_su_osm_pp.shp', 'wrl_admn_ad_ln_su_osm_pp.shx', + 'wrl_admn_ad_pt_su_osm_pp.dbf', 'wrl_admn_ad_pt_su_osm_pp.prj', + 'wrl_admn_ad_pt_su_osm_pp.shp', 'wrl_admn_ad_pt_su_osm_pp.shx', + 'wrl_admn_ad_py_su_osm_pp.dbf', 'wrl_admn_ad_py_su_osm_pp.prj', + 'wrl_admn_ad_py_su_osm_pp.shp', 'wrl_admn_ad_py_su_osm_pp.shx'] + ], + [ + "tran", + [], + ['wrl_tran_air_ln_su_osm_pp.dbf', 'wrl_tran_air_ln_su_osm_pp.prj', + 
'wrl_tran_air_ln_su_osm_pp.shp', 'wrl_tran_air_ln_su_osm_pp.shx', + 'wrl_tran_air_pt_su_osm_pp.dbf', 'wrl_tran_air_pt_su_osm_pp.prj', + 'wrl_tran_air_pt_su_osm_pp.shp', 'wrl_tran_air_pt_su_osm_pp.shx', + 'wrl_tran_air_py_su_osm_pp.dbf', 'wrl_tran_air_py_su_osm_pp.prj', + 'wrl_tran_air_py_su_osm_pp.shp', 'wrl_tran_air_py_su_osm_pp.shx'] + ] +] \ No newline at end of file diff --git a/osm2ma/ogrwrapper.py b/osm2ma/ogrwrapper.py index e4c18b6..18f9004 100644 --- a/osm2ma/ogrwrapper.py +++ b/osm2ma/ogrwrapper.py @@ -24,6 +24,9 @@ def _create_new_shpfile(shpf_name, shpf_dir, dest_geom_type, dest_srs): logging.info('Creating shapefile: {}'.format(shpf_name)) # Create the output Layer shpf_path = os.path.join(shpf_dir, shpf_name) + shpf_name = shpf_name.encode('utf-8') + shpf_path = shpf_path.encode('utf-8') + shpf_driver = ogr.GetDriverByName("ESRI Shapefile") # Remove output shapefile if it already exists @@ -32,28 +35,36 @@ def _create_new_shpfile(shpf_name, shpf_dir, dest_geom_type, dest_srs): # Create the output shapefile shp_data_source = shpf_driver.CreateDataSource(shpf_path) - # out_lyr_name = os.path.splitext( os.path.split( outShapefile )[1] )[0] - # This should be just the shapefile name without extension - need to check out_layer = None shpf_name = shpf_name.encode('utf-8') out_layer = shp_data_source.CreateLayer( os.path.splitext(shpf_name)[1], srs=dest_srs, geom_type=dest_geom_type ) - # out_layer = shpDataSource.CreateLayer(name=u'wrl_util_bdg_py_su_osm_pp') + #_create_attributes(source_lyr, out_layer, attribs) + return shp_data_source, out_layer # do stuff -def _copy_attributes(source_lyr, dest_lyr, target_attribs): +def _create_attributes(source_lyr, dest_lyr, target_attribs): logging.debug('copying attributes') # Add input Layer Fields to the output Layer if it is the one we want + # By copying the field definition from the source it saves us having to worry + # about data type or string widths etc. 
source_lyr_defn = source_lyr.GetLayerDefn() + lst_attribs = map(unicode.strip, target_attribs.split(",")) + # print(lst_attribs) for i in range(0, source_lyr_defn.GetFieldCount()): field_defn = source_lyr_defn.GetFieldDefn(i) field_name = field_defn.GetName() - if field_name in target_attribs: - dest_lyr.CreateField(field_defn) + dest_lyr.CreateField(field_defn) + # if field_name in lst_attribs: + # dest_lyr.CreateField(field_defn) + # else: + # print "PBF attrib not required; {}".format(field_name) + #for attrib in target_attribs.split('r'): + # lyr.CreateField(field_defn) # do stuff @@ -128,8 +139,11 @@ def get_geom_details(shpf_geom_type): def do_ogr2ogr_process(shp_defn, pbf_data_source, output_dir): shpf_name, data_cat, shpf_geom_type, attribs, where_clause = shp_defn cat_dir_path = _create_datacat_dir(output_dir, data_cat) - logging.debug( + logging.info( 'starting ogr2ogr process for shapefile: {}'.format(shpf_name)) + #logging.info( + # 'using attributes : {}'.format(', '.join(attribs))) + print "using attributes: {}".format(attribs) osm_source_layer, dest_geom = get_geom_details(shpf_geom_type) @@ -143,14 +157,16 @@ def do_ogr2ogr_process(shp_defn, pbf_data_source, output_dir): # if pbf_lyr.GetFeatureCount() was working I'd test to only copy files with # > 0 features. 
logging.debug('do_ogr2ogr_process: about to create new shapefile') - shp_data_source, shp_lyr = _create_new_shpfile( - shpf_name, cat_dir_path, dest_geom, pbf_srs) + shp_data_source, shp_lyr = _create_new_shpfile(shpf_name, cat_dir_path, dest_geom, pbf_srs, ) + logging.debug('do_ogr2ogr_process: created new shapefile') logging.debug('do_ogr2ogr_process: about to copy attributes') - _copy_attributes(pbf_lyr, shp_lyr, attribs) + _create_attributes(pbf_lyr, shp_lyr, attribs) + logging.debug('do_ogr2ogr_process: copied attributes') logging.debug('do_ogr2ogr_process: about to copy features') _copy_features(pbf_lyr, shp_lyr, attribs) + logging.debug('do_ogr2ogr_process: copied features') # cmd_str = compose_ogr2ogr_cmd( # data_cat, geom_type, attribs, where_clause, pbf_file, shpf_name, @@ -162,7 +178,9 @@ def do_ogr2ogr_process(shp_defn, pbf_data_source, output_dir): def batch_convert(xwalk, pbf_file, output_dir): gdal.UseExceptions() gdal.SetConfigOption("OGR_INTERLEAVED_READING", "YES") - # Open input PBF driver + + osmconf_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), r'osmconf.ini') + gdal.SetConfigOption('OSM_CONFIG_FILE', osmconf_path) # Open input PBF driver pbf_driver = ogr.GetDriverByName("OSM") pbf_data_source = pbf_driver.Open(pbf_file, 0) diff --git a/osm2ma/osmconf.ini b/osm2ma/osmconf.ini new file mode 100644 index 0000000..4de9a87 --- /dev/null +++ b/osm2ma/osmconf.ini @@ -0,0 +1,110 @@ +# +# Configuration file for OSM import +# + +# put here the name of keys for ways that are assumed to be polygons if they are closed +# see http://wiki.openstreetmap.org/wiki/Map_Features +closed_ways_are_polygons=aeroway,amenity,boundary,building,craft,geological,historic,landuse,leisure,military,natural,office,place,shop,sport,tourism + +# comment to avoid laundering of keys ( ':' turned into '_' ) +attribute_name_laundering=yes + +# uncomment to report all nodes, including the ones without any (significant) tag +#report_all_nodes=yes + +# uncomment to 
report all ways, including the ones without any (significant) tag +#report_all_ways=yes + +[points] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,barrier,highway,ref,address,is_in,place,man_made +# keys that, alone, are not significant enough to report a node as a OGR point +unsignificant=created_by,converted_by,source,time,ele,attribution +# keys that should NOT be reported in the "other_tags" field +ignore=created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes + +[lines] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,highway,waterway,aerialway,barrier,man_made +# keys that should NOT be reported in the "other_tags" field +ignore=created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes + +[multipolygons] +# common attributes +# note: for multipolygons, osm_id=yes instanciates a osm_id field for the id of relations +# and a osm_way_id field for the id of closed ways. Both fields are exclusively set. 
+osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type,aeroway,amenity,admin_level,barrier,boundary,building,craft,geological,historic,land_area,landuse,leisure,man_made,military,natural,office,place,shop,sport,tourism +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes + +[multilinestrings] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +#other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +all_tags=yes + +[other_relations] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +other_tags=no +# uncomment to create "all_tags" field. 
"all_tags" and "other_tags" are exclusive +all_tags=yes diff --git a/osm2ma/osmconf.origional.ini b/osm2ma/osmconf.origional.ini new file mode 100644 index 0000000..81e0df7 --- /dev/null +++ b/osm2ma/osmconf.origional.ini @@ -0,0 +1,110 @@ +# +# Configuration file for OSM import +# + +# put here the name of keys for ways that are assumed to be polygons if they are closed +# see http://wiki.openstreetmap.org/wiki/Map_Features +closed_ways_are_polygons=aeroway,amenity,boundary,building,craft,geological,historic,landuse,leisure,military,natural,office,place,shop,sport,tourism + +# comment to avoid laundering of keys ( ':' turned into '_' ) +attribute_name_laundering=yes + +# uncomment to report all nodes, including the ones without any (significant) tag +#report_all_nodes=yes + +# uncomment to report all ways, including the ones without any (significant) tag +#report_all_ways=yes + +[points] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,barrier,highway,ref,address,is_in,place,man_made +# keys that, alone, are not significant enough to report a node as a OGR point +unsignificant=created_by,converted_by,source,time,ele,attribution +# keys that should NOT be reported in the "other_tags" field +ignore=created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +#other_tags=no +# uncomment to create "all_tags" field. 
"all_tags" and "other_tags" are exclusive +#all_tags=yes + +[lines] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,highway,waterway,aerialway,barrier,man_made +# keys that should NOT be reported in the "other_tags" field +ignore=created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +#other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +#all_tags=yes + +[multipolygons] +# common attributes +# note: for multipolygons, osm_id=yes instanciates a osm_id field for the id of relations +# and a osm_way_id field for the id of closed ways. Both fields are exclusively set. +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type,aeroway,amenity,admin_level,barrier,boundary,building,craft,geological,historic,land_area,landuse,leisure,man_made,military,natural,office,place,shop,sport,tourism +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +#other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +#all_tags=yes + +[multilinestrings] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +#other_tags=no +# uncomment to create "all_tags" field. 
"all_tags" and "other_tags" are exclusive +#all_tags=yes + +[other_relations] +# common attributes +osm_id=yes +osm_version=no +osm_timestamp=no +osm_uid=no +osm_user=no +osm_changeset=no + +# keys to report as OGR fields +attributes=name,type +# keys that should NOT be reported in the "other_tags" field +ignore=area,created_by,converted_by,source,time,ele,note,openGeoDB:,fixme,FIXME +# uncomment to avoid creation of "other_tags" field +#other_tags=no +# uncomment to create "all_tags" field. "all_tags" and "other_tags" are exclusive +#all_tags=yes diff --git a/osm2ma/test_configengine.py b/osm2ma/test_configengine.py index a553167..60ec35a 100644 --- a/osm2ma/test_configengine.py +++ b/osm2ma/test_configengine.py @@ -89,8 +89,8 @@ def test_class_function(self): maintian state during the aggreegation process. Therefore it is difficult to meaningfully subdivide tests. :return: """ - for arg in fixtures.attrib_list_args: - self.al.step(arg) + for arg, meta in fixtures.attrib_list_args: + self.al.step(arg, meta) self.assertEqual(self.al.finalize(), fixtures.attrib_list_result) @@ -111,8 +111,8 @@ def test_class_function(self): """ for args, result in fixtures.select_clause_args_and_result_pairs: sc = _SelectClause() - for key, val in args: - sc.step(key, val) + for key, val, meta in args: + sc.step(key, val, meta) self.assertEqual(sc.finalize(), result) diff --git a/osm2ma/test_ogrwrapper.py b/osm2ma/test_ogrwrapper.py index 4e4b47b..04d0bfa 100644 --- a/osm2ma/test_ogrwrapper.py +++ b/osm2ma/test_ogrwrapper.py @@ -1,35 +1,82 @@ # This file was originally generated by PyScripter's unit test wizard import unittest +import tempfile +import shutil +import os import ogrwrapper as ogrw from osgeo import ogr +from osgeo import osr +from osgeo import gdal +from configengine import xwalk_from_raw_config +import fixtures class TestGlobalFunctions(unittest.TestCase): def setUp(self): - pass + gdal.UseExceptions() + gdal.SetConfigOption("OGR_INTERLEAVED_READING", "YES") + 
self.tmpdir = tempfile.mkdtemp() def tearDown(self): - pass - - @unittest.skip("not implemented") - def test_create_datacat_dir(self): - pass - - @unittest.skip("not implemented") - def test_create_new_shpfile(self): - pass - - @unittest.skip("not implemented") - def test_copy_attributes(self): - pass - - @unittest.skip("not implemented") - def test_copy_features(self): - pass - - def testget_geom_details(self): + shutil.rmtree(self.tmpdir, ignore_errors=True) + + def test_medium_create_datacat_dir(self): + test_data_cat = "test_data_cat" + # test trying to create the data category directory twice (shouldn't error) + for x in range(0, 2): + created_path = ogrw._create_datacat_dir(self.tmpdir, test_data_cat) + self.assertEqual(created_path, os.path.join(self.tmpdir, test_data_cat)) + self.assertTrue(os.path.exists(created_path)) + + def test_medium_create_new_shpfile(self): + test_shpfile = "test_shpfile" + dest_srs = osr.SpatialReference() + dest_srs.ImportFromEPSG(3857) # from EPSG + + # test trying to create shpfile twice (shouldn't error) + for i in range(2): + ogrw._create_new_shpfile(test_shpfile+".shp", + self.tmpdir, + ogr.wkbLineString, + dest_srs) + + # check the file exists on disk + for extn in (u'.dbf', u'.prj', u'.shp', u'.shx'): + self.assertTrue(os.path.exists(os.path.join(self.tmpdir, test_shpfile + extn))) + + def test_create_attributes(self): + test_shpfile = "test_shpfile" + dest_srs = osr.SpatialReference() + dest_srs.ImportFromEPSG(3857) # from EPSG + # test_attribs = ["attrib1", "attrib2", "attrib3", "attrib4", "attrib5"] + test_attribs = ["name", "other_tags"] + + shp_source, shp_lyr = ogrw._create_new_shpfile( + test_shpfile+".shp", + self.tmpdir, + ogr.wkbLineString, + dest_srs) + + pbf_driver = ogr.GetDriverByName("OSM") + pbf_data_source = pbf_driver.Open(fixtures.example_pbf, 0) + pbf_lyr = pbf_data_source.GetLayerByName("points") + + ogrw._create_attributes(pbf_lyr, shp_lyr, ", ".join(test_attribs)) + lyr_defn = shp_lyr.GetLayerDefn() + result_attribs = []
+ for i in range(lyr_defn.GetFieldCount()): + result_attribs.append(lyr_defn.GetFieldDefn(i).GetName()) + + print(sorted(test_attribs)) + print(sorted(result_attribs)) + + self.assertEqual(sorted(test_attribs), + sorted(result_attribs), + "Need to add test to ensure the correct attributes are created") + + def testget_short_geom_details(self): source_layer, dest_geom = ogrw.get_geom_details("pt") self.assertEqual(source_layer, "points") self.assertEqual(dest_geom, ogr.wkbPoint) @@ -49,13 +96,13 @@ def testget_geom_details(self): # Test an invalid geometry type value. self.assertRaises(ValueError, ogrw.get_geom_details, "abcde") - @unittest.skip("not implemented") - def testdo_ogr2ogr_process(self): - pass - - @unittest.skip("not implemented") - def testbatch_convert(self): - pass - -if __name__ == '__main__': - unittest.main() + @unittest.skip("too long") + def test_long_batch_convert(self): + xwalk = xwalk_from_raw_config(fixtures.rawconf_good, 'wrl', 'su') + ogrw.batch_convert(xwalk, fixtures.example_pbf, self.tmpdir) + result_dir_listing = [] + for root, dirs, files in os.walk(self.tmpdir): + dirs.sort() + files.sort() + result_dir_listing.append([os.path.relpath(root, self.tmpdir), dirs, files]) + self.assertEqual(result_dir_listing, fixtures.output_dir_listing) diff --git a/osm2ma/testfiles/fixtures.xls b/osm2ma/testfiles/fixtures.xls index b758486..2a092f5 100644 Binary files a/osm2ma/testfiles/fixtures.xls and b/osm2ma/testfiles/fixtures.xls differ