Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
__pycache__
*.pyc
.idea
.DS_Store
nohup.out
95 changes: 47 additions & 48 deletions web/app/auto_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@
OSM_TABLE as osm_table,
MWM_SIZE_THRESHOLD,
)
from subregions import get_subregions_info
from subregions import (
get_regions_info,
get_subregions_info,
)


class DisjointClusterUnion:
"""Disjoint set union implementation for administrative subregions."""

def __init__(self, region_id, subregions, next_level, mwm_size_thr=None):
def __init__(self, subregions, mwm_size_thr=None):
assert all(s_data['mwm_size_est'] is not None
for s_data in subregions.values())
self.region_id = region_id

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

save_splitting_to_file в auto_split_debug надо тогда тоже комплектно поменять или, если использовать не планируется, выпилить
сейчас он требует наличия region_id в DisjointClusterUnion

self.subregions = subregions
self.next_level = next_level
self.mwm_size_thr = mwm_size_thr or MWM_SIZE_THRESHOLD
self.representatives = {sub_id: sub_id for sub_id in subregions}
# A cluster is one or more subregions with common borders
Expand All @@ -33,6 +34,22 @@ def __init__(self, region_id, subregions, next_level, mwm_size_thr=None):
'finished': False, # True if the cluster cannot be merged with another
}

def try_collapse_into_one(self):
    """Try to merge all subregions into a single finished cluster.

    The merge happens only when the summed 'mwm_size_est' of all
    subregions does not exceed self.mwm_size_thr. On success
    self.clusters is replaced with a dict holding one cluster, keyed by
    the first subregion id yielded by iterating self.subregions.

    Returns True if the subregions were collapsed into one cluster and
    False otherwise (including the case of no subregions at all, when
    self.clusters is left untouched).
    """
    if not self.subregions:
        # Nothing to collapse. Without this guard the empty sum (0) would
        # pass the threshold check and next() below would raise
        # StopIteration.
        return False

    sum_mwm_size_est = sum(s_data['mwm_size_est']
                           for s_data in self.subregions.values())
    if sum_mwm_size_est > self.mwm_size_thr:
        return False

    a_subregion_id = next(iter(self.subregions))
    self.clusters = {
        a_subregion_id: {
            'representative': a_subregion_id,
            'subregion_ids': list(self.subregions),
            'mwm_size_est': sum_mwm_size_est,
            'finished': True,
        }
    }
    return True

def get_smallest_cluster(self):
"""Find minimal cluster."""
smallest_cluster_id = min(
Expand Down Expand Up @@ -143,15 +160,14 @@ def calculate_common_border_matrix(conn, subregion_ids):
return common_border_matrix


def find_golden_splitting(conn, border_id, next_level, mwm_size_thr):
subregions = get_subregions_info(conn, border_id, osm_table,
next_level, need_cities=True)
if not subregions:
return
if any(s_data['mwm_size_est'] is None for s_data in subregions.values()):
def combine_into_clusters(conn, regions, mwm_size_thr):
"""Merge regions into clusters up to mwm_size_thr"""

if any(s_data['mwm_size_est'] is None for s_data in regions.values()):
return

dcu = DisjointClusterUnion(border_id, subregions, next_level, mwm_size_thr)
dcu = DisjointClusterUnion(regions, mwm_size_thr)

all_subregion_ids = dcu.get_all_subregion_ids()
common_border_matrix = calculate_common_border_matrix(conn, all_subregion_ids)

Expand All @@ -172,57 +188,40 @@ def find_golden_splitting(conn, border_id, next_level, mwm_size_thr):
return dcu


def get_union_sql(subregion_ids):
    """Return an SQL query that selects the united geometry of subregions.

    For a single id the query selects that row's 'way' as is; for several
    ids it aggregates their 'way' geometries with ST_Union in one flat
    query. The previous implementation recursed once per id, producing
    SQL nested as deep as the list is long and risking Python's recursion
    limit on big clusters.

    subregion_ids -- non-empty sequence of osm ids from the osm table.

    Raises ValueError if subregion_ids is empty.
    """
    if len(subregion_ids) == 0:
        raise ValueError("subregion_ids must not be empty")
    if len(subregion_ids) == 1:
        return f"""
            SELECT way FROM {osm_table} WHERE osm_id={subregion_ids[0]}
        """
    subregion_ids_str = ','.join(str(x) for x in subregion_ids)
    return f"""
        SELECT ST_Union(way) FROM {osm_table}
        WHERE osm_id IN ({subregion_ids_str})
    """
def split_region_at_admin_level(conn, region_id, next_level, mwm_size_thr):
    """Cluster the next_level subregions of a region and save the result.

    Fetches the subregions of region_id at admin level next_level,
    combines them into clusters limited by mwm_size_thr and stores the
    splitting with save_splitting_to_db().

    Does nothing when the region has no subregions at that level or when
    the subregions could not be combined into clusters.
    """
    subregions = get_subregions_info(conn, region_id, osm_table, next_level)
    if not subregions:
        return
    dcu = combine_into_clusters(conn, subregions, mwm_size_thr)
    if dcu is None:
        # combine_into_clusters() returns None when some subregion has no
        # mwm size estimation; saving would fail on dcu.mwm_size_thr.
        return
    save_splitting_to_db(conn, region_id, next_level, dcu)


def save_splitting_to_db(conn, dcu: DisjointClusterUnion):
def save_splitting_to_db(conn, region_id, next_level, dcu: DisjointClusterUnion):
with conn.cursor() as cursor:
# Remove previous splitting of the region
cursor.execute(f"""
DELETE FROM {autosplit_table}
WHERE osm_border_id = {dcu.region_id}
WHERE osm_border_id = {region_id}
AND mwm_size_thr = {dcu.mwm_size_thr}
AND next_level = {dcu.next_level}
AND next_level = {next_level}
""")
for cluster_id, data in dcu.clusters.items():
subregion_ids = data['subregion_ids']
subregion_ids_array_str = (
'{' + ','.join(str(x) for x in subregion_ids) + '}'
)
cluster_geometry_sql = get_union_sql(subregion_ids)
for cluster_id, cluster_data in dcu.clusters.items():
subregion_ids = cluster_data['subregion_ids']
subregion_ids_str = ','.join(str(x) for x in subregion_ids)
subregion_ids_array_str = '{' + subregion_ids_str + '}'
cursor.execute(f"""
INSERT INTO {autosplit_table} (osm_border_id, subregion_ids, geom,
next_level, mwm_size_thr, mwm_size_est)
VALUES (
{dcu.region_id},
{region_id},
'{subregion_ids_array_str}',
({cluster_geometry_sql}),
{dcu.next_level},
(
SELECT ST_Union(way) FROM {osm_table}
WHERE osm_id IN ({subregion_ids_str})
),
{next_level},
{dcu.mwm_size_thr},
{data['mwm_size_est']}
{cluster_data['mwm_size_est']}
)
""")
conn.commit()


def split_region(conn, region_id, next_level, mwm_size_thr):
    """Find a splitting of the region and, if one was found, save it to the DB."""
    dcu = find_golden_splitting(conn, region_id, next_level, mwm_size_thr)
    if dcu is not None:
        save_splitting_to_db(conn, dcu)

## May need to debug
#from auto_split_debug import save_splitting_to_file
#save_splitting_to_file(conn, dcu)
60 changes: 55 additions & 5 deletions web/app/borders_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import config
from borders_api_utils import *
from countries_structure import (
auto_divide_country,
CountryStructureException,
create_countries_initial_structure,
)
Expand All @@ -28,6 +29,7 @@
borders_to_xml,
lines_to_xml,
)
from simple_splitting import simple_split
from subregions import (
get_child_region_ids,
get_parent_region_id,
Expand Down Expand Up @@ -233,22 +235,61 @@ def prepare_sql_search_string(string):
@app.route('/search')
def search():
query = request.args.get('q')
sql_search_string = prepare_sql_search_string(query)
# query may contain region id or a part of its name
try:
region_id = int(query)
search_value = region_id
is_id = True
except ValueError:
search_value = prepare_sql_search_string(query)
is_id = False

with g.conn.cursor() as cursor:
cursor.execute(f"""
SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom)
FROM {config.BORDERS_TABLE}
WHERE name ILIKE %s
WHERE {'id =' if is_id else 'name ILIKE'} %s
ORDER BY (ST_Area(geography(geom)))
LIMIT 1""", (sql_search_string,)
LIMIT 1""", (search_value,)
)
if cursor.rowcount > 0:
rec = cursor.fetchone()
return jsonify(status='ok', bounds=rec)
return jsonify(status='not found')


@app.route('/simple_split')
@check_write_access
@validate_args_types(id=int)
def simple_split_endpoint():
    """Split into 2/4 parts with straight lines.

    Reads the region id from the 'id' query argument and responds with a
    JSON object whose 'status' is 'ok' on success or a human-readable
    message otherwise.

    If the region has no stored mwm size estimation, the endpoint only
    tries to compute it and reports the outcome; the region is not split
    within that same request.
    """
    region_id = int(request.args.get('id'))
    with g.conn.cursor() as cursor:
        cursor.execute(f"""
            SELECT name, mwm_size_est
            FROM {config.BORDERS_TABLE}
            WHERE id = %s""", (region_id,))
        if cursor.rowcount == 0:
            return jsonify(status=f"Region {region_id} not found")
        name, mwm_size_est = cursor.fetchone()

    if mwm_size_est is None:
        mwm_size_est = update_border_mwm_size_estimation(g.conn, region_id)
        if mwm_size_est is not None:
            # The estimation has just been computed; report that instead
            # of splitting in this request.
            return jsonify(status='MWM size estimation was updated')
        return jsonify(status="Cannot estimate region mwm size")

    region = {
        'id': region_id,
        'name': name,
        'mwm_size_est': mwm_size_est,
    }

    if simple_split(g.conn, region):
        g.conn.commit()
        return jsonify(status='ok')
    return jsonify(status="Can't split region into parts")


@app.route('/split')
@check_write_access
@validate_args_types(id=int)
Expand All @@ -257,6 +298,7 @@ def split():
line = request.args.get('line')
save_region = (request.args.get('save_region') == 'true')
borders_table = config.BORDERS_TABLE
warnings = []
with g.conn.cursor() as cursor:
# check that we're splitting a single polygon
cursor.execute(f"""
Expand Down Expand Up @@ -305,7 +347,6 @@ def split():
new_ids.append(free_id)
counter += 1
free_id -= 1
warnings = []
for border_id in new_ids:
try:
update_border_mwm_size_estimation(g.conn, border_id)
Expand Down Expand Up @@ -863,7 +904,7 @@ def export_poly():
borders_table = request.args.get('table')
borders_table = config.OTHER_TABLES.get(borders_table, config.BORDERS_TABLE)

fetch_borders_args = {'table': borders_table, 'only_leaves': True}
fetch_borders_args = {'table': borders_table, 'only_leaves': False}

if 'xmin' in request.args:
# If one coordinate is given then others are also expected.
Expand Down Expand Up @@ -994,6 +1035,15 @@ def border():
return jsonify(status='ok', geojson=borders[0])


@app.route('/auto_divide_country')
@validate_args_types(id=int)
def auto_divide_country_endpoint():
    """Run auto_divide_country() for the country from the 'id' query argument.

    Responds with status 'ok' and at most 10 warnings, or, when errors
    were reported, with the first three errors joined by '<br/>' as the
    status.
    """
    errors, warnings = auto_divide_country(g.conn, int(request.args.get('id')))
    if not errors:
        return jsonify(status='ok', warnings=warnings[:10])
    return jsonify(status='<br/>'.join(errors[:3]))

@app.route('/start_over')
def start_over():
try:
Expand Down
32 changes: 17 additions & 15 deletions web/app/borders_api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
BORDERS_TABLE as borders_table,
OSM_TABLE as osm_table,
)
from auto_split import split_region
from auto_split import split_region_at_admin_level
from subregions import (
get_parent_region_id,
get_region_country,
Expand Down Expand Up @@ -70,6 +70,9 @@ def fetch_borders(**kwargs):
for rec in cursor:
region_id = rec[8]
country_id, country_name = get_region_country(g.conn, region_id)
if country_id is None:
# This means region_id was deleted from the DB meanwhile.
continue
props = { 'name': rec[0] or '', 'nodes': rec[2], 'modified': rec[3],
'disabled': rec[4], 'count_k': rec[5],
'comment': rec[6],
Expand Down Expand Up @@ -152,7 +155,7 @@ def get_clusters_for_preview_one(region_id, next_level, mwm_size_thr):
""", splitting_sql_params
)
if cursor.rowcount == 0:
split_region(g.conn, region_id, next_level, mwm_size_thr)
split_region_at_admin_level(g.conn, region_id, next_level, mwm_size_thr)

cursor.execute(f"""
SELECT subregion_ids[1],
Expand Down Expand Up @@ -258,7 +261,7 @@ def divide_into_clusters(region_ids, next_level, mwm_size_thr):
""", splitting_sql_params
)
if cursor.rowcount == 0:
split_region(g.conn, region_id, next_level, mwm_size_thr)
split_region_at_admin_level(g.conn, region_id, next_level, mwm_size_thr)

free_id = get_free_id()
counter = 0
Expand Down Expand Up @@ -395,7 +398,7 @@ def find_potential_parents(region_id):
return parents


def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'):
def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed', mwm_size_est=None):
errors, warnings = [], []
with conn.cursor() as cursor:
# Check if this id already in use
Expand All @@ -406,22 +409,21 @@ def copy_region_from_osm(conn, region_id, name=None, parent_id='not_passed'):
errors.append(f"Region with id={region_id} already exists under name '{name}'")
return errors, warnings

name_expr = f"'{name}'" if name else "name"
parent_id_expr = f"{parent_id}" if isinstance(parent_id, int) else "NULL"
cursor.execute(f"""
parent_id_sql = None if parent_id == 'not_passed' else parent_id
query = f"""
INSERT INTO {borders_table}
(id, geom, name, parent_id, modified, count_k)
SELECT osm_id, way, {name_expr}, {parent_id_expr}, now(), -1
(id, geom, name, parent_id, modified, count_k, mwm_size_est)
SELECT osm_id, way, {'%s' if name is not None else 'name'}, %s, now(), -1, %s
FROM {osm_table}
WHERE osm_id = %s
""", (region_id,)
)
"""
args = (parent_id_sql, mwm_size_est, region_id)
if name is not None:
args = (name,) + args
cursor.execute(query, args)
if parent_id == 'not_passed':
assign_region_to_lowest_parent(conn, region_id)
try:
update_border_mwm_size_estimation(conn, region_id)
except Exception as e:
warnings.append(str(e))

return errors, warnings


Expand Down
21 changes: 14 additions & 7 deletions web/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
OSM_PLACES_TABLE = 'osm_places'
# transit table for autosplitting results
AUTOSPLIT_TABLE = 'splitting'
# table with land polygons (i.e. without ocean), split into smaller overlapping pieces
# TODO: prepare this table during docker container setup
LAND_POLYGONS_TABLE = 'land'
# coastline split into smaller chunks
# TODO: prepare this table during docker container setup
COASTLINE_TABLE = 'coastlines'
# tables with borders for reference
OTHER_TABLES = {
#'old': 'old_borders'
Expand All @@ -30,12 +36,13 @@
DAEMON_LOG_PATH = '/var/log/borders-daemon.log'
# mwm size threshold in Kb
MWM_SIZE_THRESHOLD = 70*1024
# Estimated mwm size is predicted by the 'model.pkl' with 'scaler.pkl' for X
MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model.pkl'
MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler.pkl'
# Estimated mwm size is predicted by the 'model*.pkl' with 'scaler*.pkl' for X
MWM_SIZE_PREDICTION_MODEL_PATH = '/app/data/model_with_coastline.pkl'
MWM_SIZE_PREDICTION_MODEL_SCALER_PATH = '/app/data/scaler_with_coastline.pkl'
MWM_SIZE_PREDICTION_MODEL_LIMITATIONS = {
'area': 5500 * 1.5,
'urban_pop': 3500000 * 1.5,
'city_cnt': 32 * 1.5,
'hamlet_cnt': 2120 * 1.5
'land_area': 700_000,
'city_pop': 32_000_000,
'city_cnt': 1_200,
'hamlet_cnt': 40_000,
'coastline_length': 25_000,
}
Loading