From 8cdf5dcd3329e8ace3e8eb2e028b06f7fd252d82 Mon Sep 17 00:00:00 2001 From: Nathan Lenglet Date: Wed, 8 Feb 2023 09:59:06 +0100 Subject: [PATCH 1/7] refactor(export): update export, only one process refactor(export): move scripts to subdir refactor(export): new version with mtd join feat(cache): add export_graph script fix(export): add init files fix(export): import modification fix(export): flake exceptions refactor(export): v0 export avec prep refactor(export): v1 export fonctionnel, tests complets a faire fix(export): flake deprecated regex refactor(export): use cache id instead of cache path cleaning clean --- scripts/__init__.py | 0 scripts/export_graph/__init__.py | 0 scripts/export_graph/export_graph.py | 121 +++++++++ scripts/export_graph/prep_vectorise_graph.py | 211 +++++++++++++++ scripts/export_graph/vectorise_graph.py | 199 ++++++++++++++ scripts/prep_vectorise_graph.py | 257 ------------------- scripts/vectorise_graph.py | 196 -------------- 7 files changed, 531 insertions(+), 453 deletions(-) create mode 100644 scripts/__init__.py create mode 100644 scripts/export_graph/__init__.py create mode 100644 scripts/export_graph/export_graph.py create mode 100644 scripts/export_graph/prep_vectorise_graph.py create mode 100644 scripts/export_graph/vectorise_graph.py delete mode 100644 scripts/prep_vectorise_graph.py delete mode 100644 scripts/vectorise_graph.py diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/export_graph/__init__.py b/scripts/export_graph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/export_graph/export_graph.py b/scripts/export_graph/export_graph.py new file mode 100644 index 000000000..87d1a419d --- /dev/null +++ b/scripts/export_graph/export_graph.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +Script d'export du graphe a partir d'un cache +""" +import os +import sys +import re +import argparse +import prep_vectorise_graph as prep +import vectorise_graph as vect + +current = os.path.dirname(os.path.realpath(__file__)) +parent = os.path.dirname(current) +sys.path.append(parent) + +from process_requests import check_get_post, response2pyobj # noqa: E402 + + +def read_args(): + """Gestion des arguments""" + + parser = argparse.ArgumentParser() + parser.add_argument("-c", "--cache", required=True, type=str, help="path of input cache") + parser.add_argument("-o", "--output", required=True, help="output folder") + parser.add_argument("-b", "--branch", required=True, + help="id of branch of cache to use as source for patches") + parser.add_argument('-u', '--url', + help="http://[serveur]:[port] (default: http://localhost:8081)", + type=str, default='http://localhost:8081') + parser.add_argument("-t", "--tilesize", + help="tile size (in pixels) for vectorising graph tiles (default: 100000)", + type=int, default=100000) + parser.add_argument("-v", "--verbose", help="verbose (default: 0)", type=int, default=0) + args_prep = parser.parse_args() + + if args_prep.verbose >= 1: + print("\nArguments: ", args_prep) + + return args_prep + + +args = read_args() + +# check input url +url_pattern = r'^https?:\/\/[0-9A-z.]+\:[0-9]+$' +if not re.match(url_pattern, args.url): + raise SystemExit(f"ERROR: URL '{args.url}' is invalid") + +# recuperer les patches correspondant a la branche desiree (requete curl) +patches_files = os.path.join(args.output, 'patches.json') +req_get_patches = f'{args.url}/{args.branch}/patches' +resp_get_patches = check_get_post(req_get_patches) 
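For context, check_get_post and response2pyobj are project helpers imported from process_requests; the two calls above amount to an HTTP GET on <url>/<branch>/patches whose body is parsed into a Python object. A minimal stand-alone sketch of the same exchange with the requests library, assuming the route answers with a GeoJSON FeatureCollection (the helper below is illustrative, not the project's API):

import requests

def get_patches(url, branch):
    # GET <url>/<branch>/patches; assumes a GeoJSON FeatureCollection whose
    # features carry 'active', 'slabs' and 'id_branch' properties.
    resp = requests.get(f"{url}/{branch}/patches", timeout=30)
    resp.raise_for_status()
    return resp.json()

patches = get_patches("http://localhost:8081", 1)
for feature in patches.get("features", []):
    if feature["properties"]["active"]:
        print(feature["properties"]["id_branch"], feature["properties"]["slabs"])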
+list_patches_api = response2pyobj(resp_get_patches) + +# creation du repertoire de sortie si necessaire +try: + os.mkdir(args.output) +except FileExistsError: + print("ALERTE : Le dossier de sortie existe déjà.") + +# define working dir +os.chdir(args.output) +print(f"INFO : Le répertoire de travail est '{os.getcwd()}'") +# redefine input directory +try: + cache_path = os.path.relpath(args.cache, start=args.output) +except ValueError: + print("No relative path, absolute path is used instead") + cache_path = os.path.abspath(args.cache) +print("Updated input path relative to working dir: '" + cache_path + "'") + +# check if input dir exists +if not os.path.exists(cache_path): + raise SystemExit("ERREUR : Le répertoire " + cache_path + " n'existe pas.") + +# on verifie si l'overviews utilise est bien correct +path_depth, level, resol, proj, overviews = prep.check_overviews(cache_path) + +# recupere la liste des tiles impactes par les patches sur le chantier +list_patches, id_branch_patch = prep.list_patches(list_patches_api, cache_path, path_depth, level) + +# create correct path out +path_out = os.path.join(args.output, os.path.basename(cache_path)) + +# on verifie que la branch donne est correcte +# encore necessaire vu qu'on va chercher les patches via id_branch ? +prep.check_branch_patch(args.branch, id_branch_patch) + +# on cree un vrt pour chaque tuile ddu cache +prep.create_tiles(cache_path, level, args.branch, path_out, list_patches) + +# creation des vrt intermediaires +prep.build_full_vrt(path_out, resol) +prep.build_vrt_emprise(path_out) +prep.build_vrt_32bits(path_out) + +# creation des dalles de vrt pour la vectorisation +prep.create_tiles_vrt(args.output, path_out, resol, args.tilesize) + +# preparation du fichier pour la gpao +dict_cmd = {"projects": []} +project_name = os.path.basename(args.output.split('.')[0]) + +# chantier polygonize +vect.create_chantier_polygonize(dict_cmd, args.output, project_name) + +# chantier merge +vect.add_chantier_merge(dict_cmd, project_name) +merge_path, tmp_dir = vect.add_job_merge(dict_cmd, args.output, project_name, proj) +dissolve_path = vect.add_job_dissolve(dict_cmd, project_name, merge_path, tmp_dir) + +# chantier mtd +vect.add_chantier_mtd(dict_cmd, project_name) +vect.add_table_mtd(dissolve_path) +mtd_file = vect.create_mtd_dico(tmp_dir, overviews) +vect.add_job_gpkg_to_fgb(dict_cmd, dissolve_path) +vect.add_job_join_mtd(dict_cmd, dissolve_path, mtd_file) +vect.add_job_fbg_to_gpkg(dict_cmd, dissolve_path, args.output, project_name) + +# ecriture du json de gpao +vect.write_json_file(dict_cmd, args.output, project_name) diff --git a/scripts/export_graph/prep_vectorise_graph.py b/scripts/export_graph/prep_vectorise_graph.py new file mode 100644 index 000000000..efda1ab4e --- /dev/null +++ b/scripts/export_graph/prep_vectorise_graph.py @@ -0,0 +1,211 @@ +# -*- coding: utf-8 -*- +""" +Script de vectorisation du graphe à partir d'un cache d'images COG +""" +import os +import sys +import json +from osgeo import gdal + +current = os.path.dirname(os.path.realpath(__file__)) +parent = os.path.dirname(current) +sys.path.append(parent) + +from cache_def import get_slab_path # noqa: E402 + + +def check_overviews(cache): + overviews_path = os.path.join(cache, "overviews.json") + + # lecture du fichier overviews pour recuperer les infos du cache + try: + with open(overviews_path, encoding='utf-8') as fileOverviews: + overviews = json.load(fileOverviews) + except IOError: + print(f"ERREUR: Le fichier '{overviews_path}' n'existe pas.") + + if 
'pathDepth' not in overviews: + raise SystemExit(f"ERREUR: L'attribut 'pathDepth' n'existe pas dans '{overviews_path}'.") + path_depth = overviews['pathDepth'] + + if 'level' not in overviews: + raise SystemExit(f"ERREUR: L'attribut 'level' n'existe pas dans '{overviews_path}'.") + if 'max' not in overviews['level']: + raise SystemExit(f"ERREUR: L'attribut 'max' n'existe pas dans '{overviews_path}'.") + level = overviews['level']['max'] + + if 'resolution' not in overviews: + raise SystemExit(f"ERREUR: L'attribut 'resolution' n'existe pas dans '{overviews_path}.") + resol = overviews['resolution'] + + proj = str(overviews['crs']['type']) + ':' + str(overviews['crs']['code']) + + return path_depth, level, resol, proj, overviews + + +# on recupere les infos concernant les patches dans le json en entree +def list_patches(patches, cache, path_depth, level): + if 'features' not in patches: + raise SystemExit(f"ERROR: Attribute 'features' does not exist in '{patches}'.") + patches = patches['features'] + + id_branch_patch = None + list_patches = [] + for patch in patches: + if patch['properties']['active'] is True: + slabs = patch['properties']['slabs'] + id_branch_patch = patch['properties']['id_branch'] + for slab in slabs: + x = slab[0] + y = slab[1] + + slab_path = get_slab_path(int(x), int(y), int(path_depth)) + tile_path = os.path.join(cache, 'graph', str(level), slab_path[1:]) + list_patches.append(os.path.abspath(tile_path + '.tif')) + + return list_patches, id_branch_patch + + +def check_branch_patch(branch, id_branch_patch): + if branch and id_branch_patch and int(branch) != id_branch_patch: + raise SystemExit(f"** ERREUR: " + f"Pas de correspondance entre la branche indiquée '{branch}' " + f"et celle des retouches '{id_branch_patch}' !") + + if not branch and id_branch_patch: + print(f"** La branche de retouches traitée est : '{id_branch_patch}'") + branch = str(id_branch_patch) + + +def create_tiles(cache, level, branch, path_out, list_patches): + graph_dir = os.path.join(cache, 'graph', str(level)) + + # on parcourt le repertoire graph du cache pour recuperer l'ensemble des images de graphe + list_tiles = [] + for (root, dirs, files) in os.walk(graph_dir): + for file in files: + file = os.path.join(root, file) + list_tiles.append(os.path.abspath(file)) + + # fichier intermediaire contenant la liste de images pour le vrt + with open(path_out + '.txt', 'w', encoding='utf-8') as f_out: + for tile in list_tiles: + # il faut filtrer uniquement les tuiles presentes a l'origine + # on recupere juste le nom de la dalle sans extension -> 2 caracteres + filename = os.path.basename(tile).split('.')[0] + + if len(filename) > 2: # cas des tuiles avec retouche + continue + if tile in list_patches: + # dans ce cas il faut ajouter la tuile index_branche + tilename a la liste + tilename = os.path.basename(tile) + tile_path = os.path.join(os.path.dirname(tile), str(branch) + '_' + tilename) + f_out.write(tile_path + '\n') + else: + # on ajoute la tuile d'origine dans la liste pour creer le vrt + f_out.write(tile + '\n') + + +def build_full_vrt(path_out, resol): + # on construit un vrt a partir de la liste des images recuperee precedemment + cmd_buildvrt = ( + 'gdalbuildvrt' + + ' -input_file_list ' + + path_out + '.txt ' + + path_out + '_graphTiles.vrt' + + ' -tap' + + ' -tr ' + str(resol) + ' ' + str(resol) + ' ' + ) + os.system(cmd_buildvrt) + + +def build_vrt_emprise(path_out): + # on construit un 2eme vrt à partir du premier + # (pour avoir la bonne structure avec les bons parametres : notamment 
l'emprise) + cmd_buildvrt2 = ( + 'gdalbuildvrt ' + + path_out + '_tmp.vrt ' + + path_out + '_graphTiles.vrt' + ) + os.system(cmd_buildvrt2) + + +def build_vrt_32bits(path_out): + # modification du VRT pour passage 32bits + with open(path_out + '_tmp.vrt', 'r', encoding='utf-8') as f: + lines = f.readlines() + with open(path_out + '_32bits.vrt', 'w', encoding='utf-8') as f: + for line in lines: + # on ecrit le code python au bon endroit dans le VRT + if 'band="1"' in line: + f.write('\t\n') + f.write('\tcolor_to_int32\n') + f.write('\tPython\n') + f.write('\t\n') + f.write('\n') + f.write('\t\n') + elif 'band="2"' in line: + pass + elif 'band="3"' in line: + pass + elif '\n') + f.write(line) + else: + f.write(line) + + +def create_tiles_vrt(output, path_out, resol, tilesize): + tiles_dir = os.path.join(os.path.abspath(output), 'tiles') + if not os.path.exists(tiles_dir): + os.mkdir(tiles_dir) + + # on recupere l'emprise globale du chantier dont on veut extraire xmin, xmax, ymin, ymax + info = gdal.Info(path_out + '_32bits.vrt') + info_list = info.split('\n') + + ul, lr = '', '' + for line in info_list: + if 'Upper Left' in line: + ul = line + elif 'Lower Right' in line: + lr = line + + ul = ul.replace('(', '').replace(')', '').replace(',', '') + ul_split = ul.split(' ') + x_min = ul_split[5] + y_max = ul_split[6] + + lr = lr.replace('(', '').replace(')', '').replace(',', '') + lr_split = lr.split(' ') + x_max = lr_split[4] + y_min = lr_split[5] + + # ces valeurs vont servir a gerer l'ensemble des gdalbuildvrt sur le chantier + x_min = int(float(x_min) // 1000 * 1000) + y_min = int(float(y_min) // 1000 * 1000) + + x_max = int((float(x_max) // 1000 + 1) * 1000) + y_max = int((float(y_max) // 1000 + 1) * 1000) + + tile_size = int(resol * tilesize) + + for x in range(x_min, x_max, tile_size): + for y in range(y_min, y_max, tile_size): + cmd_vrt = ( + 'gdalbuildvrt ' + + os.path.join(tiles_dir, str(x) + '_' + str(y) + '.vrt') + ' ' + + path_out + '_32bits.vrt' + + ' -tr ' + str(resol) + ' ' + str(resol) + + ' -te ' + str(x) + ' ' + str(y) + ' ' + + str(x + tile_size) + ' ' + str(y + tile_size) + ) + os.system(cmd_vrt) diff --git a/scripts/export_graph/vectorise_graph.py b/scripts/export_graph/vectorise_graph.py new file mode 100644 index 000000000..576667b8e --- /dev/null +++ b/scripts/export_graph/vectorise_graph.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +""" +Script de vectorisation du graphe à partir d'un cache d'images COG +""" +import os +import json +import platform +import sqlite3 + + +# creation du chantier de vectorisation +def create_chantier_polygonize(dict_cmd, output, project_name): + dict_cmd["projects"].append({"name": str(project_name+'_polygonize'), "jobs": []}) + + script = "gdal_polygonize.py" + if platform.system() == "Windows": + script = script.split('.', maxsplit=1)[0]+".bat" + + tiles_dir = os.path.join(output, "tiles") + gpkg_dir = os.path.join(output, "gpkg") + + if not os.path.exists(gpkg_dir): + os.mkdir(gpkg_dir) + + # on recupere la liste des tuiles creees + list_tiles_graph = os.listdir(tiles_dir) + + # on vectorise chaque tuile separement + for tile in list_tiles_graph: + gpkg_path = os.path.join(gpkg_dir, tile.split('.')[0] + '.gpkg') + if os.path.exists(gpkg_path): + print(f"Le fichier '{gpkg_path}' existe déjà. 
" + f"On le supprime avant de relancer le calcul.") + os.remove(gpkg_path) + cmd_polygonize = ( + script + ' ' + + os.path.join(tiles_dir, tile) + ' ' + + gpkg_path + + ' -f "GPKG" ' + + '-mask ' + os.path.join(tiles_dir, tile) + ) + dict_cmd["projects"][0]["jobs"].append( + {"name": "polygonize_"+tile.split('.')[0], "command": cmd_polygonize} + ) + + # fin chantier polygonize + + +# debut chantier merge +# le traitement est divise en deux chantiers de GPAO avec le second dependant du premier +# le premier (chantier polygonize) contient l'ensemble des gdal_polygonize +# le second (chantier merge) va contenir les traitements +# permettant de passer des geopackages calcules par dalle +# a un geopackage global pour toute l'emprise de notre chantier +def add_chantier_merge(dict_cmd, project_name): + dict_cmd["projects"].append({"name": str(project_name+'_merge'), + "jobs": [], "deps": [{"id": 0}]}) + + +def add_job_merge(dict_cmd, output, project_name, proj): + script_merge = "ogrmerge.py" + if platform.system() == "Windows": + script_merge = script_merge.split('.', maxsplit=1)[0]+".bat" + + gpkg_dir = os.path.join(output, "gpkg") + tmp_dir = os.path.join(output, 'tmp') + if not os.path.exists(tmp_dir): + os.mkdir(tmp_dir) + + merge_file = project_name + '_merge.gpkg' + merge_path = os.path.join(tmp_dir, merge_file) + all_gpkg = os.path.join(gpkg_dir, '*.gpkg') + cmd_merge = ( + script_merge + + ' -o ' + merge_path + + ' ' + all_gpkg + + ' -a_srs ' + proj + + ' -nln data' + + ' -single' + + ' -field_strategy Union' + + ' -overwrite_ds' + ) + dict_cmd["projects"][1]["jobs"].append({"name": "ogrmerge", "command": cmd_merge}) + return merge_path, tmp_dir + + +def add_job_dissolve(dict_cmd, project_name, merge_path, tmp_dir): + dissolve_file = project_name + '_dissolve.gpkg' + dissolve_path = os.path.join(tmp_dir, dissolve_file) + cmd_dissolve = ( + 'ogr2ogr ' + + dissolve_path + ' ' + + merge_path + + ' -nlt PROMOTE_TO_MULTI' + + ' -nln data' + + ' -dialect sqlite' + + ' -makevalid' + + ' -sql "SELECT DN as color, ST_union(geom) as geom FROM data GROUP BY DN"' + ) + dict_cmd["projects"][1]["jobs"].append( + {"name": "dissolve", "command": cmd_dissolve, "deps": [{"id": 0}]} + ) + return dissolve_path + +# fin chantier merge + + +# chantier metadonnees +def add_chantier_mtd(dict_cmd, project_name): + dict_cmd["projects"].append({"name": str(project_name+'_mtd'), + "jobs": [], "deps": [{"id": 1}]}) + + +# recuperation des metadonnees +def add_table_mtd(dissolve_path): + conn = sqlite3.connect(dissolve_path) + c = conn.cursor() + + c.execute(''' + CREATE TABLE IF NOT EXISTS mtd + ([id] INTEGER PRIMARY KEY, [color_32] INTEGER, [opi] TEXT, [rgb] TEXT, + [date_cliche] TEXT, [time_ut_cliche] TEXT) + ''') + conn.commit() + +# TODO: test de performance sur cache gros > 1000 OPIs | export graphe + mtd + + +# TODO: verifier json gpao dans TNR ? 
+def create_mtd_dico(tmp_dir, overviews): + dico = {} + + mtd_file = os.path.join(tmp_dir, 'mtd.csv') + with open(mtd_file, 'w', encoding='utf-8') as file: + file.write('opi;color;rgb;date;time_ut\n') + for opi in overviews['list_OPI']: + dico[opi] = [] + opi_mtd = overviews['list_OPI'].get(opi) + color = str(opi_mtd['color']) + color_32 = opi_mtd['color'][0] + opi_mtd['color'][1]*256 + opi_mtd['color'][2]*256**2 + date = opi_mtd['date'] + time_ut = opi_mtd['time_ut'] + dico[opi].append((color_32, opi_mtd)) + file.write(f'{opi};{color_32};{color};{date};{time_ut}\n') + + return mtd_file + + +# conversion gpkg -> fgb +def add_job_gpkg_to_fgb(dict_cmd, dissolve_path): + cmd_gpkg_to_fgb = ( + 'ogr2ogr ' + + '-f FlatGeobuf ' + + dissolve_path.split('.')[0] + '.fgb ' + + dissolve_path + ) + dict_cmd["projects"][2]["jobs"].append( + {"name": "gpkg_to_fgb", "command": cmd_gpkg_to_fgb} + ) + + +# jointure metadonnees +def add_job_join_mtd(dict_cmd, dissolve_path, mtd_file): + cmd_join_mtd = ( + 'ogr2ogr ' + + '-dialect sqlite ' + + '-sql "SELECT data.*, mtd.* FROM data JOIN \''+mtd_file+'\'.mtd as mtd ' + 'on data.color = mtd.color" ' + + dissolve_path.split('.')[0] + '_mtd.fgb ' + + dissolve_path.split('.')[0] + '.fgb' + ) + dict_cmd["projects"][2]["jobs"].append( + {"name": "cmd_join_mtd", "command": cmd_join_mtd, "deps": [{"id": 0}]} + ) + + +# conversion fgb -> gpkg +def add_job_fbg_to_gpkg(dict_cmd, dissolve_path, output, project_name): + gpkg_file = project_name + '_mtd.gpkg' + gpkg_path = os.path.join(output, gpkg_file) + cmd_fgb_to_gpkg = ( + 'ogr2ogr ' + + '-f GPKG ' + + gpkg_path + ' ' + + dissolve_path.split('.')[0] + '_mtd.fgb' + ) + dict_cmd["projects"][2]["jobs"].append( + {"name": "cmd_fgb_to_gpkg", "command": cmd_fgb_to_gpkg, "deps": [{"id": 1}]} + ) + +# fin chantier metadonnees + + +# TODO : supprimer dossier temp si necessaire +def write_json_file(dict_cmd, output, project_name): + json_file = project_name + "_gpao.json" + json_path = os.path.join(output, json_file) + with open(json_path, "w", encoding='utf-8') as out_file: + json.dump(dict_cmd, out_file, indent=4) diff --git a/scripts/prep_vectorise_graph.py b/scripts/prep_vectorise_graph.py deleted file mode 100644 index 2a0c9921e..000000000 --- a/scripts/prep_vectorise_graph.py +++ /dev/null @@ -1,257 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Script de vectorisation du graphe à partir d'un cache d'images COG -""" -import os -import argparse -import json -import time -from cache_def import get_slab_path -from osgeo import gdal - - -def read_args(): - """Gestion des arguments""" - - parser = argparse.ArgumentParser() - parser.add_argument("-c", "--cache", required=True, help="input cache folder") - parser.add_argument("-o", "--output", required=True, help="output folder") - parser.add_argument("-b", "--branch", - help="id of branch of cache to use as source for patches (default: None)", - default=None) - parser.add_argument("-p", "--patches", - required=True, - help="file containing patches on the branch to export") - parser.add_argument("-t", "--tilesize", - help="tile size (in pixels) for vectorising graph tiles (default: 100000)", - type=int, default=100000) - parser.add_argument("-v", "--verbose", help="verbose (default: 0)", type=int, default=0) - args_prep = parser.parse_args() - - if args_prep.verbose >= 1: - print("\nArguments: ", args_prep) - - return args_prep - - -args = read_args() - -try: - os.mkdir(args.output) -except FileExistsError: - print("ERREUR : Le dossier de sortie existe déjà.") - -# define working dir 
-os.chdir(args.output) -print(f"INFO : Le répertoire de travail est '{os.getcwd()}'") -# redefine input directory -try: - args.cache = os.path.relpath(args.cache, start=args.output) -except ValueError: - print("No relative path, absolute path is used instead") - args.cache = os.path.abspath(args.cache) -print("Updated input path relative to working dir: '" + args.cache + "'") - -# check if input dir exists -if not os.path.exists(args.cache): - raise SystemExit("ERREUR : Le répertoire " + args.cache + " n'existe pas.") - -t_start = time.perf_counter() - -overviews_path = os.path.join(args.cache, "overviews.json") - -# lecture du fichier overviews pour recuperer les infos du cache -try: - with open(overviews_path, encoding='utf-8') as fileOverviews: - overviews = json.load(fileOverviews) -except IOError: - print(f"ERREUR: Le fichier '{overviews_path}' n'existe pas.") - -if 'pathDepth' not in overviews: - raise SystemExit(f"ERREUR: L'attribut 'pathDepth' n'existe pas dans '{overviews_path}'.") -path_depth = overviews['pathDepth'] - -if 'level' not in overviews: - raise SystemExit(f"ERREUR: L'attribut 'level' n'existe pas dans '{overviews_path}'.") -if 'max' not in overviews['level']: - raise SystemExit(f"ERREUR: L'attribut 'max' n'existe pas dans '{overviews_path}'.") -level = overviews['level']['max'] - -if 'resolution' not in overviews: - raise SystemExit(f"ERREUR: L'attribut 'resolution' n'existe pas dans '{overviews_path}.") -resol = overviews['resolution'] - -cache_name = os.path.basename((os.path.normpath(args.cache))) -if args.verbose > 0: - print(f"Cache name = '{cache_name}'") - -# on recupere les infos concernant les patches dans le json en entree -with open(args.patches, encoding='utf-8') as file_patches: - patches_data = json.load(file_patches) -if 'features' not in patches_data: - raise SystemExit(f"ERROR: Attribute 'features' does not exist in '{args.patches}'.") -patches = patches_data['features'] - -id_branch_patch = None -list_patches = [] -for patch in patches: - if patch['properties']['active'] is True: - slabs = patch['properties']['slabs'] - id_branch_patch = patch['properties']['id_branch'] - for slab in slabs: - x = slab[0] - y = slab[1] - - slab_path = get_slab_path(int(x), int(y), int(path_depth)) - tile_path = os.path.join(args.cache, 'graph', str(level), slab_path[1:]) - list_patches.append(os.path.abspath(tile_path+'.tif')) - -if args.branch and id_branch_patch and int(args.branch) != id_branch_patch: - raise SystemExit(f"** ERREUR: " - f"Pas de correspondance entre la branche indiquée '{args.branch}' " - f"et celle des retouches '{id_branch_patch}' !") - -if args.branch and not id_branch_patch: - raise SystemExit(f"** ERREUR: " - f"Branche de retouches indiquée '{args.branch}', mais aucune retouche !") - -if not args.branch and id_branch_patch: - print(f"** La branche de retouches traitée est : '{id_branch_patch}'") - args.branch = str(id_branch_patch) - -graph_dir = os.path.join(args.cache, 'graph', str(level)) - -# on parcourt le repertoire graph du cache pour recuperer l'ensemble des images de graphe -list_tiles = [] -for (root, dirs, files) in os.walk(graph_dir): - for file in files: - file = os.path.join(root, file) - list_tiles.append(os.path.abspath(file)) - -# fichier intermediaire contenant la liste de images pour le vrt -path_out = os.path.join(args.output, os.path.basename(args.cache)) -with open(path_out + '.txt', 'w', encoding='utf-8') as f_out: - for tile in list_tiles: - if args.verbose > 0: - print(f"tile : '{tile}'") - # il faut filtrer uniquement 
les tuiles presentes a l'origine - # on recupere juste le nom de la dalle sans extension -> 2 caracteres - filename = os.path.basename(tile).split('.')[0] - - if len(filename) > 2: # cas des tuiles avec retouche - continue - if tile in list_patches: - # dans ce cas il faut ajouter la tuile index_branche + tilename a la liste - tilename = os.path.basename(tile) - tile_path = os.path.join(os.path.dirname(tile), str(args.branch)+'_'+tilename) - f_out.write(tile_path+'\n') - else: - # on ajoute la tuile d'origine dans la liste pour creer le vrt - f_out.write(tile+'\n') - -# on construit un vrt a partir de la liste des images recuperee precedemment -cmd_buildvrt = ( - 'gdalbuildvrt' - + ' -input_file_list ' - + path_out + '.txt ' - + path_out + '_graphTiles.vrt' - + ' -tap' - + ' -tr ' + str(resol) + ' ' + str(resol) + ' ' -) -if args.verbose > 0: - print(cmd_buildvrt) -os.system(cmd_buildvrt) - -# on construit un 2eme vrt à partir du premier -# (pour avoir la bonne structure avec les bons parametres : notamment l'emprise) -cmd_buildvrt2 = ( - 'gdalbuildvrt ' - + path_out + '_tmp.vrt ' - + path_out + '_graphTiles.vrt' -) -if args.verbose > 0: - print(cmd_buildvrt2) -os.system(cmd_buildvrt2) - -# modification du VRT pour passage 32bits -with open(path_out + '_tmp.vrt', 'r', encoding='utf-8') as f: - lines = f.readlines() -with open(path_out + '_32bits.vrt', 'w', encoding='utf-8') as f: - for line in lines: - # on ecrit le code python au bon endroit dans le VRT - if 'band="1"' in line: - f.write('\t\n') - f.write('\tcolor_to_int32\n') - f.write('\tPython\n') - f.write('\t\n') - f.write('\n') - f.write('\t\n') - elif 'band="2"' in line: - pass - elif 'band="3"' in line: - pass - elif '\n') - f.write(line) - else: - f.write(line) - -tiles_dir = os.path.join(os.path.abspath(args.output), 'tiles') -if not os.path.exists(tiles_dir): - os.mkdir(tiles_dir) - -# on recupere l'emprise globale du chantier dont on veut extraire xmin, xmax, ymin, ymax -info = gdal.Info(path_out + '_32bits.vrt') -infoList = info.split('\n') - -ul, lr = '', '' -for line in infoList: - if 'Upper Left' in line: - ul = line - elif 'Lower Right' in line: - lr = line - -ul = ul.replace('(', '').replace(')', '').replace(',', '') -ul_split = ul.split(' ') -x_min = ul_split[5] -y_max = ul_split[6] - -lr = lr.replace('(', '').replace(')', '').replace(',', '') -lr_split = lr.split(' ') -x_max = lr_split[4] -y_min = lr_split[5] - -# ces valeurs vont servir a gerer l'ensemble des gdalbuildvrt sur le chantier -x_min = int(float(x_min) // 1000 * 1000) -y_min = int(float(y_min) // 1000 * 1000) - -x_max = int((float(x_max) // 1000 + 1) * 1000) -y_max = int((float(y_max) // 1000 + 1) * 1000) - -tile_size = int(resol * args.tilesize) - -for x in range(x_min, x_max, tile_size): - for y in range(y_min, y_max, tile_size): - cmd_vrt = ( - 'gdalbuildvrt ' - + os.path.join(tiles_dir, str(x) + '_' + str(y) + '.vrt') + ' ' - + path_out + '_32bits.vrt' - + ' -tr ' + str(resol) + ' ' + str(resol) - + ' -te ' + str(x) + ' ' + str(y) + ' ' + str(x+tile_size) + ' ' + str(y+tile_size) - ) - if args.verbose > 0: - print(cmd_vrt) - os.system(cmd_vrt) - -t_end = time.perf_counter() - -# temps de calcul total -if args.verbose > 0: - print(f"Temps global du calcul : {str(round(t_end-t_start, 2))}") diff --git a/scripts/vectorise_graph.py b/scripts/vectorise_graph.py deleted file mode 100644 index cd40576b4..000000000 --- a/scripts/vectorise_graph.py +++ /dev/null @@ -1,196 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Script de vectorisation du graphe à partir d'un 
cache d'images COG -""" -import os -import argparse -import json -import platform -import time - - -def read_args(): - """Gestion des arguments""" - - parser = argparse.ArgumentParser() - parser.add_argument("-i", "--input", required=True, - help="input data folder (created by prep_vectorise_graph.py)") - parser.add_argument("-c", "--cache", required=True, help="cache folder") - parser.add_argument("-o", "--output", required=True, help="output json gpao filepath") - parser.add_argument("-g", "--graph", - help="output vectorised graph pathfile (default: OUTPUT.gpkg)", - default=None) - parser.add_argument("-v", "--verbose", help="verbose (default: 0)", type=int, default=0) - args_vectorise = parser.parse_args() - - if args_vectorise.graph is None: - args_vectorise.graph = args_vectorise.output.split('.')[0] + '.gpkg' - - if args_vectorise.verbose >= 1: - print("\nArguments: ", args_vectorise) - - return args_vectorise - - -args = read_args() - -# define working dir -os.chdir(os.path.dirname(args.output)) -print(f"INFO : Le répertoire de travail est '{os.getcwd()}'") -# redefine input directory -try: - # we always want to use absolute path because it will be processed in GPAOv2 - # (no relative path possible because the exec instance is generated within the GPAOv2) - args.input = os.path.abspath(args.input) -except ValueError: - print("No absolute path possible for input") - -# check if input dir exists -if not os.path.exists(args.input): - raise SystemExit(f"ERREUR : Le répertoire '{args.input}' n'existe pas.") - -t_start = time.perf_counter() - -overviews_path = os.path.join(args.cache, "overviews.json") - -# lecture du fichier overviews pour recuperer les infos du cache -try: - with open(overviews_path, encoding='utf-8') as fileOverviews: - overviews = json.load(fileOverviews) -except IOError: - print(f"ERREUR: Le fichier '{overviews_path}' n'existe pas.") - -PROJ = str(overviews['crs']['type']) + ':' + str(overviews['crs']['code']) - -project_name = os.path.basename(args.output.split('.')[0]) -if args.verbose > 0: - print(f"Nom du chantier = '{project_name}'") - -dict_cmd = {"projects": []} - -# debut chantier polygonize -dict_cmd["projects"].append({"name": str(project_name+'_polygonize'), "jobs": []}) - -script = "gdal_polygonize.py" -if platform.system() == "Windows": - script = script.split('.', maxsplit=1)[0]+".bat" - -tiles_dir = os.path.join(args.input, "tiles") -gpkg_dir = os.path.join(args.input, "gpkg") - -if not os.path.exists(gpkg_dir): - os.mkdir(gpkg_dir) - -# on recupere la liste des tuiles creees -list_tiles_graph = os.listdir(tiles_dir) - -# on vectorise chaque tuile separement -for tile in list_tiles_graph: - if args.verbose > 0: - print(tile) - gpkg_path = os.path.join(gpkg_dir, tile.split('.')[0] + '.gpkg') - if os.path.exists(gpkg_path): - print(f"Le fichier '{gpkg_path}' existe déjà. 
On le supprime avant de relancer le calcul.") - os.remove(gpkg_path) - cmd_polygonize = ( - script + ' ' - + os.path.join(tiles_dir, tile) + ' ' - + gpkg_path - + ' -f "GPKG" ' - + '-mask ' + os.path.join(tiles_dir, tile) - ) - if args.verbose > 0: - print(cmd_polygonize) - dict_cmd["projects"][0]["jobs"].append( - {"name": "polygonize_"+tile.split('.')[0], "command": cmd_polygonize} - ) - -# fin chantier polygonize - -# debut chantier merge -# le traitement est divise en deux chantiers de GPAO avec le second dependant du premier -# le premier (chantier polygonize) contient l'ensemble des gdal_polygonize -# le second (chantier merge) va contenir les traitements -# permettant de passer des geopackages calcules par dalle -# a un geopackage global pour toute l'emprise de notre chantier -dict_cmd["projects"].append({"name": str(project_name+'_merge'), "jobs": [], "deps": [{"id": 0}]}) - -script_merge = "ogrmerge.py" -if platform.system() == "Windows": - script_merge = script_merge.split('.', maxsplit=1)[0]+".bat" - -merge_file = project_name + '_merge.gpkg' -merge_path = os.path.join(args.input, merge_file) -all_gpkg = os.path.join(gpkg_dir, '*.gpkg') -cmd_merge = ( - script_merge - + ' -o ' + merge_path - + ' ' + all_gpkg - + ' -a_srs ' + PROJ - + ' -nln data' - + ' -single' - + ' -field_strategy Union' - + ' -overwrite_ds' -) -if args.verbose > 0: - print(cmd_merge) -dict_cmd["projects"][1]["jobs"].append({"name": "ogrmerge", "command": cmd_merge}) - -clean_file = project_name + '_clean.gpkg' -clean_path = os.path.join(args.input, clean_file) -cmd_clean = ( - 'ogr2ogr ' - + clean_path + ' ' - + merge_path - + ' -overwrite' - + ' -nlt PROMOTE_TO_MULTI' - + ' -makevalid' -) -if args.verbose > 0: - print(cmd_clean) -dict_cmd["projects"][1]["jobs"].append( - {"name": "clean", "command": cmd_clean, "deps": [{"id": 0}]} -) - -dissolve_file = project_name + '_dissolve.gpkg' -dissolve_path = os.path.join(args.input, dissolve_file) -cmd_dissolve = ( - 'ogr2ogr ' - + dissolve_path + ' ' - + merge_path - + ' -nlt PROMOTE_TO_MULTI' - + ' -nln data' - + ' -dialect sqlite' - + ' -sql "SELECT DN as color, ST_union(geom) as geom FROM data GROUP BY DN"' -) -if args.verbose > 0: - print(cmd_dissolve) -dict_cmd["projects"][1]["jobs"].append( - {"name": "dissolve", "command": cmd_dissolve, "deps": [{"id": 1}]} -) - -cmd_make_valid = ( - 'ogr2ogr ' - + args.output.split('.')[0] + '_final.gpkg ' - + dissolve_path - + ' -overwrite' - + ' -nlt PROMOTE_TO_MULTI' - + ' -makevalid' -) -if args.verbose > 0: - print(cmd_make_valid) -dict_cmd["projects"][1]["jobs"].append( - {"name": "make_valid", "command": cmd_make_valid, "deps": [{"id": 2}]} -) - -# fin chantier merge - -json_file = args.output -with open(json_file, "w", encoding='utf-8') as out_file: - json.dump(dict_cmd, out_file, indent=4) - -t_end = time.perf_counter() - -# temps de calcul total -if args.verbose > 0: - print(f"Temps global du calcul : {str(round(t_end-t_start, 2))}") From 96a14367056d7926fd66f90ef5f725bc33389197 Mon Sep 17 00:00:00 2001 From: Nathan Lenglet Date: Tue, 13 Jun 2023 15:37:56 +0200 Subject: [PATCH 2/7] refactor(export): add bbox option and corrections for pylint --- scripts/export_graph/export_graph.py | 18 ++- scripts/export_graph/prep_vectorise_graph.py | 117 +++++++++++-------- scripts/export_graph/vectorise_graph.py | 15 ++- 3 files changed, 95 insertions(+), 55 deletions(-) diff --git a/scripts/export_graph/export_graph.py b/scripts/export_graph/export_graph.py index 87d1a419d..001ede7a3 100644 --- 
a/scripts/export_graph/export_graph.py +++ b/scripts/export_graph/export_graph.py @@ -30,17 +30,28 @@ def read_args(): parser.add_argument("-t", "--tilesize", help="tile size (in pixels) for vectorising graph tiles (default: 100000)", type=int, default=100000) + parser.add_argument("--bbox", help="bbox for export (in meters), xmin ymin xmax ymax", + type=int, nargs=4) parser.add_argument("-v", "--verbose", help="verbose (default: 0)", type=int, default=0) args_prep = parser.parse_args() if args_prep.verbose >= 1: print("\nArguments: ", args_prep) + coords = str(args_prep.bbox).split(' ') + if any(elem is None for elem in coords) and any(elem is not None for elem in coords): + raise SystemError("ERROR: all bbox coordinates must be specified") + return args_prep args = read_args() +# TODO : gérer des dalles de NODATA en bord de chantier +# TODO : pouvoir ajouter un tag gpao dans le chantier +# TODO : export mtd optionnel si cache n'en contient pas ? Tester comportement sans mtd || OK + +print(f"args = {args}") # check input url url_pattern = r'^https?:\/\/[0-9A-z.]+\:[0-9]+$' if not re.match(url_pattern, args.url): @@ -80,7 +91,10 @@ def read_args(): list_patches, id_branch_patch = prep.list_patches(list_patches_api, cache_path, path_depth, level) # create correct path out -path_out = os.path.join(args.output, os.path.basename(cache_path)) +if os.path.basename(cache_path) != "..": + path_out = os.path.join(args.output, os.path.basename(cache_path)) +else: + path_out = os.path.join(args.output, os.path.basename(args.output)) # on verifie que la branch donne est correcte # encore necessaire vu qu'on va chercher les patches via id_branch ? @@ -95,7 +109,7 @@ def read_args(): prep.build_vrt_32bits(path_out) # creation des dalles de vrt pour la vectorisation -prep.create_tiles_vrt(args.output, path_out, resol, args.tilesize) +prep.create_tiles_vrt(args.output, path_out, resol, args.tilesize, args.bbox) # preparation du fichier pour la gpao dict_cmd = {"projects": []} diff --git a/scripts/export_graph/prep_vectorise_graph.py b/scripts/export_graph/prep_vectorise_graph.py index efda1ab4e..244bd63ed 100644 --- a/scripts/export_graph/prep_vectorise_graph.py +++ b/scripts/export_graph/prep_vectorise_graph.py @@ -15,12 +15,13 @@ def check_overviews(cache): + """Check if overviews file is compliant with standard""" overviews_path = os.path.join(cache, "overviews.json") # lecture du fichier overviews pour recuperer les infos du cache try: - with open(overviews_path, encoding='utf-8') as fileOverviews: - overviews = json.load(fileOverviews) + with open(overviews_path, encoding='utf-8') as file_overviews: + overviews = json.load(file_overviews) except IOError: print(f"ERREUR: Le fichier '{overviews_path}' n'existe pas.") @@ -45,6 +46,7 @@ def check_overviews(cache): # on recupere les infos concernant les patches dans le json en entree def list_patches(patches, cache, path_depth, level): + """Get data for patches in input json""" if 'features' not in patches: raise SystemExit(f"ERROR: Attribute 'features' does not exist in '{patches}'.") patches = patches['features'] @@ -56,8 +58,8 @@ def list_patches(patches, cache, path_depth, level): slabs = patch['properties']['slabs'] id_branch_patch = patch['properties']['id_branch'] for slab in slabs: - x = slab[0] - y = slab[1] + x = slab[0] # pylint: disable=C0103 + y = slab[1] # pylint: disable=C0103 slab_path = get_slab_path(int(x), int(y), int(path_depth)) tile_path = os.path.join(cache, 'graph', str(level), slab_path[1:]) @@ -67,6 +69,7 @@ def 
list_patches(patches, cache, path_depth, level): def check_branch_patch(branch, id_branch_patch): + """Check if input branch exists""" if branch and id_branch_patch and int(branch) != id_branch_patch: raise SystemExit(f"** ERREUR: " f"Pas de correspondance entre la branche indiquée '{branch}' " @@ -78,6 +81,7 @@ def check_branch_patch(branch, id_branch_patch): def create_tiles(cache, level, branch, path_out, list_patches): + """Create tile list for vectorization""" graph_dir = os.path.join(cache, 'graph', str(level)) # on parcourt le repertoire graph du cache pour recuperer l'ensemble des images de graphe @@ -107,6 +111,7 @@ def create_tiles(cache, level, branch, path_out, list_patches): def build_full_vrt(path_out, resol): + """Build full vrt""" # on construit un vrt a partir de la liste des images recuperee precedemment cmd_buildvrt = ( 'gdalbuildvrt' @@ -120,6 +125,7 @@ def build_full_vrt(path_out, resol): def build_vrt_emprise(path_out): + """Build vrt to get correct spatial hold""" # on construit un 2eme vrt à partir du premier # (pour avoir la bonne structure avec les bons parametres : notamment l'emprise) cmd_buildvrt2 = ( @@ -131,25 +137,26 @@ def build_vrt_emprise(path_out): def build_vrt_32bits(path_out): + """Build vrt from a 3-8bits channels to 32bits monochannel""" # modification du VRT pour passage 32bits - with open(path_out + '_tmp.vrt', 'r', encoding='utf-8') as f: - lines = f.readlines() - with open(path_out + '_32bits.vrt', 'w', encoding='utf-8') as f: + with open(path_out + '_tmp.vrt', 'r', encoding='utf-8') as file: + lines = file.readlines() + with open(path_out + '_32bits.vrt', 'w', encoding='utf-8') as file: for line in lines: # on ecrit le code python au bon endroit dans le VRT if 'band="1"' in line: - f.write('\t\n') - f.write('\tcolor_to_int32\n') - f.write('\tPython\n') - f.write('\t\n') - f.write('\n') + file.write('\tcolor_to_int32\n') + file.write('\tPython\n') + file.write('\t\n') + file.write('\n') - f.write('\t\n') + file.write('\tout_ar[:] = in_ar[0] + 256 * in_ar[1] + 256 * 256 * in_ar[2]\n') + file.write(']]>\n') + file.write('\t\n') elif 'band="2"' in line: pass elif 'band="3"' in line: @@ -157,49 +164,57 @@ def build_vrt_32bits(path_out): elif '\n') - f.write(line) + file.write('\t\n') + file.write(line) else: - f.write(line) + file.write(line) -def create_tiles_vrt(output, path_out, resol, tilesize): +def create_tiles_vrt(output, path_out, resol, tilesize, bbox): + """Create command line for each tile to be vectorized""" tiles_dir = os.path.join(os.path.abspath(output), 'tiles') if not os.path.exists(tiles_dir): os.mkdir(tiles_dir) - # on recupere l'emprise globale du chantier dont on veut extraire xmin, xmax, ymin, ymax - info = gdal.Info(path_out + '_32bits.vrt') - info_list = info.split('\n') - - ul, lr = '', '' - for line in info_list: - if 'Upper Left' in line: - ul = line - elif 'Lower Right' in line: - lr = line - - ul = ul.replace('(', '').replace(')', '').replace(',', '') - ul_split = ul.split(' ') - x_min = ul_split[5] - y_max = ul_split[6] - - lr = lr.replace('(', '').replace(')', '').replace(',', '') - lr_split = lr.split(' ') - x_max = lr_split[4] - y_min = lr_split[5] - - # ces valeurs vont servir a gerer l'ensemble des gdalbuildvrt sur le chantier - x_min = int(float(x_min) // 1000 * 1000) - y_min = int(float(y_min) // 1000 * 1000) - - x_max = int((float(x_max) // 1000 + 1) * 1000) - y_max = int((float(y_max) // 1000 + 1) * 1000) + if any(elem is None for elem in str(bbox).split(' ')) or bbox is None: + # on recupere l'emprise globale 
du chantier dont on veut extraire xmin, xmax, ymin, ymax + info = gdal.Info(path_out + '_32bits.vrt') + info_list = info.split('\n') + + upper_left, lower_right = '', '' + for line in info_list: + if 'Upper Left' in line: + upper_left = line + elif 'Lower Right' in line: + lower_right = line + + upper_left = upper_left.replace('(', '').replace(')', '').replace(',', '') + ul_split = upper_left.split(' ') + x_min = ul_split[5] + y_max = ul_split[6] + + lower_right = lower_right.replace('(', '').replace(')', '').replace(',', '') + lr_split = lower_right.split(' ') + x_max = lr_split[4] + y_min = lr_split[5] + + # ces valeurs vont servir a gerer l'ensemble des gdalbuildvrt sur le chantier + x_min = int(float(x_min) // 1000 * 1000) + y_min = int(float(y_min) // 1000 * 1000) + + x_max = int((float(x_max) // 1000 + 1) * 1000) + y_max = int((float(y_max) // 1000 + 1) * 1000) + else: + coords = str(bbox).split(' ') + x_min = coords[0] + y_min = coords[1] + x_max = coords[2] + y_max = coords[3] tile_size = int(resol * tilesize) - for x in range(x_min, x_max, tile_size): - for y in range(y_min, y_max, tile_size): + for x in range(x_min, x_max, tile_size): # pylint: disable=C0103 + for y in range(y_min, y_max, tile_size): # pylint: disable=C0103 cmd_vrt = ( 'gdalbuildvrt ' + os.path.join(tiles_dir, str(x) + '_' + str(y) + '.vrt') + ' ' diff --git a/scripts/export_graph/vectorise_graph.py b/scripts/export_graph/vectorise_graph.py index 576667b8e..9e954adae 100644 --- a/scripts/export_graph/vectorise_graph.py +++ b/scripts/export_graph/vectorise_graph.py @@ -10,6 +10,7 @@ # creation du chantier de vectorisation def create_chantier_polygonize(dict_cmd, output, project_name): + """Create vectorize project""" dict_cmd["projects"].append({"name": str(project_name+'_polygonize'), "jobs": []}) script = "gdal_polygonize.py" @@ -53,11 +54,13 @@ def create_chantier_polygonize(dict_cmd, output, project_name): # permettant de passer des geopackages calcules par dalle # a un geopackage global pour toute l'emprise de notre chantier def add_chantier_merge(dict_cmd, project_name): + """Add merge project to dictionary""" dict_cmd["projects"].append({"name": str(project_name+'_merge'), "jobs": [], "deps": [{"id": 0}]}) def add_job_merge(dict_cmd, output, project_name, proj): + """Add merge job to merge project""" script_merge = "ogrmerge.py" if platform.system() == "Windows": script_merge = script_merge.split('.', maxsplit=1)[0]+".bat" @@ -85,6 +88,7 @@ def add_job_merge(dict_cmd, output, project_name, proj): def add_job_dissolve(dict_cmd, project_name, merge_path, tmp_dir): + """Add dissolve job to merge project""" dissolve_file = project_name + '_dissolve.gpkg' dissolve_path = os.path.join(tmp_dir, dissolve_file) cmd_dissolve = ( @@ -107,16 +111,18 @@ def add_job_dissolve(dict_cmd, project_name, merge_path, tmp_dir): # chantier metadonnees def add_chantier_mtd(dict_cmd, project_name): + """Add mtd project to dictionary""" dict_cmd["projects"].append({"name": str(project_name+'_mtd'), "jobs": [], "deps": [{"id": 1}]}) # recuperation des metadonnees def add_table_mtd(dissolve_path): + """Add mtd table creation to mtd project""" conn = sqlite3.connect(dissolve_path) - c = conn.cursor() + cursor = conn.cursor() - c.execute(''' + cursor.execute(''' CREATE TABLE IF NOT EXISTS mtd ([id] INTEGER PRIMARY KEY, [color_32] INTEGER, [opi] TEXT, [rgb] TEXT, [date_cliche] TEXT, [time_ut_cliche] TEXT) @@ -128,6 +134,7 @@ def add_table_mtd(dissolve_path): # TODO: verifier json gpao dans TNR ? 
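Taken together, these builders assemble dict_cmd into three chained GPAO projects (polygonize, then merge, then mtd) that write_json_file dumps to <project>_gpao.json. A trimmed illustration of the resulting structure, with project/job names and command strings abbreviated (they depend on the output folder and the tiles actually produced):

dict_cmd = {
    "projects": [
        {"name": "chantier_polygonize", "jobs": [
            {"name": "polygonize_651000_6860000", "command": "gdal_polygonize.py ..."}]},
        {"name": "chantier_merge", "deps": [{"id": 0}], "jobs": [
            {"name": "ogrmerge", "command": "ogrmerge.py ..."},
            {"name": "dissolve", "command": "ogr2ogr ...", "deps": [{"id": 0}]}]},
        {"name": "chantier_mtd", "deps": [{"id": 1}], "jobs": [
            {"name": "gpkg_to_fgb", "command": "ogr2ogr ..."},
            {"name": "cmd_join_mtd", "command": "ogr2ogr ...", "deps": [{"id": 0}]},
            {"name": "cmd_fgb_to_gpkg", "command": "ogr2ogr ...", "deps": [{"id": 1}]}]},
    ]
}

Project-level deps chain the three chantiers (merge waits for polygonize, mtd waits for merge), while job-level deps order the jobs inside a chantier.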
def create_mtd_dico(tmp_dir, overviews): + """Create mtd list for OPIs""" dico = {} mtd_file = os.path.join(tmp_dir, 'mtd.csv') @@ -148,6 +155,7 @@ def create_mtd_dico(tmp_dir, overviews): # conversion gpkg -> fgb def add_job_gpkg_to_fgb(dict_cmd, dissolve_path): + """Create gpkg to fgb job""" cmd_gpkg_to_fgb = ( 'ogr2ogr ' + '-f FlatGeobuf ' @@ -161,6 +169,7 @@ def add_job_gpkg_to_fgb(dict_cmd, dissolve_path): # jointure metadonnees def add_job_join_mtd(dict_cmd, dissolve_path, mtd_file): + """Add mtd joint to mtd project""" cmd_join_mtd = ( 'ogr2ogr ' + '-dialect sqlite ' @@ -176,6 +185,7 @@ def add_job_join_mtd(dict_cmd, dissolve_path, mtd_file): # conversion fgb -> gpkg def add_job_fbg_to_gpkg(dict_cmd, dissolve_path, output, project_name): + """Add fbg to gpkg job to mts project""" gpkg_file = project_name + '_mtd.gpkg' gpkg_path = os.path.join(output, gpkg_file) cmd_fgb_to_gpkg = ( @@ -193,6 +203,7 @@ def add_job_fbg_to_gpkg(dict_cmd, dissolve_path, output, project_name): # TODO : supprimer dossier temp si necessaire def write_json_file(dict_cmd, output, project_name): + """Write dictionary into json file""" json_file = project_name + "_gpao.json" json_path = os.path.join(output, json_file) with open(json_path, "w", encoding='utf-8') as out_file: From ee22bf2dd0539526ff481f3fb9950afe14b178b7 Mon Sep 17 00:00:00 2001 From: Nathan Lenglet Date: Fri, 6 Oct 2023 08:48:51 +0200 Subject: [PATCH 3/7] refactor(export): consistency of use of tile/slab and others --- scripts/export_graph/export_graph.py | 23 ++++++----- scripts/export_graph/prep_vectorise_graph.py | 42 ++++++++++---------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/scripts/export_graph/export_graph.py b/scripts/export_graph/export_graph.py index 001ede7a3..e250986dc 100644 --- a/scripts/export_graph/export_graph.py +++ b/scripts/export_graph/export_graph.py @@ -38,10 +38,20 @@ def read_args(): if args_prep.verbose >= 1: print("\nArguments: ", args_prep) + # check input url + url_pattern = r'^https?:\/\/[0-9A-z.]+\:[0-9]+$' + if not re.match(url_pattern, args_prep.url): + raise SystemExit(f"ERROR: URL '{args_prep.url}' is invalid") + + # check bbox coords = str(args_prep.bbox).split(' ') if any(elem is None for elem in coords) and any(elem is not None for elem in coords): raise SystemError("ERROR: all bbox coordinates must be specified") + # verifier tous les parametres + + # verifier l'existence de la branche d'id args.branch + return args_prep @@ -51,12 +61,6 @@ def read_args(): # TODO : pouvoir ajouter un tag gpao dans le chantier # TODO : export mtd optionnel si cache n'en contient pas ? 
Tester comportement sans mtd || OK -print(f"args = {args}") -# check input url -url_pattern = r'^https?:\/\/[0-9A-z.]+\:[0-9]+$' -if not re.match(url_pattern, args.url): - raise SystemExit(f"ERROR: URL '{args.url}' is invalid") - # recuperer les patches correspondant a la branche desiree (requete curl) patches_files = os.path.join(args.output, 'patches.json') req_get_patches = f'{args.url}/{args.branch}/patches' @@ -87,7 +91,7 @@ def read_args(): # on verifie si l'overviews utilise est bien correct path_depth, level, resol, proj, overviews = prep.check_overviews(cache_path) -# recupere la liste des tiles impactes par les patches sur le chantier +# recupere la liste des dalles impactees par les retouches sur le chantier list_patches, id_branch_patch = prep.list_patches(list_patches_api, cache_path, path_depth, level) # create correct path out @@ -98,10 +102,11 @@ def read_args(): # on verifie que la branch donne est correcte # encore necessaire vu qu'on va chercher les patches via id_branch ? +# verif dans argsparser, a supprimer prep.check_branch_patch(args.branch, id_branch_patch) -# on cree un vrt pour chaque tuile ddu cache -prep.create_tiles(cache_path, level, args.branch, path_out, list_patches) +# on recupere la liste des dalles impactees par les patches +prep.create_list_slabs(cache_path, level, args.branch, path_out, list_patches) # creation des vrt intermediaires prep.build_full_vrt(path_out, resol) diff --git a/scripts/export_graph/prep_vectorise_graph.py b/scripts/export_graph/prep_vectorise_graph.py index 244bd63ed..84c756407 100644 --- a/scripts/export_graph/prep_vectorise_graph.py +++ b/scripts/export_graph/prep_vectorise_graph.py @@ -62,8 +62,8 @@ def list_patches(patches, cache, path_depth, level): y = slab[1] # pylint: disable=C0103 slab_path = get_slab_path(int(x), int(y), int(path_depth)) - tile_path = os.path.join(cache, 'graph', str(level), slab_path[1:]) - list_patches.append(os.path.abspath(tile_path + '.tif')) + full_slab_path = os.path.join(cache, 'graph', str(level), slab_path[1:]) + list_patches.append(os.path.abspath(full_slab_path + '.tif')) return list_patches, id_branch_patch @@ -80,34 +80,34 @@ def check_branch_patch(branch, id_branch_patch): branch = str(id_branch_patch) -def create_tiles(cache, level, branch, path_out, list_patches): - """Create tile list for vectorization""" +def create_list_slabs(cache, level, branch, path_out, list_patches): + """Create slabs list for vectorization""" graph_dir = os.path.join(cache, 'graph', str(level)) # on parcourt le repertoire graph du cache pour recuperer l'ensemble des images de graphe - list_tiles = [] + list_slabs = [] for (root, dirs, files) in os.walk(graph_dir): for file in files: file = os.path.join(root, file) - list_tiles.append(os.path.abspath(file)) + list_slabs.append(os.path.abspath(file)) # fichier intermediaire contenant la liste de images pour le vrt with open(path_out + '.txt', 'w', encoding='utf-8') as f_out: - for tile in list_tiles: - # il faut filtrer uniquement les tuiles presentes a l'origine + for slab in list_slabs: + # il faut filtrer uniquement les dalles presentes a l'origine # on recupere juste le nom de la dalle sans extension -> 2 caracteres - filename = os.path.basename(tile).split('.')[0] + filename = os.path.basename(slab).split('.')[0] - if len(filename) > 2: # cas des tuiles avec retouche + if len(filename) > 2: # cas des dalles avec retouche continue - if tile in list_patches: - # dans ce cas il faut ajouter la tuile index_branche + tilename a la liste - tilename = 
os.path.basename(tile) - tile_path = os.path.join(os.path.dirname(tile), str(branch) + '_' + tilename) - f_out.write(tile_path + '\n') + if slab in list_patches: + # dans ce cas il faut ajouter la dalle index_branche + slab_name a la liste + slab_name = os.path.basename(slab) + slab_path = os.path.join(os.path.dirname(slab), str(branch) + '_' + slab_name) + f_out.write(slab_path + '\n') else: - # on ajoute la tuile d'origine dans la liste pour creer le vrt - f_out.write(tile + '\n') + # on ajoute la dalle d'origine dans la liste pour creer le vrt + f_out.write(slab + '\n') def build_full_vrt(path_out, resol): @@ -117,7 +117,7 @@ def build_full_vrt(path_out, resol): 'gdalbuildvrt' + ' -input_file_list ' + path_out + '.txt ' - + path_out + '_graphTiles.vrt' + + path_out + '_graph_tmp.vrt' + ' -tap' + ' -tr ' + str(resol) + ' ' + str(resol) + ' ' ) @@ -130,8 +130,8 @@ def build_vrt_emprise(path_out): # (pour avoir la bonne structure avec les bons parametres : notamment l'emprise) cmd_buildvrt2 = ( 'gdalbuildvrt ' - + path_out + '_tmp.vrt ' - + path_out + '_graphTiles.vrt' + + path_out + '_emprise_tmp.vrt ' + + path_out + '_graph_tmp.vrt' ) os.system(cmd_buildvrt2) @@ -139,7 +139,7 @@ def build_vrt_emprise(path_out): def build_vrt_32bits(path_out): """Build vrt from a 3-8bits channels to 32bits monochannel""" # modification du VRT pour passage 32bits - with open(path_out + '_tmp.vrt', 'r', encoding='utf-8') as file: + with open(path_out + '_emprise_tmp.vrt', 'r', encoding='utf-8') as file: lines = file.readlines() with open(path_out + '_32bits.vrt', 'w', encoding='utf-8') as file: for line in lines: From 0ca8d7117121536b31ac272421960ded8d2050c6 Mon Sep 17 00:00:00 2001 From: Nathan Lenglet Date: Thu, 12 Oct 2023 11:21:24 +0200 Subject: [PATCH 4/7] refactor(export): add more tmp data during process --- scripts/export_graph/export_graph.py | 16 +++++++++++++--- scripts/export_graph/prep_vectorise_graph.py | 11 ++++++----- scripts/export_graph/vectorise_graph.py | 6 +++--- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/scripts/export_graph/export_graph.py b/scripts/export_graph/export_graph.py index e250986dc..793163865 100644 --- a/scripts/export_graph/export_graph.py +++ b/scripts/export_graph/export_graph.py @@ -22,14 +22,14 @@ def read_args(): parser = argparse.ArgumentParser() parser.add_argument("-c", "--cache", required=True, type=str, help="path of input cache") parser.add_argument("-o", "--output", required=True, help="output folder") - parser.add_argument("-b", "--branch", required=True, + parser.add_argument("-b", "--branch", required=True, type=int, help="id of branch of cache to use as source for patches") parser.add_argument('-u', '--url', help="http://[serveur]:[port] (default: http://localhost:8081)", type=str, default='http://localhost:8081') parser.add_argument("-t", "--tilesize", - help="tile size (in pixels) for vectorising graph tiles (default: 100000)", - type=int, default=100000) + help="tile size (in pixels) for vectorising graph tiles (default: 5000)", + type=int, default=5000) parser.add_argument("--bbox", help="bbox for export (in meters), xmin ymin xmax ymax", type=int, nargs=4) parser.add_argument("-v", "--verbose", help="verbose (default: 0)", type=int, default=0) @@ -38,11 +38,21 @@ def read_args(): if args_prep.verbose >= 1: print("\nArguments: ", args_prep) + # check input cache + if not os.path.exists(args_prep.cache): + raise SystemExit(f"ERROR: folder '{args_prep.cache}' does not exist") + + # check branch -> voir create_qgis_view + # check 
input url url_pattern = r'^https?:\/\/[0-9A-z.]+\:[0-9]+$' if not re.match(url_pattern, args_prep.url): raise SystemExit(f"ERROR: URL '{args_prep.url}' is invalid") + # check tilesize > 0 + if args_prep.tilesize <= 0: + raise SystemExit("ERROR: tilesize must be greater that zero") + # check bbox coords = str(args_prep.bbox).split(' ') if any(elem is None for elem in coords) and any(elem is not None for elem in coords): diff --git a/scripts/export_graph/prep_vectorise_graph.py b/scripts/export_graph/prep_vectorise_graph.py index 84c756407..eda892755 100644 --- a/scripts/export_graph/prep_vectorise_graph.py +++ b/scripts/export_graph/prep_vectorise_graph.py @@ -136,12 +136,13 @@ def build_vrt_emprise(path_out): os.system(cmd_buildvrt2) +# ajouter _tmp au vrt_32bits ? def build_vrt_32bits(path_out): """Build vrt from a 3-8bits channels to 32bits monochannel""" # modification du VRT pour passage 32bits with open(path_out + '_emprise_tmp.vrt', 'r', encoding='utf-8') as file: lines = file.readlines() - with open(path_out + '_32bits.vrt', 'w', encoding='utf-8') as file: + with open(path_out + '_32bits_tmp.vrt', 'w', encoding='utf-8') as file: for line in lines: # on ecrit le code python au bon endroit dans le VRT if 'band="1"' in line: @@ -172,13 +173,13 @@ def build_vrt_32bits(path_out): def create_tiles_vrt(output, path_out, resol, tilesize, bbox): """Create command line for each tile to be vectorized""" - tiles_dir = os.path.join(os.path.abspath(output), 'tiles') + tiles_dir = os.path.join(os.path.abspath(output), 'tmp', 'tiles') if not os.path.exists(tiles_dir): - os.mkdir(tiles_dir) + os.makedirs(tiles_dir) if any(elem is None for elem in str(bbox).split(' ')) or bbox is None: # on recupere l'emprise globale du chantier dont on veut extraire xmin, xmax, ymin, ymax - info = gdal.Info(path_out + '_32bits.vrt') + info = gdal.Info(path_out + '_32bits_tmp.vrt') info_list = info.split('\n') upper_left, lower_right = '', '' @@ -218,7 +219,7 @@ def create_tiles_vrt(output, path_out, resol, tilesize, bbox): cmd_vrt = ( 'gdalbuildvrt ' + os.path.join(tiles_dir, str(x) + '_' + str(y) + '.vrt') + ' ' - + path_out + '_32bits.vrt' + + path_out + '_32bits_tmp.vrt' + ' -tr ' + str(resol) + ' ' + str(resol) + ' -te ' + str(x) + ' ' + str(y) + ' ' + str(x + tile_size) + ' ' + str(y + tile_size) diff --git a/scripts/export_graph/vectorise_graph.py b/scripts/export_graph/vectorise_graph.py index 9e954adae..f883e9271 100644 --- a/scripts/export_graph/vectorise_graph.py +++ b/scripts/export_graph/vectorise_graph.py @@ -17,8 +17,8 @@ def create_chantier_polygonize(dict_cmd, output, project_name): if platform.system() == "Windows": script = script.split('.', maxsplit=1)[0]+".bat" - tiles_dir = os.path.join(output, "tiles") - gpkg_dir = os.path.join(output, "gpkg") + tiles_dir = os.path.join(output, 'tmp', 'tiles') + gpkg_dir = os.path.join(output, 'tmp', 'gpkg') if not os.path.exists(gpkg_dir): os.mkdir(gpkg_dir) @@ -65,7 +65,7 @@ def add_job_merge(dict_cmd, output, project_name, proj): if platform.system() == "Windows": script_merge = script_merge.split('.', maxsplit=1)[0]+".bat" - gpkg_dir = os.path.join(output, "gpkg") + gpkg_dir = os.path.join(output, 'tmp', 'gpkg') tmp_dir = os.path.join(output, 'tmp') if not os.path.exists(tmp_dir): os.mkdir(tmp_dir) From c66de6dd0ac25a9739d713c709778214402c8674 Mon Sep 17 00:00:00 2001 From: Nathan Lenglet Date: Fri, 27 Oct 2023 15:48:46 +0200 Subject: [PATCH 5/7] refactor(export): shapefile can be used to limit export area --- scripts/export_graph/export_graph.py | 54 
++++++++-- scripts/export_graph/prep_vectorise_graph.py | 101 +++++++++++-------- 2 files changed, 108 insertions(+), 47 deletions(-) diff --git a/scripts/export_graph/export_graph.py b/scripts/export_graph/export_graph.py index 793163865..d4e780d06 100644 --- a/scripts/export_graph/export_graph.py +++ b/scripts/export_graph/export_graph.py @@ -8,6 +8,7 @@ import argparse import prep_vectorise_graph as prep import vectorise_graph as vect +from osgeo import ogr current = os.path.dirname(os.path.realpath(__file__)) parent = os.path.dirname(current) @@ -32,6 +33,7 @@ def read_args(): type=int, default=5000) parser.add_argument("--bbox", help="bbox for export (in meters), xmin ymin xmax ymax", type=int, nargs=4) + parser.add_argument("--shapefile", help="filepath of shapefile containing extent of export") parser.add_argument("-v", "--verbose", help="verbose (default: 0)", type=int, default=0) args_prep = parser.parse_args() @@ -53,15 +55,30 @@ def read_args(): if args_prep.tilesize <= 0: raise SystemExit("ERROR: tilesize must be greater that zero") + # check if bbox and extent are not both used + if args_prep.bbox is not None and args_prep.shapefile is not None: + raise SystemExit("ERROR: bbox and extent options were used, incompatible") + # check bbox - coords = str(args_prep.bbox).split(' ') - if any(elem is None for elem in coords) and any(elem is not None for elem in coords): - raise SystemError("ERROR: all bbox coordinates must be specified") + if args_prep.bbox is not None: + coords = str(args_prep.bbox).split(' ') + if any(elem is None for elem in coords) and any(elem is not None for elem in coords): + raise SystemError("ERROR: all bbox coordinates must be specified") + + # check extent, only shapefile + if args_prep.shapefile is not None: + if not os.path.exists(args_prep.shapefile): + raise SystemExit(f"ERROR: file {args_prep.shapefile} does not exist") + ext = os.path.splitext(args_prep.shapefile)[1] + if ext != '.shp': + raise SystemExit(f"ERROR: wrong extent file extension expected .shp, got {ext}") + driver = ogr.GetDriverByName("ESRI Shapefile") + data = driver.Open(args_prep.shapefile, 0) # 0 = read-only + if data is None: + raise SystemExit("ERROR: data is null in ", args_prep.shapefile) # verifier tous les parametres - # verifier l'existence de la branche d'id args.branch - return args_prep @@ -115,8 +132,31 @@ def read_args(): # verif dans argsparser, a supprimer prep.check_branch_patch(args.branch, id_branch_patch) +# on recupere l'emprise du chantier si necessaire +extent = None +if args.bbox: + x_min = args.bbox[0] + y_min = args.bbox[1] + x_max = args.bbox[2] + y_max = args.bbox[3] + + extent = prep.get_extent(x_min, x_max, y_min, y_max) +elif args.shapefile: + # actuellement on prend seulement la bbox du chantier + driver = ogr.GetDriverByName("ESRI Shapefile") + data = driver.Open(args.shapefile, 0) # 0 = read-only + + layer = data.GetLayer() + layer_extent = layer.GetExtent() + x_min = layer_extent[0] + x_max = layer_extent[1] + y_min = layer_extent[2] + y_max = layer_extent[3] + + extent = prep.get_extent(x_min, x_max, y_min, y_max) + # on recupere la liste des dalles impactees par les patches -prep.create_list_slabs(cache_path, level, args.branch, path_out, list_patches) +prep.create_list_slabs(cache_path, level, args.branch, path_out, list_patches, extent) # creation des vrt intermediaires prep.build_full_vrt(path_out, resol) @@ -124,7 +164,7 @@ def read_args(): prep.build_vrt_32bits(path_out) # creation des dalles de vrt pour la vectorisation 
-prep.create_tiles_vrt(args.output, path_out, resol, args.tilesize, args.bbox) +prep.create_tiles_vrt(args.output, path_out, resol, args.tilesize) # preparation du fichier pour la gpao dict_cmd = {"projects": []} diff --git a/scripts/export_graph/prep_vectorise_graph.py b/scripts/export_graph/prep_vectorise_graph.py index eda892755..3f2e7d976 100644 --- a/scripts/export_graph/prep_vectorise_graph.py +++ b/scripts/export_graph/prep_vectorise_graph.py @@ -6,6 +6,7 @@ import sys import json from osgeo import gdal +from osgeo import ogr current = os.path.dirname(os.path.realpath(__file__)) parent = os.path.dirname(current) @@ -14,6 +15,21 @@ from cache_def import get_slab_path # noqa: E402 +def get_extent(xmin, xmax, ymin, ymax): + """ Return a polygon = bbox from coordinates """ + ring = ogr.Geometry(ogr.wkbLinearRing) + ring.AddPoint(xmin, ymin) + ring.AddPoint(xmin, ymax) + ring.AddPoint(xmax, ymax) + ring.AddPoint(xmax, ymin) + ring.AddPoint(xmin, ymin) + + extent = ogr.Geometry(ogr.wkbPolygon) + extent.AddGeometry(ring) + + return extent + + def check_overviews(cache): """Check if overviews file is compliant with standard""" overviews_path = os.path.join(cache, "overviews.json") @@ -80,24 +96,36 @@ def check_branch_patch(branch, id_branch_patch): branch = str(id_branch_patch) -def create_list_slabs(cache, level, branch, path_out, list_patches): +def create_list_slabs(cache, level, branch, path_out, list_patches, extent): """Create slabs list for vectorization""" graph_dir = os.path.join(cache, 'graph', str(level)) # on parcourt le repertoire graph du cache pour recuperer l'ensemble des images de graphe list_slabs = [] - for (root, dirs, files) in os.walk(graph_dir): + for (root, _, files) in os.walk(graph_dir): for file in files: file = os.path.join(root, file) - list_slabs.append(os.path.abspath(file)) - # fichier intermediaire contenant la liste de images pour le vrt + # verifier si l'emprise du slab intersecte l'emprise du chantier + if extent is not None: + dataset = gdal.Open(file) + x_min, x_pixel, _, y_max, _, y_pixel = dataset.GetGeoTransform() + width, height = dataset.RasterXSize, dataset.RasterYSize + x_max = x_min + width * x_pixel + y_min = y_max + height * y_pixel + + extent_slab = get_extent(x_min, x_max, y_min, y_max) + if extent_slab.Intersects(extent): + list_slabs.append(os.path.abspath(file)) + else: + list_slabs.append(os.path.abspath(file)) + + # fichier intermediaire contenant la liste des images pour le vrt with open(path_out + '.txt', 'w', encoding='utf-8') as f_out: for slab in list_slabs: # il faut filtrer uniquement les dalles presentes a l'origine # on recupere juste le nom de la dalle sans extension -> 2 caracteres filename = os.path.basename(slab).split('.')[0] - if len(filename) > 2: # cas des dalles avec retouche continue if slab in list_patches: @@ -171,46 +199,39 @@ def build_vrt_32bits(path_out): file.write(line) -def create_tiles_vrt(output, path_out, resol, tilesize, bbox): +def create_tiles_vrt(output, path_out, resol, tilesize): """Create command line for each tile to be vectorized""" tiles_dir = os.path.join(os.path.abspath(output), 'tmp', 'tiles') if not os.path.exists(tiles_dir): os.makedirs(tiles_dir) - if any(elem is None for elem in str(bbox).split(' ')) or bbox is None: - # on recupere l'emprise globale du chantier dont on veut extraire xmin, xmax, ymin, ymax - info = gdal.Info(path_out + '_32bits_tmp.vrt') - info_list = info.split('\n') - - upper_left, lower_right = '', '' - for line in info_list: - if 'Upper Left' in line: - upper_left = 
line - elif 'Lower Right' in line: - lower_right = line - - upper_left = upper_left.replace('(', '').replace(')', '').replace(',', '') - ul_split = upper_left.split(' ') - x_min = ul_split[5] - y_max = ul_split[6] - - lower_right = lower_right.replace('(', '').replace(')', '').replace(',', '') - lr_split = lower_right.split(' ') - x_max = lr_split[4] - y_min = lr_split[5] - - # ces valeurs vont servir a gerer l'ensemble des gdalbuildvrt sur le chantier - x_min = int(float(x_min) // 1000 * 1000) - y_min = int(float(y_min) // 1000 * 1000) - - x_max = int((float(x_max) // 1000 + 1) * 1000) - y_max = int((float(y_max) // 1000 + 1) * 1000) - else: - coords = str(bbox).split(' ') - x_min = coords[0] - y_min = coords[1] - x_max = coords[2] - y_max = coords[3] + # on recupere l'emprise de l'export dont on veut extraire xmin, xmax, ymin, ymax + info = gdal.Info(path_out + '_32bits_tmp.vrt') + info_list = info.split('\n') + + upper_left, lower_right = '', '' + for line in info_list: + if 'Upper Left' in line: + upper_left = line + elif 'Lower Right' in line: + lower_right = line + + upper_left = upper_left.replace('(', '').replace(')', '').replace(',', '') + ul_split = upper_left.split(' ') + x_min = ul_split[5] + y_max = ul_split[6] + + lower_right = lower_right.replace('(', '').replace(')', '').replace(',', '') + lr_split = lower_right.split(' ') + x_max = lr_split[4] + y_min = lr_split[5] + + # ces valeurs vont servir a gerer l'ensemble des gdalbuildvrt sur le chantier + x_min = int(float(x_min) // 1000 * 1000) + y_min = int(float(y_min) // 1000 * 1000) + + x_max = int((float(x_max) // 1000 + 1) * 1000) + y_max = int((float(y_max) // 1000 + 1) * 1000) tile_size = int(resol * tilesize) From 2214659255a3ff97640b497d5c7ce8c4a8567bab Mon Sep 17 00:00:00 2001 From: Nathan Lenglet Date: Mon, 13 Feb 2023 11:11:44 +0100 Subject: [PATCH 6/7] tests(export): update pylint and flake in coveralls --- .github/workflows/lint-python.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint-python.yml b/.github/workflows/lint-python.yml index 7cf0f30a6..8f7a9128c 100644 --- a/.github/workflows/lint-python.yml +++ b/.github/workflows/lint-python.yml @@ -21,10 +21,10 @@ jobs: - name: Analysing the code with pylint run: | - pylint scripts/update_cache.py scripts/create_cache.py scripts/cache.py scripts/cache_def.py scripts/export_mtd.py scripts/prep_vectorise_graph.py scripts/vectorise_graph.py scripts/create_qgis_view.py scripts/process_requests.py scripts/process_qlayers.py + pylint scripts/update_cache.py scripts/create_cache.py scripts/cache.py scripts/cache_def.py scripts/export_mtd.py scripts/export_graph/prep_vectorise_graph.py scripts/export_graph/vectorise_graph.py scripts/export_graph/export_graph.py scripts/create_qgis_view.py scripts/process_requests.py scripts/process_qlayers.py continue-on-error: true - name: Analysing the code with flake8 run: | - flake8 --max-line-length 100 scripts/update_cache.py scripts/create_cache.py scripts/cache.py scripts/cache_def.py scripts/export_mtd.py scripts/prep_vectorise_graph.py scripts/vectorise_graph.py scripts/create_qgis_view.py scripts/process_requests.py scripts/process_qlayers.py + flake8 --max-line-length 100 scripts/update_cache.py scripts/create_cache.py scripts/cache.py scripts/cache_def.py scripts/export_mtd.py scripts/export_graph/prep_vectorise_graph.py scripts/export_graph/vectorise_graph.py scripts/export_graph/export_graph.py scripts/create_qgis_view.py scripts/process_requests.py scripts/process_qlayers.py From 
aedb1dde875e3d48bebb8099ca54762716e3cd11 Mon Sep 17 00:00:00 2001
From: Nathan Lenglet
Date: Thu, 9 Feb 2023 14:58:52 +0100
Subject: [PATCH 7/7] docs(export): update doc for new version of export_graph

docs(export): update doc for export
---
 README.md | 76 +++++++++++++------------------------------------------
 1 file changed, 17 insertions(+), 59 deletions(-)

diff --git a/README.md b/README.md
index bd0148cb6..35af8cbe5 100644
--- a/README.md
+++ b/README.md
@@ -492,87 +492,45 @@ gdal_translate "WMTS:http://[serveur]:[port]/[idBranch]/wmts?SERVICE=WMTS&REQUES
 gdal_translate -of Jpeg ortho.xml ortho.jpg
 ````
-
 ## Export d'un graphe vecteur à partir d'un cache
-
-Le traitement permettant d'exporter un graphe vecteur à partir d'un cache PackO est divisé en deux parties :
-- une première partie pour créer les paramétrages pour les traitements qui sont parallélisables (fichier JSON compatible avec le service gpao de l'IGN) : ***prep_vectorise_graph.py***
-- une deuxième partie qui permet d'exécuter tous les traitements parallélisables via le service gpao de l'IGN : ***vectorise_graph.py***
-
-Pour le bon fonctionnement du script *prep_vectorise_graph.py*, il est impératif de mettre la variable d'environnement **GDAL_VRT_ENABLE_PYTHON** à **YES** avant de le lancer.
+Le traitement permettant d'exporter un graphe vecteur à partir d'un cache PackO est le script *export_graph.py*.
+Ce script va générer un fichier json utilisable par le service de gpao de l'IGN.
+Pour le bon fonctionnement de ce script, il est impératif de mettre la variable d'environnement **GDAL_VRT_ENABLE_PYTHON** à **YES** avant de le lancer.
 ````
-usage: prep_vectorise_graph.py [-h] -i INPUT -o OUTPUT [-b BRANCH] -p PATCHES [-t TILESIZE] [-v VERBOSE]
+usage: export_graph.py [-h] -c CACHE -o OUTPUT -b BRANCH [-u URL] [-t TILESIZE] [--bbox BBOX BBOX BBOX BBOX] [--shapefile SHAPEFILE] [-v VERBOSE]

 optional arguments:
   -h, --help            show this help message and exit
-  -i INPUT, --input INPUT
-                        input cache folder
+  -c CACHE, --cache CACHE
+                        path of input cache
   -o OUTPUT, --output OUTPUT
                         output folder
   -b BRANCH, --branch BRANCH
-                        id of branch of cache to use as source for patches (default: None)
-  -p PATCHES, --patches PATCHES
-                        file containing patches on the branch to export
+                        id of branch of cache to use as source for patches
+  -u URL, --url URL     http://[serveur]:[port] (default: http://localhost:8081)
   -t TILESIZE, --tilesize TILESIZE
-                        tile size (in pixels) for vectorising graph tiles (default: 100000)
+                        tile size (in pixels) for vectorising graph tiles (default: 5000)
+  --bbox BBOX BBOX BBOX BBOX
+                        bbox for export (in meters), xmin ymin xmax ymax
+  --shapefile SHAPEFILE
+                        filepath of shapefile containing extent of export
   -v VERBOSE, --verbose VERBOSE
                         verbose (default: 0)
 ````
-La variable "-b" est optionnelle. Si elle n'est pas donnée, alors elle prend la valeur de la branche du fichier json d'export de retouches dans le cas où des retouches ont été effectuées, sinon le calcul se fait sur le graphe initial.
-
-A l'heure actuelle, il faut utiliser des chemins absolus pour que le script fonctionne correctement.
-
-Il est nécessaire de recourir à l'API pour pouvoir renseigner deux de ces informations :
-- l'id de la branche à partir de laquelle on souhaite exporter le graphe vecteur.
-- et le résultat de la route GET /{idBranch}/patches sur celle-ci (au format json).
+Les chemins donnés en paramètre doivent être absolus.
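À titre d'illustration uniquement (exemple sous Linux ; le chemin du cache, le dossier de sortie, l'identifiant de branche et l'URL ci-dessous sont des valeurs hypothétiques à adapter à votre configuration), un lancement type pourrait ressembler à ceci :
````
export GDAL_VRT_ENABLE_PYTHON=YES
python scripts/export_graph/export_graph.py -c /data/packo/cache -o /data/packo/export -b 1 -u http://localhost:8081
````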
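Pour clarifier le comportement de l'option *--shapefile* documentée ci-dessus (seule l'emprise rectangulaire de la couche est utilisée), voici une esquisse minimale qui reprend la logique du helper `get_extent` introduit dans `prep_vectorise_graph.py` par cette série de patches ; le chemin `emprise.shp` est un exemple fictif :
````
from osgeo import ogr


def get_extent(xmin, xmax, ymin, ymax):
    """Construit un polygone (bbox) a partir des coordonnees, comme dans prep_vectorise_graph."""
    ring = ogr.Geometry(ogr.wkbLinearRing)
    for x, y in [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)]:
        ring.AddPoint(x, y)
    extent = ogr.Geometry(ogr.wkbPolygon)
    extent.AddGeometry(ring)
    return extent


# 'emprise.shp' est un chemin fictif donne en exemple
driver = ogr.GetDriverByName("ESRI Shapefile")
data = driver.Open("emprise.shp", 0)  # 0 = lecture seule
x_min, x_max, y_min, y_max = data.GetLayer().GetExtent()  # OGR renvoie (xmin, xmax, ymin, ymax)
emprise = get_extent(x_min, x_max, y_min, y_max)
print(emprise.GetEnvelope())
````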
+Il est nécessaire d'utiliser l'API pour récupérer l'id de la branche à partir de laquelle on souhaite exporter le graphe.
-Le résultat du script *prep_vectorise_graph.py* est un dossier contenant des fichiers vrt temporaires et un sous-dossier (./tiles) avec les dalles de graphe nécessaires pour le script *vectorise_graph.py*.
-
-Le script vectorise_graph.py crée un fichier json, utilisable avec le service gpao de l'IGN, pour créer deux chantiers : le premier (chantier_polygonize), le deuxième (chantier_merge) qui dépend du premier.
+Les options *bbox* et *shapefile* ne peuvent pas être utilisées simultanément.

 Sous Windows, l'environnement recommandé pour avoir accès aux scripts Gdal et Gdal/Ogr est par le moyen de QGis (qui contient une version de Gdal supérieure ou égale à la version minimale demandée, voir plus haut).
 Il faut initialiser l'environnement QGis via le script qui est à l'emplacement : **{QGis_DIR}\bin\o4w_env.bat**

 Pour exécuter *vectorise_graph.py* sous Windows, il est nécessaire d'avoir configuré la variable d'environnement OSGEO4W_ROOT qui doit pointer vers la racine de QGis.
 Il est également nécessaire d'ajouter dans le PATH les emplacements des exécutables et scripts utilisant Gdal et Gdal/Ogr de QGis : *%OSGEO4W_ROOT%\bin* ainsi que *%OSGEO4W_ROOT%\apps\Python\*\Scripts*. * étant la version de Python embarqué par QGis.

 ````
-usage: vectorise_graph.py [-h] -i INPUT -o OUTPUT [-g GRAPH] [-v VERBOSE]
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -i INPUT, --input INPUT
-                        input data folder (created by prep_vectorise_graph.py)
-  -o OUTPUT, --output OUTPUT
-                        output json gpao filepath
-  -g GRAPH, --graph GRAPH
-                        output vectorised graph pathfile (default: OUTPUT.gpkg)
-  -v VERBOSE, --verbose VERBOSE
-                        verbose (default: 0)
 ````
-
-Le résultat final du calcul gpao de vectorisation, GRAPH_final.gpkg, est au format GeoPackage.
-
-### Récupération des métadonnées dans le graphe
-
-Le script export_mtd.py crée un fichier json, utilisable avec le service gpao de l'IGN pour ajouter les métadonnées au graphe vecteur exporté précédemment. L'environnement nécessaire à son exécution est le même que pour les deux premiers scripts.
-
 ````
-usage: export_mtd.py [-h] -g GRAPH -c CACHE -o OUTPUT [-v VERBOSE]
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -g GRAPH, --graph GRAPH
-                        input graph
-  -c CACHE, --cache CACHE
-                        cache associated with the graph
-  -o OUTPUT, --output OUTPUT
-                        output json gpao filepath
-  -v VERBOSE, --verbose VERBOSE
-                        verbose (default: 0)
 ````
+Le résultat final du calcul gpao de vectorisation, GRAPH_mtd.gpkg, est au format GeoPackage.

 ## Raccourcis clavier