From 13b8ccba92b155c6b44e923aef67b6a2cbb847d2 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Fri, 8 Aug 2025 13:39:11 +0200 Subject: [PATCH 01/24] Initial dirty version of push v2 --- mergin/client.py | 23 ++- mergin/client_push.py | 396 +++++++++++++++++++++++++++++------------- scripts/inv.py | 14 ++ scripts/push_file.py | 35 ++++ scripts/sync.py | 38 ++++ 5 files changed, 382 insertions(+), 124 deletions(-) create mode 100644 scripts/inv.py create mode 100644 scripts/push_file.py create mode 100644 scripts/sync.py diff --git a/mergin/client.py b/mergin/client.py index 625ddd7b..77ce4bc3 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -40,7 +40,7 @@ download_diffs_finalize, ) from .client_pull import pull_project_async, pull_project_wait, pull_project_finalize -from .client_push import push_project_async, push_project_wait, push_project_finalize +from .client_push import push_project_async, push_project_wait, push_project_finalize, UploadChunksCache from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable from .version import __version__ @@ -106,6 +106,8 @@ def __init__( self._user_info = None self._server_type = None self._server_version = None + self._feature_flags = {} + self.upload_chunks_cache = UploadChunksCache() self.client_version = "Python-client/" + __version__ if plugin_version is not None: # this could be e.g. "Plugin/2020.1 QGIS/3.14" self.client_version += " " + plugin_version @@ -362,8 +364,7 @@ def server_type(self): """ if not self._server_type: try: - resp = self.get("/config", validate_auth=False) - config = json.load(resp) + config = self.server_config() if config["server_type"] == "ce": self._server_type = ServerType.CE elif config["server_type"] == "ee": @@ -384,14 +385,26 @@ def server_version(self): """ if self._server_version is None: try: - resp = self.get("/config", validate_auth=False) - config = json.load(resp) + config = self.server_config() self._server_version = config["version"] except (ClientError, KeyError): self._server_version = "" return self._server_version + def server_features(self): + """ + Returns feature flags of the server. 
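+
+        A sketch of the returned mapping (values are illustrative and depend on
+        the server's /config response):
+            {"v2_push_enabled": True, "v2_pull_enabled": False}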
+        """
+        if self._feature_flags:
+            return self._feature_flags
+        config = self.server_config()
+        return {
+            "v2_push_enabled": config.get("v2_push_enabled", False)
+            and is_version_acceptable(self.server_version(), "2025.6.2"),
+            "v2_pull_enabled": config.get("v2_pull_enabled", False),
+        }
+
     def workspaces_list(self):
         """
         Find all available workspaces
diff --git a/mergin/client_push.py b/mergin/client_push.py
index 885db9ac..a17c8646 100644
--- a/mergin/client_push.py
+++ b/mergin/client_push.py
@@ -15,17 +15,110 @@
 import tempfile
 import concurrent.futures
 import os
+import math
+from typing import List, Tuple, Optional
+import uuid
 
 from .common import UPLOAD_CHUNK_SIZE, ClientError
 from .merginproject import MerginProject
 from .editor import filter_changes
 
 
+class UploadChunksCache:
+    """A cache for uploaded chunks to avoid re-uploading them, using checksum as key."""
+
+    def __init__(self):
+        self.cache = {}
+
+    def add(self, checksum, chunk_id):
+        self.cache[checksum] = chunk_id
+
+    def get_chunk_id(self, checksum):
+        """Get chunk_id by checksum, or None if not present."""
+        return self.cache.get(checksum)
+
+
+class UploadQueueItem:
+    """A single chunk of data that needs to be uploaded"""
+
+    def __init__(self, mc, mp, file_path, size, file_checksum, chunk_id, chunk_index):
+        self.mc = mc  # MerginClient instance, set when uploading
+        self.mp: MerginProject = mp  # MerginProject instance, set when uploading
+        self.file_path = file_path  # full path to the file
+        self.size = size  # size of the chunk in bytes
+        self.file_checksum = file_checksum  # checksum of the file, set when uploading
+        self.chunk_id = chunk_id  # ID of the chunk within transaction
+        self.chunk_index = chunk_index  # index (starting from zero) of the chunk within the file
+        self.transaction_id = ""  # ID of the transaction, assigned by the server when starting the upload
+
+        self._request_headers = {"Content-Type": "application/octet-stream"}
+
+    def upload_chunk(self, data: bytes, checksum: str):
+        """
+        Uploads the chunk to the server using the v1 API (within a push transaction).
+        """
+        resp = self.mc.post(
+            f"/v1/project/push/chunk/{self.transaction_id}/{self.chunk_id}",
+            data,
+            self._request_headers,
+        )
+        resp_dict = json.load(resp)
+
+        if not (resp_dict["size"] == len(data) and resp_dict["checksum"] == checksum):
+            try:
+                self.mc.post("/v1/project/push/cancel/{}".format(self.transaction_id))
+            except ClientError:
+                pass
+            raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id))
+
+    def upload_chunk_v2_api(self, data: bytes, checksum: str):
+        """
+        Uploads the chunk to the server using the v2 API, reusing cached chunk ids where possible.
+        """
+        # try to lookup the chunk in the cache, if yes use it.
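+        # The cache maps a chunk's SHA1 checksum to the server-assigned chunk id, so
+        # identical data pushed again can reuse the already uploaded chunk instead of
+        # re-sending it.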
+ cached_chunk_id = self.mc.upload_chunks_cache.get_chunk_id(checksum) + if cached_chunk_id: + self.chunk_id = cached_chunk_id + self.mp.log.debug(f"Chunk {self.chunk_id} already uploaded, skipping upload") + return + project_id = self.mp.project_id() + resp = self.mc.post( + f"/v2/projects/{project_id}/chunks", + data, + self._request_headers, + ) + resp_dict = json.load(resp) + self.chunk_id = resp_dict.get("id") + self.mp.log.debug(f"Upload chunk finished: {self.file_path}") + + def upload_blocking(self): + with open(self.file_path, "rb") as file_handle: + file_handle.seek(self.chunk_index * UPLOAD_CHUNK_SIZE) + data = file_handle.read(UPLOAD_CHUNK_SIZE) + checksum = hashlib.sha1() + checksum.update(data) + checksum_str = checksum.hexdigest() + + self.mp.log.debug(f"Uploading {self.file_path} part={self.chunk_index}") + + if self.mc.server_features().get("v2_push_enabled"): + # use v2 API for uploading chunks + self.upload_chunk_v2_api(data, checksum_str) + else: + # use v1 API for uploading chunks + self.upload_chunk(data, checksum_str) + self.mc.upload_chunks_cache.add(checksum_str, self.chunk_id) + + self.mp.log.debug(f"Upload chunk finished: {self.file_path}") + + class UploadJob: """Keeps all the important data about a pending upload job""" - def __init__(self, project_path, changes, transaction_id, mp, mc, tmp_dir): - self.project_path = project_path # full project name ("username/projectname") + def __init__(self, version: str, changes: dict, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir): + # self.project_path = project_path # full project name ("username/projectname") + self.version = version self.changes = changes # dictionary of local changes to the project self.transaction_id = transaction_id # ID of the transaction assigned by the server self.total_size = 0 # size of data to upload (in bytes) @@ -45,44 +138,138 @@ def dump(self): print("- {} {} {}".format(item.file_path, item.chunk_index, item.size)) print("--- END ---") + def __upload_item(self, item: UploadQueueItem): + """Upload a single item in the background""" + if self.is_cancelled: + return -class UploadQueueItem: - """A single chunk of data that needs to be uploaded""" + item.upload_blocking() + self.transferred_size += item.size - def __init__(self, file_path, size, transaction_id, chunk_id, chunk_index): - self.file_path = file_path # full path to the file - self.size = size # size of the chunk in bytes - self.chunk_id = chunk_id # ID of the chunk within transaction - self.chunk_index = chunk_index # index (starting from zero) of the chunk within the file - self.transaction_id = transaction_id # ID of the transaction + def submit_item_to_thread(self, item: UploadQueueItem): + """Upload a single item in the background""" + feature = self.executor.submit(self.__upload_item, item) + self.futures.append(feature) - def upload_blocking(self, mc, mp): - with open(self.file_path, "rb") as file_handle: - file_handle.seek(self.chunk_index * UPLOAD_CHUNK_SIZE) - data = file_handle.read(UPLOAD_CHUNK_SIZE) + def add_items(self, items: List[UploadQueueItem], total_size: int): + """Add multiple chunks to the upload queue""" + self.total_size = total_size + self.upload_queue_items = items - checksum = hashlib.sha1() - checksum.update(data) + self.mp.log.info(f"will upload {len(items)} items with total size {total_size}") + + # start uploads in background + self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) + for item in items: + item.transaction_id = self.transaction_id # ensure transaction ID 
is set + self.submit_item_to_thread(item) - mp.log.debug(f"Uploading {self.file_path} part={self.chunk_index}") - headers = {"Content-Type": "application/octet-stream"} - resp = mc.post( - "/v1/project/push/chunk/{}/{}".format(self.transaction_id, self.chunk_id), - data, - headers, +def create_upload_chunks(mc, mp: MerginProject, files: dict) -> Tuple[List[UploadQueueItem], int]: + """ + Create a list of UploadQueueItem objects from the changes dictionary and calculate total size of files. + This is used to prepare the upload queue for the upload job. + """ + upload_queue_items = [] + total_size = 0 + # prepare file chunks for upload + for file in files: + file_checksum = file.get("checksum") + if "diff" in file: + # versioned file - uploading diff + file_location = mp.fpath_meta(file["diff"]["path"]) + file_size = file["diff"]["size"] + elif "upload_file" in file: + # versioned file - uploading full (a temporary copy) + file_location = file["upload_file"] + file_size = file["size"] + else: + # non-versioned file + file_location = mp.fpath(file["path"]) + file_size = file["size"] + + for chunk_index, chunk_id in enumerate(file["chunks"]): + size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) + upload_queue_items.append( + UploadQueueItem(mc, mp, file_location, size, file_checksum, chunk_id, chunk_index) ) - resp_dict = json.load(resp) - mp.log.debug(f"Upload finished: {self.file_path}") - if not (resp_dict["size"] == len(data) and resp_dict["checksum"] == checksum.hexdigest()): - try: - mc.post("/v1/project/push/cancel/{}".format(self.transaction_id)) - except ClientError: - pass - raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id)) + total_size += file_size + + return upload_queue_items, total_size + + +def create_upload_job(mc, mp: MerginProject, changes: dict, tmp_dir: tempfile.TemporaryDirectory): + project_path = mp.project_full_name() + local_version = mp.version() + + data = {"version": local_version, "changes": changes} + try: + resp = mc.post( + f"/v1/project/push/{project_path}", + data, + {"Content-Type": "application/json"}, + ) + except ClientError as err: + mp.log.error("Error starting transaction: " + str(err)) + mp.log.info("--- push aborted") + raise + server_resp = json.load(resp) + + upload_files = data["changes"]["added"] + data["changes"]["updated"] + + transaction_id = server_resp["transaction"] if upload_files else None + job = UploadJob(local_version, changes, transaction_id, mp, mc, tmp_dir) + + if not upload_files: + mp.log.info("not uploading any files") + job.server_resp = server_resp + push_project_finalize(job) + return None # all done - no pending job + + mp.log.info(f"got transaction ID {transaction_id}") + + # prepare file chunks for upload + upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_files) + job.add_items(upload_queue_items, total_size) + return job + + +def create_upload_job_v2_api(mc, mp: MerginProject, changes: dict, tmp_dir: tempfile.TemporaryDirectory): + project_id = mp.project_id() + local_version = mp.version() + + data = {"version": local_version, "changes": changes} + try: + resp = mc.post( + f"/v2/projects/{project_id}/versions", + {**data, "check_only": True}, + {"Content-Type": "application/json"}, + ) + except ClientError as err: + mp.log.error("Error starting transaction: " + str(err)) + mp.log.info("--- push aborted") + raise + + upload_files = data["changes"]["added"] + data["changes"]["updated"] + + job = UploadJob(local_version, changes, None, 
mp, mc, tmp_dir) -def push_project_async(mc, directory): + if not upload_files: + mp.log.info("not uploading any files") + job.server_resp = resp + push_project_finalize(job) + return None # all done - no pending job + + mp.log.info(f"Starting upload chunks for project {project_id}") + + # prepare file chunks for upload + upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_files) + job.add_items(upload_queue_items, total_size) + return job + + +def push_project_async(mc, directory) -> Optional[UploadJob]: """Starts push of a project and returns pending upload job""" mp = MerginProject(directory) @@ -146,67 +333,14 @@ def push_project_async(mc, directory): # drop internal info from being sent to server for item in changes["updated"]: item.pop("origin_checksum", None) - data = {"version": local_version, "changes": changes} - - try: - resp = mc.post( - f"/v1/project/push/{project_path}", - data, - {"Content-Type": "application/json"}, - ) - except ClientError as err: - mp.log.error("Error starting transaction: " + str(err)) - mp.log.info("--- push aborted") - raise - server_resp = json.load(resp) - - upload_files = data["changes"]["added"] + data["changes"]["updated"] - - transaction_id = server_resp["transaction"] if upload_files else None - job = UploadJob(project_path, changes, transaction_id, mp, mc, tmp_dir) - - if not upload_files: - mp.log.info("not uploading any files") - job.server_resp = server_resp - push_project_finalize(job) - return None # all done - no pending job - - mp.log.info(f"got transaction ID {transaction_id}") - - upload_queue_items = [] - total_size = 0 - # prepare file chunks for upload - for file in upload_files: - if "diff" in file: - # versioned file - uploading diff - file_location = mp.fpath_meta(file["diff"]["path"]) - file_size = file["diff"]["size"] - elif "upload_file" in file: - # versioned file - uploading full (a temporary copy) - file_location = file["upload_file"] - file_size = file["size"] - else: - # non-versioned file - file_location = mp.fpath(file["path"]) - file_size = file["size"] - - for chunk_index, chunk_id in enumerate(file["chunks"]): - size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) - upload_queue_items.append(UploadQueueItem(file_location, size, transaction_id, chunk_id, chunk_index)) - - total_size += file_size - - job.total_size = total_size - job.upload_queue_items = upload_queue_items - - mp.log.info(f"will upload {len(upload_queue_items)} items with total size {total_size}") - - # start uploads in background - job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) - for item in upload_queue_items: - future = job.executor.submit(_do_upload, item, job) - job.futures.append(future) - + server_feaure_flags = mc.server_features() + job = None + if server_feaure_flags.get("v2_push_enabled"): + # v2 push uses a different endpoint + job = create_upload_job_v2_api(mc, mp, changes, tmp_dir) + else: + # v1 push uses the old endpoint + job = create_upload_job(mc, mp, changes, tmp_dir) return job @@ -233,7 +367,7 @@ def push_project_is_running(job): return False -def push_project_finalize(job): +def push_project_finalize(job: UploadJob): """ To be called when push in the background is finished and we need to do the finalization @@ -244,6 +378,7 @@ def push_project_finalize(job): """ with_upload_of_files = job.executor is not None + server_features = job.mc.server_features() if with_upload_of_files: job.executor.shutdown(wait=True) @@ -262,25 +397,57 @@ def push_project_finalize(job): 
job.mp.log.error("--- push finish failed! " + error_msg) raise ClientError("Upload error: " + error_msg) + # map chunk ids to file checksums + change_keys = ["added", "updated"] + for key in change_keys: + if (key not in job.changes): + continue + uploaded_chunks = [] + for change in job.changes.get(key, []): + change["chunks"] = [ + upload.chunk_id for upload in job.upload_queue_items if upload.file_checksum == change.get("checksum") + ] + uploaded_chunks.append(change) + print(change) + job.changes[key] = uploaded_chunks + if with_upload_of_files: - try: - job.mp.log.info(f"Finishing transaction {job.transaction_id}") - resp = job.mc.post("/v1/project/push/finish/%s" % job.transaction_id) - job.server_resp = json.load(resp) - except ClientError as err: - # server returns various error messages with filename or something generic - # it would be better if it returned list of failed files (and reasons) whenever possible - job.mp.log.error("--- push finish failed! " + str(err)) - - # if push finish fails, the transaction is not killed, so we - # need to cancel it so it does not block further uploads - job.mp.log.info("canceling the pending transaction...") + if server_features.get("v2_push_enabled"): + # v2 push uses a different endpoint + try: + job.mp.log.info(f"Finishing transaction for project {job.mp.project_full_name()}") + job.mc.post( + f"/v2/projects/{job.mp.project_id()}/versions", + data={ + "version": job.version, + "changes": job.changes, + }, + headers={"Content-Type": "application/json"}, + ) + project_info = job.mc.project_info(job.mp.project_id()) + job.server_resp = project_info + except ClientError as err: + job.mp.log.error("--- push finish failed! " + str(err)) + raise err + else: try: - resp_cancel = job.mc.post("/v1/project/push/cancel/%s" % job.transaction_id) - job.mp.log.info("cancel response: " + resp_cancel.msg) - except ClientError as err2: - job.mp.log.info("cancel response: " + str(err2)) - raise err + job.mp.log.info(f"Finishing transaction {job.transaction_id}") + resp = job.mc.post("/v1/project/push/finish/%s" % job.transaction_id) + job.server_resp = json.load(resp) + except ClientError as err: + # server returns various error messages with filename or something generic + # it would be better if it returned list of failed files (and reasons) whenever possible + job.mp.log.error("--- push finish failed! 
" + str(err)) + + # if push finish fails, the transaction is not killed, so we + # need to cancel it so it does not block further uploads + job.mp.log.info("canceling the pending transaction...") + try: + resp_cancel = job.mc.post("/v1/project/push/cancel/%s" % job.transaction_id) + job.mp.log.info("cancel response: " + resp_cancel.msg) + except ClientError as err2: + job.mp.log.info("cancel response: " + str(err2)) + raise err job.mp.update_metadata(job.server_resp) try: @@ -317,15 +484,6 @@ def push_project_cancel(job): job.mp.log.info("--- push cancel response: " + str(job.server_resp)) -def _do_upload(item, job): - """runs in worker thread""" - if job.is_cancelled: - return - - item.upload_blocking(job.mc, job.mp) - job.transferred_size += item.size - - def remove_diff_files(job) -> None: """Looks for diff files in the job and removes them.""" diff --git a/scripts/inv.py b/scripts/inv.py new file mode 100644 index 00000000..eeb58cca --- /dev/null +++ b/scripts/inv.py @@ -0,0 +1,14 @@ +import mergin + +LOGIN="r22overviews" +PASSWORD="MerginOverviews123" +URL="https://app.dev.merginmaps.com" +WS_ID = 10173 + +client = mergin.MerginClient( + url=URL, + login=LOGIN, + password=PASSWORD +) + +client.create_invitation(email="marcel.kocisek@lutraconsulting.co.uk", workspace_id=WS_ID, workspace_role=mergin.common.WorkspaceRole.EDITOR) \ No newline at end of file diff --git a/scripts/push_file.py b/scripts/push_file.py new file mode 100644 index 00000000..3189b6b1 --- /dev/null +++ b/scripts/push_file.py @@ -0,0 +1,35 @@ +import mergin +import os +import random +import string + +LOGIN="admin3" +PASSWORD="topsecret" +URL="http://localhost:8080" + +client = mergin.MerginClient( + url=URL, + login=LOGIN, + password=PASSWORD +) + +PROJECT="mergin/hejj6" +LOCAL_FOLDER=f"/tmp/project{random.choices(string.ascii_letters + string.digits, k=10)}" +client.download_project(PROJECT, LOCAL_FOLDER) + +# create dummy random named file to LOCAL_FOLDER +def create_dummy_file(folder, filename): + file_path = os.path.join(folder, filename) + with open(file_path, 'w') as f: + f.write('This is a dummy file.\n' * (10 * (1024 * 1024))) + return file_path + +dummy_filename = ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + '.txt' +dummy_file_path = create_dummy_file(LOCAL_FOLDER, dummy_filename) +print(f"Dummy file created at: {dummy_file_path}") + +print(client.upload_chunks_cache.cache) + +client.push_project(LOCAL_FOLDER) + +print(client.upload_chunks_cache.cache) diff --git a/scripts/sync.py b/scripts/sync.py new file mode 100644 index 00000000..32660463 --- /dev/null +++ b/scripts/sync.py @@ -0,0 +1,38 @@ +LOGIN="admin3" +PASSWORD="topsecret" +URL="http://localhost:8080" + + +import pandas as pd +import geopandas as gpd +import os +import mergin + +client = mergin.MerginClient( + url=URL, + login=LOGIN, + password=PASSWORD +) + +PROJECT="mergin/vienna" + + +LOCAL_FOLDER="/tmp/project3" +# os.rmdir(LOCAL_FOLDER) + +client.download_project(PROJECT, LOCAL_FOLDER) + +# # Get the data from the CSV (use just sample of data) +# csv_file = os.path.join(LOCAL_FOLDER, "vienna_trees_gansehauffel.csv") +# csv_df = pd.read_csv(csv_file, nrows=20, dtype={"health": str}) +# # Convert geometry in WKT format to GeoDataFrame +# gdf = gpd.GeoDataFrame(csv_df, geometry=gpd.GeoSeries.from_wkt(csv_df.geometry)) +# print(gdf.head()) +# # Save the GeoDataFrame to a Geopackage +# gdf.to_file( +# os.path.join(LOCAL_FOLDER, "Ready_to_survey_trees.gpkg"), +# layer="Ready_to_survey_trees", driver="GPKG", +# ) + +# print 
(client.upload_chunks_cache.cache) +# client.push_project(LOCAL_FOLDER) \ No newline at end of file From 216c3bde7399e91006e7e7d4a005fd2cd86f434e Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Fri, 8 Aug 2025 15:15:56 +0200 Subject: [PATCH 02/24] tweak _do_upload running outside of class to prevent threads mix --- mergin/client.py | 11 ++++++----- mergin/client_push.py | 19 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 77ce4bc3..a5c26b89 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -106,7 +106,7 @@ def __init__( self._user_info = None self._server_type = None self._server_version = None - self._feature_flags = {} + self._server_features = {} self.upload_chunks_cache = UploadChunksCache() self.client_version = "Python-client/" + __version__ if plugin_version is not None: # this could be e.g. "Plugin/2020.1 QGIS/3.14" @@ -396,14 +396,15 @@ def server_features(self): """ Returns feature flags of the server. """ - if self._feature_flags: - return self._feature_flags + if self._server_features: + return self._server_features config = self.server_config() - return { + self._server_features = { "v2_push_enabled": config.get("v2_push_enabled", False) - and is_version_acceptable(self.server_version(), "2025.6.2"), + and is_version_acceptable(self.server_version(), "2025.6.1"), "v2_pull_enabled": config.get("v2_pull_enabled", False), } + return self._server_features def workspaces_list(self): """ diff --git a/mergin/client_push.py b/mergin/client_push.py index a17c8646..578cef2e 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -23,6 +23,14 @@ from .merginproject import MerginProject from .editor import filter_changes +def _do_upload(item, job): + """runs in worker thread, have to be defined here to avoid worker threads repeating this function""" + if job.is_cancelled: + return + + item.upload_blocking() + job.transferred_size += item.size + class UploadChunksCache: """A cache for uploaded chunks to avoid re-uploading them, using checksum as key.""" @@ -101,14 +109,12 @@ def upload_blocking(self): checksum_str = checksum.hexdigest() self.mp.log.debug(f"Uploading {self.file_path} part={self.chunk_index}") - if self.mc.server_features().get("v2_push_enabled"): # use v2 API for uploading chunks self.upload_chunk_v2_api(data, checksum_str) else: # use v1 API for uploading chunks self.upload_chunk(data, checksum_str) - self.mc.upload_chunks_cache.add(checksum_str, self.chunk_id) self.mp.log.debug(f"Upload chunk finished: {self.file_path}") @@ -138,17 +144,10 @@ def dump(self): print("- {} {} {}".format(item.file_path, item.chunk_index, item.size)) print("--- END ---") - def __upload_item(self, item: UploadQueueItem): - """Upload a single item in the background""" - if self.is_cancelled: - return - - item.upload_blocking() - self.transferred_size += item.size def submit_item_to_thread(self, item: UploadQueueItem): """Upload a single item in the background""" - feature = self.executor.submit(self.__upload_item, item) + feature = self.executor.submit(_do_upload, item, self) self.futures.append(feature) def add_items(self, items: List[UploadQueueItem], total_size: int): From 57c6bf4cee820882927668ce7a54af2fae619c7e Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Thu, 14 Aug 2025 11:35:08 +0200 Subject: [PATCH 03/24] add local changes - attempts of failing chunks =- init for sync_project --- mergin/client.py | 34 +++- mergin/client_push.py | 274 +++++++++++++++++------------- 
 mergin/local_changes.py | 83 +++++++++
 mergin/merginproject.py | 2 +-
 mergin/test/test_local_changes.py | 113 ++++++++++++
 scripts/push_file.py | 5 +-
 6 files changed, 385 insertions(+), 126 deletions(-)
 create mode 100644 mergin/local_changes.py
 create mode 100644 mergin/test/test_local_changes.py

diff --git a/mergin/client.py b/mergin/client.py
index a5c26b89..0ff26caf 100644
--- a/mergin/client.py
+++ b/mergin/client.py
@@ -3,6 +3,7 @@
 import os
 import json
 import shutil
+import time
 import zlib
 import base64
 import urllib.parse
@@ -400,8 +401,7 @@ def server_features(self):
             return self._server_features
         config = self.server_config()
         self._server_features = {
-            "v2_push_enabled": config.get("v2_push_enabled", False)
-            and is_version_acceptable(self.server_version(), "2025.6.1"),
+            "v2_push_enabled": config.get("v2_push_enabled", False),
             "v2_pull_enabled": config.get("v2_pull_enabled", False),
         }
         return self._server_features
@@ -1485,3 +1485,33 @@ def create_invitation(self, workspace_id: int, email: str, workspace_role: Works
         params = {"email": email, "role": workspace_role.value}
         ws_inv = self.post(f"v2/workspaces/{workspace_id}/invitations", params, json_headers)
         return json.load(ws_inv)
+
+    def sync_project(self, project_dir):
+        """
+        Syncs the project in a loop with these steps:
+        1. Pull the latest server version
+        2. Get local changes
+        3. Push the first batch of changes
+        Repeat while there are more local changes.
+        Batch pushing makes use of the server's ability to handle an exclusive upload (which blocks
+        other uploads) and non-exclusive uploads (for adding assets) simultaneously.
+        """
+        attempts = 2
+        for attempt in range(attempts):
+            try:
+                pull_job = pull_project_async(self, project_dir)
+                if pull_job:
+                    pull_project_wait(pull_job)
+                    pull_project_finalize(pull_job)
+
+                job = push_project_async(self, project_dir)
+                if job:
+                    push_project_wait(job)
+                    push_project_finalize(job)
+                break
+            except ClientError as e:
+                if e.http_error == 409 and attempt < attempts - 1:
+                    # retry on conflict, e.g. when the server has changes that we do not have yet
+                    time.sleep(5)
+                    continue
+                raise
diff --git a/mergin/client_push.py b/mergin/client_push.py
index 578cef2e..e14830c0 100644
--- a/mergin/client_push.py
+++ b/mergin/client_push.py
@@ -9,20 +9,23 @@
 To finish the upload job, we have to call push_project_finalize(job).
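+
+A minimal usage sketch (assuming an authenticated MerginClient ``mc`` and a local
+project directory ``directory``):
+
+    job = push_project_async(mc, directory)
+    if job is not None:  # None means there was nothing left to upload
+        push_project_wait(job)
+        push_project_finalize(job)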
""" +from dataclasses import asdict import json import hashlib import pprint import tempfile import concurrent.futures import os -import math +import time from typing import List, Tuple, Optional -import uuid + +from .local_changes import LocalChange, LocalChanges from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject from .editor import filter_changes + def _do_upload(item, job): """runs in worker thread, have to be defined here to avoid worker threads repeating this function""" if job.is_cancelled: @@ -45,17 +48,24 @@ def get_chunk_id(self, checksum): """Get chunk_id by checksum, or None if not present.""" return self.cache.get(checksum) + def clear(self): + """Clear the cache.""" + self.cache.clear() + class UploadQueueItem: """A single chunk of data that needs to be uploaded""" - def __init__(self, mc, mp, file_path, size, file_checksum, chunk_id, chunk_index): + def __init__( + self, mc, mp: MerginProject, file_path: str, size: int, file_checksum: str, chunk_id: str, chunk_index: int + ): self.mc = mc # MerginClient instance, set when uploading self.mp: MerginProject = mp # MerginProject instance, set when uploading self.file_path = file_path # full path to the file self.size = size # size of the chunk in bytes - self.file_checksum = file_checksum # checksum of the file, set when uploading + self.file_checksum = file_checksum # checksum of the file uploaded file self.chunk_id = chunk_id # ID of the chunk within transaction + self.server_chunk_id = None # ID of the chunk on the server, set when uploading self.chunk_index = chunk_index # index (starting from zero) of the chunk within the file self.transaction_id = "" # ID of the transaction, assigned by the server when starting the upload @@ -87,8 +97,8 @@ def upload_chunk_v2_api(self, data: bytes, checksum: str): # try to lookup the chunk in the cache, if yes use it. cached_chunk_id = self.mc.upload_chunks_cache.get_chunk_id(checksum) if cached_chunk_id: - self.chunk_id = cached_chunk_id - self.mp.log.debug(f"Chunk {self.chunk_id} already uploaded, skipping upload") + self.server_chunk_id = cached_chunk_id + self.mp.log.debug(f"Chunk {self.server_chunk_id} already uploaded, skipping upload") return project_id = self.mp.project_id() resp = self.mc.post( @@ -97,7 +107,8 @@ def upload_chunk_v2_api(self, data: bytes, checksum: str): self._request_headers, ) resp_dict = json.load(resp) - self.chunk_id = resp_dict.get("id") + self.server_chunk_id = resp_dict.get("id") + self.mc.upload_chunks_cache.add(checksum, self.server_chunk_id) self.mp.log.debug(f"Upload chunk finished: {self.file_path}") def upload_blocking(self): @@ -109,12 +120,22 @@ def upload_blocking(self): checksum_str = checksum.hexdigest() self.mp.log.debug(f"Uploading {self.file_path} part={self.chunk_index}") - if self.mc.server_features().get("v2_push_enabled"): - # use v2 API for uploading chunks - self.upload_chunk_v2_api(data, checksum_str) - else: - # use v1 API for uploading chunks - self.upload_chunk(data, checksum_str) + attempts = 2 + for attempt in range(attempts): + try: + if self.mc.server_features().get("v2_push_enabled"): + # use v2 API for uploading chunks + self.upload_chunk_v2_api(data, checksum_str) + else: + # use v1 API for uploading chunks + self.upload_chunk(data, checksum_str) + break # exit loop if upload was successful + except ClientError as e: + if e.http_error == 409 and attempt < attempts - 1: + # retry on conflict, e.g. 
when server has changes that we do not have yet + time.sleep(5) + continue + raise self.mp.log.debug(f"Upload chunk finished: {self.file_path}") @@ -122,14 +143,16 @@ def upload_blocking(self): class UploadJob: """Keeps all the important data about a pending upload job""" - def __init__(self, version: str, changes: dict, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir): + def __init__( + self, version: str, changes: LocalChanges, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir + ): # self.project_path = project_path # full project name ("username/projectname") self.version = version - self.changes = changes # dictionary of local changes to the project + self.changes: LocalChanges = changes # dictionary of local changes to the project self.transaction_id = transaction_id # ID of the transaction assigned by the server self.total_size = 0 # size of data to upload (in bytes) self.transferred_size = 0 # size of data already uploaded (in bytes) - self.upload_queue_items = [] # list of items to upload in the background + self.upload_queue_items: List[UploadQueueItem] = [] # list of items to upload in the background self.mp = mp # MerginProject instance self.mc = mc # MerginClient instance self.tmp_dir = tmp_dir # TemporaryDirectory instance for any temp file we need @@ -144,7 +167,6 @@ def dump(self): print("- {} {} {}".format(item.file_path, item.chunk_index, item.size)) print("--- END ---") - def submit_item_to_thread(self, item: UploadQueueItem): """Upload a single item in the background""" feature = self.executor.submit(_do_upload, item, self) @@ -160,11 +182,16 @@ def add_items(self, items: List[UploadQueueItem], total_size: int): # start uploads in background self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) for item in items: - item.transaction_id = self.transaction_id # ensure transaction ID is set + if self.transaction_id: + item.transaction_id = self.transaction_id self.submit_item_to_thread(item) + def update_chunks_from_items(self): + """Update chunks in LocalChanges from the upload queue items.""" + self.changes.update_chunks([(item.file_checksum, item.server_chunk_id) for item in self.upload_queue_items]) + -def create_upload_chunks(mc, mp: MerginProject, files: dict) -> Tuple[List[UploadQueueItem], int]: +def create_upload_chunks(mc, mp: MerginProject, local_changes: List[LocalChange]) -> Tuple[List[UploadQueueItem], int]: """ Create a list of UploadQueueItem objects from the changes dictionary and calculate total size of files. This is used to prepare the upload queue for the upload job. 
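+    For example (illustrative numbers), a 25 MiB file with an UPLOAD_CHUNK_SIZE of
+    10 MiB yields three UploadQueueItem entries of 10, 10 and 5 MiB, one per chunk
+    id listed in the change's chunks attribute.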
@@ -172,22 +199,23 @@ def create_upload_chunks(mc, mp: MerginProject, files: dict) -> Tuple[List[Uploa upload_queue_items = [] total_size = 0 # prepare file chunks for upload - for file in files: - file_checksum = file.get("checksum") - if "diff" in file: + for change in local_changes: + file_checksum = change.checksum + diff = change.get_diff() + if diff: # versioned file - uploading diff - file_location = mp.fpath_meta(file["diff"]["path"]) - file_size = file["diff"]["size"] - elif "upload_file" in file: + file_location = mp.fpath_meta(diff.path) + file_size = diff.size + elif change.upload_file: # versioned file - uploading full (a temporary copy) - file_location = file["upload_file"] - file_size = file["size"] + file_location = change.upload_file + file_size = change.size else: # non-versioned file - file_location = mp.fpath(file["path"]) - file_size = file["size"] + file_location = mp.fpath(change.path) + file_size = change.size - for chunk_index, chunk_id in enumerate(file["chunks"]): + for chunk_index, chunk_id in enumerate(change.chunks): size = min(UPLOAD_CHUNK_SIZE, file_size - chunk_index * UPLOAD_CHUNK_SIZE) upload_queue_items.append( UploadQueueItem(mc, mp, file_location, size, file_checksum, chunk_id, chunk_index) @@ -198,11 +226,16 @@ def create_upload_chunks(mc, mp: MerginProject, files: dict) -> Tuple[List[Uploa return upload_queue_items, total_size -def create_upload_job(mc, mp: MerginProject, changes: dict, tmp_dir: tempfile.TemporaryDirectory): +def create_upload_job( + mc, mp: MerginProject, changes: LocalChanges, tmp_dir: tempfile.TemporaryDirectory +) -> Optional[UploadJob]: + """ + Prepare transaction and create an upload job for the project using the v1 API. + """ project_path = mp.project_full_name() local_version = mp.version() - data = {"version": local_version, "changes": changes} + data = {"version": local_version, "changes": changes.get_server_request()} try: resp = mc.post( f"/v1/project/push/{project_path}", @@ -213,58 +246,66 @@ def create_upload_job(mc, mp: MerginProject, changes: dict, tmp_dir: tempfile.Te mp.log.error("Error starting transaction: " + str(err)) mp.log.info("--- push aborted") raise - server_resp = json.load(resp) - - upload_files = data["changes"]["added"] + data["changes"]["updated"] + push_start_resp = json.load(resp) - transaction_id = server_resp["transaction"] if upload_files else None + upload_changes = changes.get_upload_changes() + transaction_id = push_start_resp["transaction"] if upload_changes else None job = UploadJob(local_version, changes, transaction_id, mp, mc, tmp_dir) - if not upload_files: + if not upload_changes: mp.log.info("not uploading any files") - job.server_resp = server_resp + job.server_resp = push_start_resp push_project_finalize(job) - return None # all done - no pending job + return # all done - no pending job mp.log.info(f"got transaction ID {transaction_id}") # prepare file chunks for upload - upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_files) + upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_changes) job.add_items(upload_queue_items, total_size) return job -def create_upload_job_v2_api(mc, mp: MerginProject, changes: dict, tmp_dir: tempfile.TemporaryDirectory): +def create_upload_job_v2_api( + mc, mp: MerginProject, changes: LocalChanges, tmp_dir: tempfile.TemporaryDirectory +) -> Optional[UploadJob]: + """ + Call checkonly endpoint to check compatibility of the project with the server + and prepare an upload job for the project using the v2 API. 
+ """ project_id = mp.project_id() local_version = mp.version() - data = {"version": local_version, "changes": changes} + data = {"version": local_version, "changes": changes.get_server_request()} try: - resp = mc.post( + mc.post( f"/v2/projects/{project_id}/versions", {**data, "check_only": True}, {"Content-Type": "application/json"}, ) except ClientError as err: - mp.log.error("Error starting transaction: " + str(err)) - mp.log.info("--- push aborted") - raise - - upload_files = data["changes"]["added"] + data["changes"]["updated"] + # ignore 409 error, it means that the project is already in a transaction + # but we still want to continue with the upload + if err.http_error not in [409]: + mp.log.error("Error doing push check compatibility: " + str(err)) + mp.log.error("--- push aborted") + raise + else: + mp.log.info("Project version exists or another upload is running, continuing with upload.") + upload_changes = changes.get_upload_changes() job = UploadJob(local_version, changes, None, mp, mc, tmp_dir) + upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_changes) + job.add_items(upload_queue_items, total_size) - if not upload_files: + if not upload_queue_items: mp.log.info("not uploading any files") - job.server_resp = resp push_project_finalize(job) return None # all done - no pending job mp.log.info(f"Starting upload chunks for project {project_id}") # prepare file chunks for upload - upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_files) - job.add_items(upload_queue_items, total_size) return job @@ -299,12 +340,13 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: mp.log.error(f"--- push {project_path} - username {username} does not have write access") raise ClientError(f"You do not seem to have write access to the project (username '{username}')") - if local_version != server_version: - mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") - raise ClientError( - "There is a new version of the project on the server. Please update your local copy." - + f"\n\nLocal version: {local_version}\nServer version: {server_version}" - ) + # DISCUSSION: do we want to check if the project is up to date before pushing? For now, we removed this part + # if local_version != server_version: + # mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") + # raise ClientError( + # "There is a new version of the project on the server. Please update your local copy." 
+ # + f"\n\nLocal version: {local_version}\nServer version: {server_version}" + # ) changes = mp.get_push_changes() changes = filter_changes(mc, project_info, changes) @@ -329,17 +371,19 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: mp.log.info(f"--- push {project_path} - nothing to do") return - # drop internal info from being sent to server - for item in changes["updated"]: - item.pop("origin_checksum", None) server_feaure_flags = mc.server_features() job = None + local_changes = LocalChanges( + added=[LocalChange(**change) for change in changes["added"]], + updated=[LocalChange(**change) for change in changes["updated"]], + removed=[LocalChange(**change) for change in changes["removed"]], + ) if server_feaure_flags.get("v2_push_enabled"): # v2 push uses a different endpoint - job = create_upload_job_v2_api(mc, mp, changes, tmp_dir) + job = create_upload_job_v2_api(mc, mp, local_changes, tmp_dir) else: # v1 push uses the old endpoint - job = create_upload_job(mc, mp, changes, tmp_dir) + job = create_upload_job(mc, mp, local_changes, tmp_dir) return job @@ -349,7 +393,7 @@ def push_project_wait(job): concurrent.futures.wait(job.futures) -def push_project_is_running(job): +def push_project_is_running(job: UploadJob): """ Returns true/false depending on whether we have some pending uploads @@ -396,74 +440,62 @@ def push_project_finalize(job: UploadJob): job.mp.log.error("--- push finish failed! " + error_msg) raise ClientError("Upload error: " + error_msg) - # map chunk ids to file checksums - change_keys = ["added", "updated"] - for key in change_keys: - if (key not in job.changes): - continue - uploaded_chunks = [] - for change in job.changes.get(key, []): - change["chunks"] = [ - upload.chunk_id for upload in job.upload_queue_items if upload.file_checksum == change.get("checksum") - ] - uploaded_chunks.append(change) - print(change) - job.changes[key] = uploaded_chunks + job.update_chunks_from_items() - if with_upload_of_files: - if server_features.get("v2_push_enabled"): - # v2 push uses a different endpoint - try: - job.mp.log.info(f"Finishing transaction for project {job.mp.project_full_name()}") - job.mc.post( - f"/v2/projects/{job.mp.project_id()}/versions", - data={ - "version": job.version, - "changes": job.changes, - }, - headers={"Content-Type": "application/json"}, - ) - project_info = job.mc.project_info(job.mp.project_id()) - job.server_resp = project_info - except ClientError as err: - job.mp.log.error("--- push finish failed! " + str(err)) - raise err - else: + if server_features.get("v2_push_enabled"): + # v2 push uses a different endpoint + try: + job.mp.log.info(f"Finishing transaction for project {job.mp.project_full_name()}") + job.mc.post( + f"/v2/projects/{job.mp.project_id()}/versions", + data={ + "version": job.version, + "changes": job.changes.get_server_request(), + }, + headers={"Content-Type": "application/json"}, + ) + project_info = job.mc.project_info(job.mp.project_id()) + job.server_resp = project_info + except ClientError as err: + job.mp.log.error("--- push finish failed! 
" + str(err)) + raise err + elif with_upload_of_files: + try: + job.mp.log.info(f"Finishing transaction {job.transaction_id}") + resp = job.mc.post("/v1/project/push/finish/%s" % job.transaction_id) + job.server_resp = json.load(resp) + except ClientError as err: + # server returns various error messages with filename or something generic + # it would be better if it returned list of failed files (and reasons) whenever possible + job.mp.log.error("--- push finish failed! " + str(err)) + + # if push finish fails, the transaction is not killed, so we + # need to cancel it so it does not block further uploads + job.mp.log.info("canceling the pending transaction...") try: - job.mp.log.info(f"Finishing transaction {job.transaction_id}") - resp = job.mc.post("/v1/project/push/finish/%s" % job.transaction_id) - job.server_resp = json.load(resp) - except ClientError as err: - # server returns various error messages with filename or something generic - # it would be better if it returned list of failed files (and reasons) whenever possible - job.mp.log.error("--- push finish failed! " + str(err)) - - # if push finish fails, the transaction is not killed, so we - # need to cancel it so it does not block further uploads - job.mp.log.info("canceling the pending transaction...") - try: - resp_cancel = job.mc.post("/v1/project/push/cancel/%s" % job.transaction_id) - job.mp.log.info("cancel response: " + resp_cancel.msg) - except ClientError as err2: - job.mp.log.info("cancel response: " + str(err2)) - raise err + resp_cancel = job.mc.post("/v1/project/push/cancel/%s" % job.transaction_id) + job.mp.log.info("cancel response: " + resp_cancel.msg) + except ClientError as err2: + job.mp.log.info("cancel response: " + str(err2)) + raise err job.mp.update_metadata(job.server_resp) try: - job.mp.apply_push_changes(job.changes) + job.mp.apply_push_changes(asdict(job.changes)) except Exception as e: job.mp.log.error("Failed to apply push changes: " + str(e)) job.mp.log.info("--- push aborted") raise ClientError("Failed to apply push changes: " + str(e)) job.tmp_dir.cleanup() # delete our temporary dir and all its content + job.mc.upload_chunks_cache.clear() # clear the upload chunks cache remove_diff_files(job) job.mp.log.info("--- push finished - new project version " + job.server_resp["version"]) -def push_project_cancel(job): +def push_project_cancel(job: UploadJob): """ To be called (from main thread) to cancel a job that has uploads in progress. Returns once all background tasks have exited (may block for a bit of time). 
@@ -474,6 +506,9 @@ def push_project_cancel(job): job.is_cancelled = True job.executor.shutdown(wait=True) + if not job.transaction_id: + job.mp.log.info("--- push cancelled") + return try: resp_cancel = job.mc.post("/v1/project/push/cancel/%s" % job.transaction_id) job.server_resp = resp_cancel.msg @@ -483,11 +518,12 @@ def push_project_cancel(job): job.mp.log.info("--- push cancel response: " + str(job.server_resp)) -def remove_diff_files(job) -> None: +def remove_diff_files(job: UploadJob) -> None: """Looks for diff files in the job and removes them.""" - for change in job.changes["updated"]: - if "diff" in change.keys(): - diff_file = job.mp.fpath_meta(change["diff"]["path"]) + for change in job.changes.updated: + diff = change.get_diff() + if diff: + diff_file = job.mp.fpath_meta(diff.path) if os.path.exists(diff_file): os.remove(diff_file) diff --git a/mergin/local_changes.py b/mergin/local_changes.py new file mode 100644 index 00000000..35f3a290 --- /dev/null +++ b/mergin/local_changes.py @@ -0,0 +1,83 @@ +from dataclasses import dataclass, field +from datetime import datetime +from typing import Dict, Optional, List, Tuple + +@dataclass +class BaseLocalChange: + path: str + checksum: str + size: int + mtime: datetime + +@dataclass +class LocalChange(BaseLocalChange): + origin_checksum: Optional[str] = None + chunks: List[str] = field(default_factory=list) + diff: dict = field(default_factory=dict) # Assuming diff is a dict with relevant data + upload_file: Optional[str] = None + + def get_diff(self) -> Optional[BaseLocalChange]: + if self.diff: + return BaseLocalChange( + path=self.diff.get("path", ""), + checksum=self.diff.get("checksum", ""), + size=self.diff.get("size", 0), + mtime=self.diff.get("mtime", datetime.now()) + ) + return None + + def get_server_data(self) -> dict: + result = { + "path": self.path, + "checksum": self.checksum, + "size": self.size, + "chunks": self.chunks, + } + if self.diff: + result["diff"] = { + "path": self.diff.get("path", ""), + "checksum": self.diff.get("checksum", ""), + "size": self.diff.get("size", 0) + } + return result + +@dataclass +class LocalChanges: + added: List[LocalChange] = field(default_factory=list) + updated: List[LocalChange] = field(default_factory=list) + removed: List[LocalChange] = field(default_factory=list) + + def get_server_request(self) -> dict: + return { + "added": [change.get_server_data() for change in self.added], + "updated": [change.get_server_data() for change in self.updated], + "removed": [change.get_server_data() for change in self.removed], + } + + def get_upload_changes(self) -> List[LocalChange]: + """ + Get all changes that need to be uploaded. + This includes added and updated files. + """ + return self.added + self.updated + + def update_chunks(self, chunks: List[Tuple[str, str]]) -> None: + """ + Map chunk ids to file checksums. + + This method updates the `chunks` attribute of each change in `added` and `updated` + lists based on the provided `chunks` list, which contains tuples of (checksum, chunk_id). 
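+
+        Example (illustrative values, mirroring the unit tests below):
+            changes.update_chunks([("abc123", "chunk1"), ("xyz789", "chunk2")])
+            # a change with checksum "abc123" now has chunks == ["chunk1"]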
+ """ + for change in self.added: + change.chunks = [ + chunk[1] + for chunk in chunks + if chunk[0] == change.checksum + ] + + for change in self.updated: + change.chunks = [ + chunk[1] + for chunk in chunks + if chunk[0] == change.checksum + ] \ No newline at end of file diff --git a/mergin/merginproject.py b/mergin/merginproject.py index a82f5796..3d013bb8 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -34,7 +34,6 @@ except (ImportError, ModuleNotFoundError): import pygeodiff - class MerginProject: """Base class for Mergin Maps local projects. @@ -440,6 +439,7 @@ def get_push_changes(self): diff_id = str(uuid.uuid4()) diff_name = path + "-diff-" + diff_id diff_file = self.fpath_meta(diff_name) + print(f"Creating changeset for {path} - {diff_name}") try: self.geodiff.create_changeset(origin_file, current_file, diff_file) if self.geodiff.has_changes(diff_file): diff --git a/mergin/test/test_local_changes.py b/mergin/test/test_local_changes.py new file mode 100644 index 00000000..4a57bab9 --- /dev/null +++ b/mergin/test/test_local_changes.py @@ -0,0 +1,113 @@ +from datetime import datetime + +from ..local_changes import LocalChange, LocalChanges + +def test_local_changes_from_dict(): + """Test generating LocalChanges from a dictionary.""" + changes_dict = { + "added": [ + {"path": "file1.txt", "checksum": "abc123", "size": 1024, "mtime": datetime.now()} + ], + "updated": [ + {"path": "file2.txt", "checksum": "xyz789", "size": 2048, "mtime": datetime.now()} + ], + "removed": [ + {"path": "file3.txt", "checksum": "lmn456", "size": 512, "mtime": datetime.now()} + ] + } + + # Convert dictionary to LocalChanges + added = [LocalChange(**file) for file in changes_dict["added"]] + updated = [LocalChange(**file) for file in changes_dict["updated"]] + removed = [LocalChange(**file) for file in changes_dict["removed"]] + + local_changes = LocalChanges(added=added, updated=updated, removed=removed) + + # Assertions + assert len(local_changes.added) == 1 + assert len(local_changes.updated) == 1 + assert len(local_changes.removed) == 1 + assert local_changes.added[0].path == "file1.txt" + assert local_changes.updated[0].path == "file2.txt" + assert local_changes.removed[0].path == "file3.txt" + + +def test_local_change_get_diff(): + """Test the get_diff method of LocalChange.""" + local_change = LocalChange( + path="file1.txt", + checksum="abc123", + size=1024, + mtime=datetime.now(), + diff={"path": "file1-diff", "checksum": "diff123", "size": 512, "mtime": datetime.now()} + ) + + diff = local_change.get_diff() + assert diff is not None + assert diff.path == "file1-diff" + assert diff.checksum == "diff123" + assert diff.size == 512 + + +def test_local_changes_get_server_request(): + """Test the get_server_request method of LocalChanges.""" + added = [ + LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()) + ] + updated = [ + LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now()) + ] + removed = [ + LocalChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now()) + ] + + local_changes = LocalChanges(added=added, updated=updated, removed=removed) + server_request = local_changes.get_server_request() + + assert "added" in server_request + assert "updated" in server_request + assert "removed" in server_request + assert server_request["added"][0]["path"] == "file1.txt" + assert server_request["updated"][0]["path"] == "file2.txt" + assert server_request["removed"][0]["path"] == "file3.txt" + +def 
test_local_changes_update_chunks(): + """Test the update_chunks method of LocalChanges.""" + added = [ + LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()) + ] + updated = [ + LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now()) + ] + + local_changes = LocalChanges(added=added, updated=updated) + chunks = [("abc123", "chunk1"), ("xyz789", "chunk2")] + + local_changes.update_chunks(chunks) + + assert local_changes.added[0].chunks == ["chunk1"] + assert local_changes.updated[0].chunks == ["chunk2"] + +def test_local_changes_get_upload_changes(): + """Test the get_upload_changes method of LocalChanges.""" + # Create sample LocalChange instances + added = [ + LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()) + ] + updated = [ + LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now()) + ] + removed = [ + LocalChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now()) + ] + + # Initialize LocalChanges with added, updated, and removed changes + local_changes = LocalChanges(added=added, updated=updated, removed=removed) + + # Call get_upload_changes + upload_changes = local_changes.get_upload_changes() + + # Assertions + assert len(upload_changes) == 2 # Only added and updated should be included + assert upload_changes[0].path == "file1.txt" # First change is from added + assert upload_changes[1].path == "file2.txt" # Second change is from updated \ No newline at end of file diff --git a/scripts/push_file.py b/scripts/push_file.py index 3189b6b1..f9121df2 100644 --- a/scripts/push_file.py +++ b/scripts/push_file.py @@ -21,15 +21,12 @@ def create_dummy_file(folder, filename): file_path = os.path.join(folder, filename) with open(file_path, 'w') as f: - f.write('This is a dummy file.\n' * (10 * (1024 * 1024))) + f.write('This is a dummy file.\n' * (2 * (1024 * 1024))) return file_path dummy_filename = ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + '.txt' dummy_file_path = create_dummy_file(LOCAL_FOLDER, dummy_filename) print(f"Dummy file created at: {dummy_file_path}") -print(client.upload_chunks_cache.cache) client.push_project(LOCAL_FOLDER) - -print(client.upload_chunks_cache.cache) From 88f00ea54833c850e290e25bc0f626f8eada7e35 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Thu, 14 Aug 2025 14:02:22 +0200 Subject: [PATCH 04/24] make tests runnable with checking current version - Please update your local copy --- mergin/client_push.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index e14830c0..0e01ad60 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -341,12 +341,12 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: raise ClientError(f"You do not seem to have write access to the project (username '{username}')") # DISCUSSION: do we want to check if the project is up to date before pushing? For now, we removed this part - # if local_version != server_version: - # mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") - # raise ClientError( - # "There is a new version of the project on the server. Please update your local copy." 
- # + f"\n\nLocal version: {local_version}\nServer version: {server_version}" - # ) + if local_version != server_version: + mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") + raise ClientError( + "There is a new version of the project on the server. Please update your local copy." + + f"\n\nLocal version: {local_version}\nServer version: {server_version}" + ) changes = mp.get_push_changes() changes = filter_changes(mc, project_info, changes) From a2cfe62c0449feb80c9f50fa9f14da51814ac8ab Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Thu, 14 Aug 2025 16:17:52 +0200 Subject: [PATCH 05/24] Cleanup and fix getting diff from dictionary to be more safe --- mergin/local_changes.py | 2 +- mergin/merginproject.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mergin/local_changes.py b/mergin/local_changes.py index 35f3a290..7d71ef53 100644 --- a/mergin/local_changes.py +++ b/mergin/local_changes.py @@ -13,7 +13,7 @@ class BaseLocalChange: class LocalChange(BaseLocalChange): origin_checksum: Optional[str] = None chunks: List[str] = field(default_factory=list) - diff: dict = field(default_factory=dict) # Assuming diff is a dict with relevant data + diff: Optional[dict] = None upload_file: Optional[str] = None def get_diff(self) -> Optional[BaseLocalChange]: diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 3d013bb8..dba70d45 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -439,7 +439,6 @@ def get_push_changes(self): diff_id = str(uuid.uuid4()) diff_name = path + "-diff-" + diff_id diff_file = self.fpath_meta(diff_name) - print(f"Creating changeset for {path} - {diff_name}") try: self.geodiff.create_changeset(origin_file, current_file, diff_file) if self.geodiff.has_changes(diff_file): @@ -704,13 +703,14 @@ def apply_push_changes(self, changes): self.geodiff.make_copy_sqlite(self.fpath(path), basefile) elif k == "updated": # in case for geopackage cannot be created diff (e.g. 
forced update with committed changes from wal file) - if "diff" not in item: + diff = item.get("diff") + if not diff: self.log.info("updating basefile (copy) for: " + path) self.geodiff.make_copy_sqlite(self.fpath(path), basefile) else: self.log.info("updating basefile (diff) for: " + path) # better to apply diff to previous basefile to avoid issues with geodiff tmp files - changeset = self.fpath_meta(item["diff"]["path"]) + changeset = self.fpath_meta(diff["path"]) patch_error = self.apply_diffs(basefile, [changeset]) if patch_error: # in case of local sync issues it is safier to remove basefile, next time it will be downloaded from server From 054a818fff6532e4153659e61cd20df4b7a18139 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Fri, 15 Aug 2025 16:01:15 +0200 Subject: [PATCH 06/24] fixes for missing schema - fixing update chunks for files with the same checksum - added minimal version of sync command --- mergin/cli.py | 32 +++++ mergin/client.py | 78 ++++++++--- mergin/client_pull.py | 2 +- mergin/client_push.py | 31 +++-- mergin/local_changes.py | 14 +- mergin/test/test_client.py | 223 ++++++++++++++++++++++++++---- mergin/test/test_local_changes.py | 30 +++- 7 files changed, 343 insertions(+), 67 deletions(-) diff --git a/mergin/cli.py b/mergin/cli.py index e479b886..91af9c28 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -468,6 +468,38 @@ def pull(ctx): except Exception as e: _print_unhandled_exception() +@cli.command() +@click.pass_context +def sync(ctx): + """Synchronize the project. Pull latest project version from the server and push split changes.""" + mc = ctx.obj["client"] + if mc is None: + return + directory = os.getcwd() + upload_job = None + length = 1 + try: + def on_progress(increment, push_job): + nonlocal upload_job + upload_job = push_job + + # run pull & push cycles until there are no local changes + mc.sync_project_with_callback(directory, progress_callback=on_progress) + + click.secho("Sync complete.", fg="green") + + except InvalidProject as e: + click.secho("Invalid project directory ({})".format(str(e)), fg="red") + except ClientError as e: + click.secho("Error: " + str(e), fg="red") + return + except KeyboardInterrupt: + click.secho("Cancelling...") + if upload_job: + push_project_cancel(upload_job) + except Exception as e: + _print_unhandled_exception() + @cli.command() @click.argument("version") diff --git a/mergin/client.py b/mergin/client.py index 0ff26caf..29f040b4 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -3,7 +3,6 @@ import os import json import shutil -import time import zlib import base64 import urllib.parse @@ -17,6 +16,7 @@ import re import typing import warnings +from time import sleep from typing import List @@ -41,7 +41,7 @@ download_diffs_finalize, ) from .client_pull import pull_project_async, pull_project_wait, pull_project_finalize -from .client_push import push_project_async, push_project_wait, push_project_finalize, UploadChunksCache +from .client_push import get_push_changes_batch, push_project_async, push_project_is_running, push_project_wait, push_project_finalize, UploadChunksCache from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable from .version import __version__ @@ -1486,32 +1486,72 @@ def create_invitation(self, workspace_id: int, email: str, workspace_role: Works ws_inv = self.post(f"v2/workspaces/{workspace_id}/invitations", params, json_headers) return json.load(ws_inv) - def sync_project(self, project_dir): + def sync_project(self, project_directory): """ 
Syncs project by loop with these steps: 1. Pull server version 2. Get local changes 3. Push first change batch Repeat if there are more local changes. - The batch pushing makes use of the server ability to handle simultaneously exclusive upload (that blocks - other uploads) and non-exclusive upload (for adding assets) """ - attempts = 2 - for attempt in range(attempts): + mp = MerginProject(project_directory) + has_changes = True + server_conflict_attempts = 0 + while has_changes: + pull_job = pull_project_async(self, project_directory) + if pull_job: + pull_project_wait(pull_job) + pull_project_finalize(pull_job) + try: - pull_job = pull_project_async(self, project_dir) - if pull_job: - pull_project_wait(pull_job) - pull_project_finalize(pull_job) - - job = push_project_async(self, project_dir) - if job: - push_project_wait(job) - push_project_finalize(job) - break + job = push_project_async(self, project_directory) + if not job: + break + push_project_wait(job) + push_project_finalize(job) + _, has_changes = get_push_changes_batch(self, mp, job.server_resp) + except ClientError as e: + if e.http_error == 409 and server_conflict_attempts < 2: + # retry on conflict, e.g. when server has changes that we do not have yet + mp.log.info("Attempting sync process due to conflicts between server and local directory or another user is syncing.") + server_conflict_attempts += 1 + sleep(5) + continue + raise e + + def sync_project_with_callback(self, project_directory, progress_callback=None, sleep_time=0.1): + """ + Syncs project while sending push progress info as callback. + Sync is done in this loop: + Pending changes? -> Pull -> Get changes batch -> Push the changes -> repeat + :param progress_callback: updates the progress bar in CLI, on_progress(increment) + :param sleep_time: sleep time between calling the callback function + """ + mp = MerginProject(project_directory) + has_changes = True + server_conflict_attempts = 0 + while has_changes: + pull_job = pull_project_async(self, project_directory) + if pull_job: + pull_project_wait(pull_job) + pull_project_finalize(pull_job) + try: + job = push_project_async(self, project_directory) + if not job: + break + last = 0 + while push_project_is_running(job): + sleep(sleep_time) + now = job.transferred_size + progress_callback(now - last, job) # update progressbar with transferred size increment + last = now + push_project_finalize(job) + _, has_changes = get_push_changes_batch(self, mp, job.server_resp) except ClientError as e: - if e.http_error == 409 and attempt < attempts - 1: + if e.http_error == 409 and server_conflict_attempts < 2: # retry on conflict, e.g. when server has changes that we do not have yet - time.sleep(5) + mp.log.info("Attempting sync process due to conflicts between server and local directory or another user is syncing.") + server_conflict_attempts += 1 + sleep(5) continue raise e diff --git a/mergin/client_pull.py b/mergin/client_pull.py index a46e6f96..7337facf 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -373,7 +373,7 @@ def dump(self): print("--- END ---") -def pull_project_async(mc, directory): +def pull_project_async(mc, directory) -> PullJob: """ Starts project pull in background and returns handle to the pending job. Using that object it is possible to watch progress or cancel the ongoing work. 
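
For reference, the pull/push cycle that the sync_project() method added in this
patch implements can be driven from client code roughly like this (a minimal
sketch, assuming a configured MerginClient instance `mc` and an
already-downloaded project directory; the conflict-retry handling shown above
is omitted):

    from mergin.client_pull import pull_project_async, pull_project_wait, pull_project_finalize
    from mergin.client_push import push_project_async, push_project_wait, push_project_finalize

    def minimal_sync(mc, directory):
        # repeat pull & push cycles until a push reports no pending local changes
        while True:
            pull_job = pull_project_async(mc, directory)
            if pull_job:
                pull_project_wait(pull_job)
                pull_project_finalize(pull_job)
            push_job = push_project_async(mc, directory)
            if not push_job:
                break  # nothing left to upload
            push_project_wait(push_job)
            push_project_finalize(push_job)
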
diff --git a/mergin/client_push.py b/mergin/client_push.py index 0e01ad60..812e9824 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -109,7 +109,6 @@ def upload_chunk_v2_api(self, data: bytes, checksum: str): resp_dict = json.load(resp) self.server_chunk_id = resp_dict.get("id") self.mc.upload_chunks_cache.add(checksum, self.server_chunk_id) - self.mp.log.debug(f"Upload chunk finished: {self.file_path}") def upload_blocking(self): with open(self.file_path, "rb") as file_handle: @@ -137,7 +136,7 @@ def upload_blocking(self): continue raise - self.mp.log.debug(f"Upload chunk finished: {self.file_path}") + self.mp.log.debug(f"Upload chunk {self.chunk_id} finished: {self.file_path}") class UploadJob: @@ -188,7 +187,9 @@ def add_items(self, items: List[UploadQueueItem], total_size: int): def update_chunks_from_items(self): """Update chunks in LocalChanges from the upload queue items.""" - self.changes.update_chunks([(item.file_checksum, item.server_chunk_id) for item in self.upload_queue_items]) + self.changes.update_chunks( + [(item.file_checksum, item.server_chunk_id) for item in self.upload_queue_items] + ) def create_upload_chunks(mc, mp: MerginProject, local_changes: List[LocalChange]) -> Tuple[List[UploadQueueItem], int]: @@ -348,11 +349,13 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: + f"\n\nLocal version: {local_version}\nServer version: {server_version}" ) - changes = mp.get_push_changes() - changes = filter_changes(mc, project_info, changes) - mp.log.debug("push changes:\n" + pprint.pformat(changes)) - tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") + changes, changes_len = get_push_changes_batch(mc, mp, project_info) + if not changes_len: + mp.log.info(f"--- push {project_path} - nothing to do") + return + + mp.log.debug("push changes:\n" + pprint.pformat(changes)) # If there are any versioned files (aka .gpkg) that are not updated through a diff, # we need to make a temporary copy somewhere to be sure that we are uploading full content. @@ -367,10 +370,6 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: if mp.is_versioned_file(f["path"]): mp.copy_versioned_file_for_upload(f, tmp_dir.name) - if not sum(len(v) for v in changes.values()): - mp.log.info(f"--- push {project_path} - nothing to do") - return - server_feaure_flags = mc.server_features() job = None local_changes = LocalChanges( @@ -527,3 +526,13 @@ def remove_diff_files(job: UploadJob) -> None: diff_file = job.mp.fpath_meta(diff.path) if os.path.exists(diff_file): os.remove(diff_file) + + +def get_push_changes_batch(mc, mp: MerginProject, project_info: dict) -> Tuple[dict, int]: + """ + Get changes that need to be pushed to the server. 
+ """ + changes = mp.get_push_changes() + changes = filter_changes(mc, project_info, changes) + + return changes, sum(len(v) for v in changes.values()) diff --git a/mergin/local_changes.py b/mergin/local_changes.py index 7d71ef53..78da8e59 100644 --- a/mergin/local_changes.py +++ b/mergin/local_changes.py @@ -15,6 +15,12 @@ class LocalChange(BaseLocalChange): chunks: List[str] = field(default_factory=list) diff: Optional[dict] = None upload_file: Optional[str] = None + # some functions (MerginProject.compare_file_sets) are adding version to the change from project info + version: Optional[str] = None + # some functions (MerginProject.compare_file_sets) are adding history dict to the change from project info + history: Optional[dict] = None + # some functions (MerginProject.compare_file_sets) are adding location dict to the change from project info + location: Optional[str] = None def get_diff(self) -> Optional[BaseLocalChange]: if self.diff: @@ -69,15 +75,15 @@ def update_chunks(self, chunks: List[Tuple[str, str]]) -> None: lists based on the provided `chunks` list, which contains tuples of (checksum, chunk_id). """ for change in self.added: - change.chunks = [ + change.chunks = list({ chunk[1] for chunk in chunks if chunk[0] == change.checksum - ] + }) for change in self.updated: - change.chunks = [ + change.chunks = list({ chunk[1] for chunk in chunks if chunk[0] == change.checksum - ] \ No newline at end of file + }) \ No newline at end of file diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 32bc192f..d1d4831a 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1,3 +1,4 @@ +import hashlib import json import logging import os @@ -22,13 +23,16 @@ TokenError, ServerType, ) -from ..client_push import push_project_async, push_project_cancel +from ..client_push import push_project_async, push_project_cancel, push_project_finalize, push_project_wait from ..client_pull import ( download_project_async, download_project_wait, download_project_finalize, download_project_is_running, download_project_cancel, + pull_project_async, + pull_project_finalize, + pull_project_wait, ) from ..utils import ( generate_checksum, @@ -267,6 +271,66 @@ def test_create_remote_project_from_local(mc): with pytest.raises(Exception, match="Project directory already exists"): mc.download_project(project, download_dir) +def test_new_project_sync_v1_api(mc): + """ + Create a new project, download it, add a file and then do sync - it should not fail + This is using v1 endpoint. 
+    """
+    server_features = mc.server_features()
+    mc._server_features = {
+        "v2_push_enabled": False
+    }
+    test_project = "test_new_project_sync_v1"
+    project = API_USER + "/" + test_project
+    project_dir = os.path.join(TMP_DIR, test_project)  # primary project dir for updates
+
+    cleanup(mc, project, [project_dir])
+    # create remote project
+    mc.create_project(test_project)
+
+    # download the project
+    mc.download_project(project, project_dir)
+
+    # add a test file
+    shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir)
+
+    # do a full sync - it should not fail
+    mc.pull_project(project_dir)
+    mc.push_project(project_dir)
+
+    # make sure everything is up-to-date
+    mp = MerginProject(project_dir)
+    local_changes = mp.get_push_changes()
+    assert not local_changes["added"] and not local_changes["removed"] and not local_changes["updated"]
+    mc._server_features = server_features
+
+def test_new_project_sync_v2_api(mc):
+    """
+    Create a new project, download it, add a file and then do sync - it should not fail
+    This is using v2 endpoint.
+    """
+    test_project = "test_new_project_sync_v2"
+    project = API_USER + "/" + test_project
+    project_dir = os.path.join(TMP_DIR, test_project)  # primary project dir for updates
+
+    cleanup(mc, project, [project_dir])
+    # create remote project
+    mc.create_project(test_project)
+
+    # download the project
+    mc.download_project(project, project_dir)
+
+    # add a test file
+    shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir)
+
+    # do a full sync - it should not fail
+    mc.pull_project(project_dir)
+    mc.push_project(project_dir)
+
+    # make sure everything is up-to-date
+    mp = MerginProject(project_dir)
+    local_changes = mp.get_push_changes()
+    assert not local_changes["added"] and not local_changes["removed"] and not local_changes["updated"]
 
 def test_push_pull_changes(mc):
     test_project = "test_push"
@@ -363,6 +427,27 @@ def test_push_pull_changes(mc):
     )
     assert generate_checksum(os.path.join(project_dir_2, f_updated)) == f_remote_checksum
 
+def test_sync_remove(mc):
+    """
+    Integration test for sync process, where just files are removed.
+    In that case, no upload chunks (UploadQueueItems) are created and the process goes directly to finalize. 
+ """ + test_project = "test_sync_remove" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates + project_dir_removed = os.path.join(TMP_DIR, f"{test_project}_removed") # primary project dir for updates + cleanup(mc, project, [project_dir, project_dir_removed]) + # create remote project + mc.create_project(test_project) + # download the project + mc.download_project(project, project_dir) + mc.download_project(project, project_dir_removed) + shutil.copy(os.path.join(TEST_DATA_DIR, "base.gpkg"), project_dir) + mc.sync_project(project_dir) + mc.sync_project(project_dir_removed) + os.remove(os.path.join(project_dir_removed, "base.gpkg")) # Remove base.gpkg + mc.sync_project(project_dir_removed) + def test_cancel_push(mc): """ @@ -568,33 +653,6 @@ def test_force_gpkg_update(mc): assert "diff" not in f_remote -def test_new_project_sync(mc): - """Create a new project, download it, add a file and then do sync - it should not fail""" - - test_project = "test_new_project_sync" - project = API_USER + "/" + test_project - project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates - - cleanup(mc, project, [project_dir]) - # create remote project - mc.create_project(test_project) - - # download the project - mc.download_project(project, project_dir) - - # add a test file - shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir) - - # do a full sync - it should not fail - mc.pull_project(project_dir) - mc.push_project(project_dir) - - # make sure everything is up-to-date - mp = MerginProject(project_dir) - local_changes = mp.get_push_changes() - assert not local_changes["added"] and not local_changes["removed"] and not local_changes["updated"] - - def test_missing_basefile_pull(mc): """Test pull of a project where basefile of a .gpkg is missing for some reason (it should gracefully handle it by downloading the missing basefile) @@ -2984,3 +3042,112 @@ def test_validate_auth(mc: MerginClient): # this should pass and not raise an error, as the client is able to re-login with pytest.raises(LoginError): mc_auth_token_login_wrong_password.validate_auth() + +def test_uploaded_chunks_cache(mc): + """Create a new project, download it, add a file and then do sync - it should not fail""" + + test_project = "test_uploaded_chunks_cache" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates + + cleanup(mc, project, [project_dir]) + # create remote project + mc.create_project(test_project) + + # download the project + mc.download_project(project, project_dir) + mp = MerginProject(project_dir) + + # add a test file + file = os.path.join(TEST_DATA_DIR, "test.txt") + shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir) + shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), os.path.join(project_dir, "test2.txt")) + + with open(file, "rb") as file_handle: + data = file_handle.read() + checksum = hashlib.sha1() + checksum.update(data) + checksum_str = checksum.hexdigest() + resp = mc.post(f"/v2/projects/{mp.project_id()}/chunks", data, {"Content-Type": "application/octet-stream"}) + resp_dict = json.load(resp) + chunk_id = resp_dict.get("id") + mc.upload_chunks_cache.add(checksum_str, chunk_id) + job = push_project_async(mc, project_dir) + push_project_wait(job) + push_project_finalize(job) + assert job.upload_queue_items[0].server_chunk_id == chunk_id + assert job.upload_queue_items[1].server_chunk_id == chunk_id + assert 
mc.upload_chunks_cache.cache == {} + +def test_client_push_project_async(mc): + """ + Integration tests for low level client_push functions + """ + test_project = "test_client_push" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates + cleanup(mc, project, [project_dir]) + # create remote project + mc.create_project(test_project) + # download the project + mc.download_project(project, project_dir) + shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir) + shutil.copy(os.path.join(TEST_DATA_DIR, "base.gpkg"), project_dir) + job = push_project_async(mc, project_dir) + assert job + assert len(job.upload_queue_items) == 2 + assert len(job.changes.added) == 2 + assert len(job.changes.updated) == 0 + assert len(job.changes.removed) == 0 + push_project_wait(job) + assert job.is_cancelled == False + assert job.total_size == job.transferred_size + assert job.version == "v0" + push_project_finalize(job) + assert not os.path.exists(job.tmp_dir.name) + +def test_client_pull_project_async(mc): + """ + Integration tests for low level client_pull functions + """ + test_project = "test_client_pull" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates + project_dir_pull = os.path.join(TMP_DIR, f"{test_project}_pull") # primary project dir for updates + cleanup(mc, project, [project_dir, project_dir_pull]) + # create remote project + mc.create_project(test_project) + # download the project + mc.download_project(project, project_dir) + mc.download_project(project, project_dir_pull) + shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir) + shutil.copy(os.path.join(TEST_DATA_DIR, "base.gpkg"), project_dir) + mc.sync_project(project_dir) + + job = pull_project_async(mc, project_dir_pull) + assert job + assert len(job.pull_changes.get("added")) == 2 + pull_project_wait(job) + assert job.total_size == job.transferred_size + pull_project_finalize(job) + assert not os.path.exists(job.temp_dir) + +def test_client_project_sync(mc): + test_project = "test_client_project_sync" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) + project_dir_2 = os.path.join(TMP_DIR, f"{test_project}_2") + cleanup(mc, project, [project_dir, project_dir_2]) + # create remote project + mc.create_project(test_project) + # download the project + mc.download_project(project, project_dir) + mc.download_project(project, project_dir_2) + shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir) + shutil.copy(os.path.join(TEST_DATA_DIR, "base.gpkg"), project_dir) + mc.sync_project(project_dir) + shutil.copy( + os.path.join(TEST_DATA_DIR, "inserted_1_A.gpkg"), + os.path.join(project_dir_2, "base.gpkg"), + ) + mc.sync_project(project_dir_2) \ No newline at end of file diff --git a/mergin/test/test_local_changes.py b/mergin/test/test_local_changes.py index 4a57bab9..be753e20 100644 --- a/mergin/test/test_local_changes.py +++ b/mergin/test/test_local_changes.py @@ -12,7 +12,24 @@ def test_local_changes_from_dict(): {"path": "file2.txt", "checksum": "xyz789", "size": 2048, "mtime": datetime.now()} ], "removed": [ - {"path": "file3.txt", "checksum": "lmn456", "size": 512, "mtime": datetime.now()} + { + "checksum": "a4e7e35d1e8c203b5d342ecd9676adbebc924c84", + "diff": None, + "history": { + "v1": { + "change": "added", + "checksum": "a4e7e35d1e8c203b5d342ecd9676adbebc924c84", + "expiration": None, + "path": "base.gpkg", + "size": 
98304 + } + }, + "location": "v1/base.gpkg", + "mtime": "2025-08-15T09:04:21.690590Z", + "path": "base.gpkg", + "size": 98304, + "version": "v1" + } ] } @@ -29,7 +46,10 @@ def test_local_changes_from_dict(): assert len(local_changes.removed) == 1 assert local_changes.added[0].path == "file1.txt" assert local_changes.updated[0].path == "file2.txt" - assert local_changes.removed[0].path == "file3.txt" + assert local_changes.removed[0].path == "base.gpkg" + assert local_changes.removed[0].history + assert local_changes.removed[0].location + assert not len(local_changes.removed[0].chunks) def test_local_change_get_diff(): @@ -74,18 +94,20 @@ def test_local_changes_get_server_request(): def test_local_changes_update_chunks(): """Test the update_chunks method of LocalChanges.""" added = [ - LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()) + LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()), + LocalChange(path="file2.txt", checksum="abc123", size=1024, mtime=datetime.now()) ] updated = [ LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now()) ] local_changes = LocalChanges(added=added, updated=updated) - chunks = [("abc123", "chunk1"), ("xyz789", "chunk2")] + chunks = [("abc123", "chunk1"), ("abc123", "chunk1"), ("xyz789", "chunk2")] local_changes.update_chunks(chunks) assert local_changes.added[0].chunks == ["chunk1"] + assert local_changes.added[1].chunks == ["chunk1"] assert local_changes.updated[0].chunks == ["chunk2"] def test_local_changes_get_upload_changes(): From 7ba097195659375ed1239191400ff05a09e02f6c Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Fri, 15 Aug 2025 16:07:12 +0200 Subject: [PATCH 07/24] cleanup --- scripts/inv.py | 14 -------------- scripts/push_file.py | 32 -------------------------------- scripts/sync.py | 38 -------------------------------------- 3 files changed, 84 deletions(-) delete mode 100644 scripts/inv.py delete mode 100644 scripts/push_file.py delete mode 100644 scripts/sync.py diff --git a/scripts/inv.py b/scripts/inv.py deleted file mode 100644 index eeb58cca..00000000 --- a/scripts/inv.py +++ /dev/null @@ -1,14 +0,0 @@ -import mergin - -LOGIN="r22overviews" -PASSWORD="MerginOverviews123" -URL="https://app.dev.merginmaps.com" -WS_ID = 10173 - -client = mergin.MerginClient( - url=URL, - login=LOGIN, - password=PASSWORD -) - -client.create_invitation(email="marcel.kocisek@lutraconsulting.co.uk", workspace_id=WS_ID, workspace_role=mergin.common.WorkspaceRole.EDITOR) \ No newline at end of file diff --git a/scripts/push_file.py b/scripts/push_file.py deleted file mode 100644 index f9121df2..00000000 --- a/scripts/push_file.py +++ /dev/null @@ -1,32 +0,0 @@ -import mergin -import os -import random -import string - -LOGIN="admin3" -PASSWORD="topsecret" -URL="http://localhost:8080" - -client = mergin.MerginClient( - url=URL, - login=LOGIN, - password=PASSWORD -) - -PROJECT="mergin/hejj6" -LOCAL_FOLDER=f"/tmp/project{random.choices(string.ascii_letters + string.digits, k=10)}" -client.download_project(PROJECT, LOCAL_FOLDER) - -# create dummy random named file to LOCAL_FOLDER -def create_dummy_file(folder, filename): - file_path = os.path.join(folder, filename) - with open(file_path, 'w') as f: - f.write('This is a dummy file.\n' * (2 * (1024 * 1024))) - return file_path - -dummy_filename = ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + '.txt' -dummy_file_path = create_dummy_file(LOCAL_FOLDER, dummy_filename) -print(f"Dummy file created at: 
{dummy_file_path}")
-
-
-client.push_project(LOCAL_FOLDER)
diff --git a/scripts/sync.py b/scripts/sync.py
deleted file mode 100644
index 32660463..00000000
--- a/scripts/sync.py
+++ /dev/null
@@ -1,38 +0,0 @@
-LOGIN="admin3"
-PASSWORD="topsecret"
-URL="http://localhost:8080"
-
-
-import pandas as pd
-import geopandas as gpd
-import os
-import mergin
-
-client = mergin.MerginClient(
-    url=URL,
-    login=LOGIN,
-    password=PASSWORD
-)
-
-PROJECT="mergin/vienna"
-
-
-LOCAL_FOLDER="/tmp/project3"
-# os.rmdir(LOCAL_FOLDER)
-
-client.download_project(PROJECT, LOCAL_FOLDER)
-
-# # Get the data from the CSV (use just sample of data)
-# csv_file = os.path.join(LOCAL_FOLDER, "vienna_trees_gansehauffel.csv")
-# csv_df = pd.read_csv(csv_file, nrows=20, dtype={"health": str})
-# # Convert geometry in WKT format to GeoDataFrame
-# gdf = gpd.GeoDataFrame(csv_df, geometry=gpd.GeoSeries.from_wkt(csv_df.geometry))
-# print(gdf.head())
-# # Save the GeoDataFrame to a Geopackage
-# gdf.to_file(
-#     os.path.join(LOCAL_FOLDER, "Ready_to_survey_trees.gpkg"),
-#     layer="Ready_to_survey_trees", driver="GPKG",
-# )
-
-# print (client.upload_chunks_cache.cache)
-# client.push_project(LOCAL_FOLDER) \ No newline at end of file

From 42c34a22e97293ce7641f851d4c7d13d410c1903 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek" 
Date: Tue, 19 Aug 2025 15:09:07 +0200
Subject: [PATCH 08/24] Retry sync job

Introduce mapping based on chunk_id (generated by client) and
server_chunk_id (generated by server).
---
 mergin/client.py                  | 28 +++++++++++++++++++++-----
 mergin/client_push.py             | 17 +++++++++-------
 mergin/common.py                  |  4 ++++
 mergin/local_changes.py           | 33 +++++++++++++++++++------------
 mergin/test/test_local_changes.py |  6 +++---
 5 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/mergin/client.py b/mergin/client.py
index 29f040b4..3d0b32a7 100644
--- a/mergin/client.py
+++ b/mergin/client.py
@@ -22,6 +22,7 @@
 
 from .common import (
     ClientError,
+    ErrorCode,
     LoginError,
     WorkspaceRole,
     ProjectRole,
@@ -41,7 +42,14 @@
     download_diffs_finalize,
 )
 from .client_pull import pull_project_async, pull_project_wait, pull_project_finalize
-from .client_push import get_push_changes_batch, push_project_async, push_project_is_running, push_project_wait, push_project_finalize, UploadChunksCache
+from .client_push import (
+    get_push_changes_batch,
+    push_project_async,
+    push_project_is_running,
+    push_project_wait,
+    push_project_finalize,
+    UploadChunksCache,
+)
 from .utils import DateTimeEncoder, get_versions_with_file_changes, int_version, is_version_acceptable
 from .version import __version__
 
@@ -1511,9 +1519,14 @@ def sync_project(self, project_directory):
             push_project_finalize(job)
             _, has_changes = get_push_changes_batch(self, mp, job.server_resp)
         except ClientError as e:
-            if e.http_error == 409 and server_conflict_attempts < 2:
+            if (
+                e.sync_retry
+                and server_conflict_attempts < 2
+            ):
                 # retry on conflict, e.g. when server has changes that we do not have yet
-                mp.log.info("Attempting sync process due to conflicts between server and local directory or another user is syncing.")
+                mp.log.info(
+                    "Attempting sync process due to conflicts between server and local directory or another user is syncing." 
+ ) server_conflict_attempts += 1 sleep(5) continue @@ -1548,9 +1561,14 @@ def sync_project_with_callback(self, project_directory, progress_callback=None, push_project_finalize(job) _, has_changes = get_push_changes_batch(self, mp, job.server_resp) except ClientError as e: - if e.http_error == 409 and server_conflict_attempts < 2: + if ( + e.sync_retry + and server_conflict_attempts < 2 + ): # retry on conflict, e.g. when server has changes that we do not have yet - mp.log.info("Attempting sync process due to conflicts between server and local directory or another user is syncing.") + mp.log.info( + "Attempting sync process due to conflicts between server and local directory or another user is syncing." + ) server_conflict_attempts += 1 sleep(5) continue diff --git a/mergin/client_push.py b/mergin/client_push.py index 812e9824..2c2e6ad1 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -21,7 +21,7 @@ from .local_changes import LocalChange, LocalChanges -from .common import UPLOAD_CHUNK_SIZE, ClientError +from .common import UPLOAD_CHUNK_SIZE, ClientError, ErrorCode from .merginproject import MerginProject from .editor import filter_changes @@ -136,7 +136,7 @@ def upload_blocking(self): continue raise - self.mp.log.debug(f"Upload chunk {self.chunk_id} finished: {self.file_path}") + self.mp.log.debug(f"Upload chunk {self.server_chunk_id or self.chunk_id} finished: {self.file_path}") class UploadJob: @@ -188,7 +188,7 @@ def add_items(self, items: List[UploadQueueItem], total_size: int): def update_chunks_from_items(self): """Update chunks in LocalChanges from the upload queue items.""" self.changes.update_chunks( - [(item.file_checksum, item.server_chunk_id) for item in self.upload_queue_items] + [(item.chunk_id, item.server_chunk_id) for item in self.upload_queue_items] ) @@ -285,9 +285,9 @@ def create_upload_job_v2_api( {"Content-Type": "application/json"}, ) except ClientError as err: - # ignore 409 error, it means that the project is already in a transaction + # ignore 409 error codes, it means that the project is already in a transaction # but we still want to continue with the upload - if err.http_error not in [409]: + if err.server_code not in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists]: mp.log.error("Error doing push check compatibility: " + str(err)) mp.log.error("--- push aborted") raise @@ -439,11 +439,10 @@ def push_project_finalize(job: UploadJob): job.mp.log.error("--- push finish failed! " + error_msg) raise ClientError("Upload error: " + error_msg) - job.update_chunks_from_items() - if server_features.get("v2_push_enabled"): # v2 push uses a different endpoint try: + job.update_chunks_from_items() job.mp.log.info(f"Finishing transaction for project {job.mp.project_full_name()}") job.mc.post( f"/v2/projects/{job.mp.project_id()}/versions", @@ -456,6 +455,10 @@ def push_project_finalize(job: UploadJob): project_info = job.mc.project_info(job.mp.project_id()) job.server_resp = project_info except ClientError as err: + if err.server_code in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists]: + err.sync_retry = True + else: + job.mc.upload_chunks_cache.clear() # clear the upload chunks cache, as we are getting fatal from server job.mp.log.error("--- push finish failed! 
" + str(err)) raise err elif with_upload_of_files: diff --git a/mergin/common.py b/mergin/common.py index b27c95ec..fc9c3565 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -19,6 +19,8 @@ class ErrorCode(Enum): ProjectsLimitHit = "ProjectsLimitHit" StorageLimitHit = "StorageLimitHit" + ProjectVersionExists = "ProjectVersionExists" + AnotherUploadRunning = "AnotherUploadRunning" class ClientError(Exception): @@ -32,6 +34,8 @@ def __init__(self, detail, url=None, server_code=None, server_response=None, htt self.server_response = server_response self.extra = None + # Param to mark error as candidate for retry sync process + self.sync_retry = False def __str__(self): string_res = f"Detail: {self.detail}\n" diff --git a/mergin/local_changes.py b/mergin/local_changes.py index 78da8e59..ccb0a08d 100644 --- a/mergin/local_changes.py +++ b/mergin/local_changes.py @@ -66,24 +66,31 @@ def get_upload_changes(self) -> List[LocalChange]: This includes added and updated files. """ return self.added + self.updated + + def _map_unique_chunks(self, change_chunks: List[str], server_chunks: List[Tuple[str, str]]) -> List[str]: + """ + Helper function to map and deduplicate chunk ids for a single change. + """ + mapped = [] + seen = set() + for chunk in change_chunks: + for server_chunk in server_chunks: + chunk_id = server_chunk[0] + server_chunk_id = server_chunk[1] + if chunk_id == chunk and server_chunk_id not in seen: + mapped.append(server_chunk_id) + seen.add(server_chunk_id) + return mapped - def update_chunks(self, chunks: List[Tuple[str, str]]) -> None: + def update_chunks(self, server_chunks: List[Tuple[str, str]]) -> None: """ - Map chunk ids to file checksums. + Map chunk ids to chunks returned from server (server_chunk_id). This method updates the `chunks` attribute of each change in `added` and `updated` - lists based on the provided `chunks` list, which contains tuples of (checksum, chunk_id). + lists based on the provided `server_chunks` list, which contains tuples of (chunk_id, server_chunk_id). 
""" for change in self.added: - change.chunks = list({ - chunk[1] - for chunk in chunks - if chunk[0] == change.checksum - }) + change.chunks = self._map_unique_chunks(change.chunks, server_chunks) for change in self.updated: - change.chunks = list({ - chunk[1] - for chunk in chunks - if chunk[0] == change.checksum - }) \ No newline at end of file + change.chunks = self._map_unique_chunks(change.chunks, server_chunks) \ No newline at end of file diff --git a/mergin/test/test_local_changes.py b/mergin/test/test_local_changes.py index be753e20..c6b31654 100644 --- a/mergin/test/test_local_changes.py +++ b/mergin/test/test_local_changes.py @@ -94,11 +94,11 @@ def test_local_changes_get_server_request(): def test_local_changes_update_chunks(): """Test the update_chunks method of LocalChanges.""" added = [ - LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()), - LocalChange(path="file2.txt", checksum="abc123", size=1024, mtime=datetime.now()) + LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now(),chunks=["abc123"]), + LocalChange(path="file2.txt", checksum="abc123", size=1024, mtime=datetime.now(),chunks=["abc123"]) ] updated = [ - LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now()) + LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now(), chunks=["xyz789"]) ] local_changes = LocalChanges(added=added, updated=updated) From 0128a061f59fb7005ba1601d97fa856bb685874e Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Wed, 3 Sep 2025 12:50:28 +0200 Subject: [PATCH 09/24] Address comstic comments from @varmar05 and some from @wonder-sk + black staff --- mergin/cli.py | 3 +- mergin/client.py | 68 +++--------- mergin/client_push.py | 178 ++++++++++++------------------ mergin/common.py | 9 ++ mergin/local_changes.py | 26 +++-- mergin/test/test_client.py | 16 ++- mergin/test/test_local_changes.py | 67 +++++------ mergin/utils.py | 12 ++ 8 files changed, 159 insertions(+), 220 deletions(-) diff --git a/mergin/cli.py b/mergin/cli.py index 91af9c28..d55d4dab 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -477,14 +477,13 @@ def sync(ctx): return directory = os.getcwd() upload_job = None - length = 1 try: def on_progress(increment, push_job): nonlocal upload_job upload_job = push_job # run pull & push cycles until there are no local changes - mc.sync_project_with_callback(directory, progress_callback=on_progress) + mc.sync_project(directory, progress_callback=on_progress) click.secho("Sync complete.", fg="green") diff --git a/mergin/client.py b/mergin/client.py index 3d0b32a7..a76db253 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -21,6 +21,9 @@ from typing import List from .common import ( + PUSH_ATTEMPT_WAIT, + PUSH_ATTEMPTS, + SYNC_CALLBACK_WAIT, ClientError, ErrorCode, LoginError, @@ -916,7 +919,7 @@ def push_project(self, directory): :param directory: Project's directory :type directory: String """ - job = push_project_async(self, directory) + job = push_project_async(self, directory, check_version=True) if job is None: return # there is nothing to push (or we only deleted some files) push_project_wait(job) @@ -1494,7 +1497,7 @@ def create_invitation(self, workspace_id: int, email: str, workspace_role: Works ws_inv = self.post(f"v2/workspaces/{workspace_id}/invitations", params, json_headers) return json.load(ws_inv) - def sync_project(self, project_directory): + def sync_project(self, project_directory, progress_callback=None): """ Syncs project by loop with these steps: 1. 
Pull server version
@@ -1515,61 +1518,24 @@ def sync_project(self, project_directory):
                 job = push_project_async(self, project_directory)
                 if not job:
                     break
-                push_project_wait(job)
-                push_project_finalize(job)
-                _, has_changes = get_push_changes_batch(self, mp, job.server_resp)
-            except ClientError as e:
-                if (
-                    e.sync_retry
-                    and server_conflict_attempts < 2
-                ):
-                    # retry on conflict, e.g. when server has changes that we do not have yet
-                    mp.log.info(
-                        "Attempting sync process due to conflicts between server and local directory or another user is syncing."
-                    )
-                    server_conflict_attempts += 1
-                    sleep(5)
-                    continue
-                raise e
-
-    def sync_project_with_callback(self, project_directory, progress_callback=None, sleep_time=0.1):
-        """
-        Syncs project while sending push progress info as callback.
-        Sync is done in this loop:
-        Pending changes? -> Pull -> Get changes batch -> Push the changes -> repeat
-        :param progress_callback: updates the progress bar in CLI, on_progress(increment)
-        :param sleep_time: sleep time between calling the callback function
-        """
-        mp = MerginProject(project_directory)
-        has_changes = True
-        server_conflict_attempts = 0
-        while has_changes:
-            pull_job = pull_project_async(self, project_directory)
-            if pull_job:
-                pull_project_wait(pull_job)
-                pull_project_finalize(pull_job)
-            try:
-                job = push_project_async(self, project_directory)
-                if not job:
-                    break
-                last = 0
-                while push_project_is_running(job):
-                    sleep(sleep_time)
-                    now = job.transferred_size
-                    progress_callback(now - last, job)  # update progressbar with transferred size increment
-                    last = now
+                if not progress_callback:
+                    push_project_wait(job)
+                else:
+                    last_size = 0
+                    while push_project_is_running(job):
+                        sleep(SYNC_CALLBACK_WAIT)
+                        current_size = job.transferred_size
+                        progress_callback(current_size - last_size, job)  # call callback with transferred size increment
+                        last_size = current_size
                 push_project_finalize(job)
                 _, has_changes = get_push_changes_batch(self, mp, job.server_resp)
             except ClientError as e:
-                if e.sync_retry and server_conflict_attempts < 2:
+                if e.sync_retry and server_conflict_attempts <= PUSH_ATTEMPTS:
                     # retry on conflict, e.g. when server has changes that we do not have yet
                     mp.log.info(
-                        "Attempting sync process due to conflicts between server and local directory or another user is syncing."
+                        f"Restarting sync process (conflict on server) - {server_conflict_attempts + 1}/{PUSH_ATTEMPTS}"
                     )
                     server_conflict_attempts += 1
-                    sleep(5)
+                    sleep(PUSH_ATTEMPT_WAIT)
                     continue
                 raise e
diff --git a/mergin/client_push.py b/mergin/client_push.py
index 2c2e6ad1..d9fe1e78 100644
--- a/mergin/client_push.py
+++ b/mergin/client_push.py
@@ -7,32 +7,27 @@
 calling push_project_is_running(job) that will just return True/False or by calling
 push_project_wait(job) that will block the current thread (not good for GUI).
 To finish the upload job, we have to call push_project_finalize(job). 
+
+We keep compatibility here with both:
+v1 Push API using the transaction approach
+v2 Push API using raw chunk uploads
 """
 
 from dataclasses import asdict
 import json
-import hashlib
 import pprint
 import tempfile
 import concurrent.futures
 import os
 import time
-from typing import List, Tuple, Optional
+from typing import List, Tuple, Optional, ByteString
 
 from .local_changes import LocalChange, LocalChanges
 
 from .common import UPLOAD_CHUNK_SIZE, ClientError, ErrorCode
 from .merginproject import MerginProject
 from .editor import filter_changes
-
-
-def _do_upload(item, job):
-    """runs in worker thread, have to be defined here to avoid worker threads repeating this function"""
-    if job.is_cancelled:
-        return
-
-    item.upload_blocking()
-    job.transferred_size += item.size
+from .utils import get_data_checksum
 
 
 class UploadChunksCache:
@@ -71,7 +66,7 @@ def __init__(
 
         self._request_headers = {"Content-Type": "application/octet-stream"}
 
-    def upload_chunk(self, data: bytes, checksum: str):
+    def upload_chunk(self, data: ByteString, checksum: str):
         """
         Uploads the chunk to the server.
         """
@@ -84,15 +79,14 @@ def upload_chunk(self, data: bytes, checksum: str):
 
         if not (resp_dict["size"] == len(data) and resp_dict["checksum"] == checksum):
             try:
-                self.mc.post("/v1/project/push/cancel/{}".format(self.transaction_id))
+                self.mc.post(f"/v1/project/push/cancel/{self.transaction_id}")
             except ClientError:
                 pass
-            raise ClientError("Mismatch between uploaded file chunk {} and local one".format(self.chunk_id))
+            raise ClientError(f"Mismatch between uploaded file chunk {self.chunk_id} and local one")
 
-    def upload_chunk_v2_api(self, data: bytes, checksum: str):
+    def upload_chunk_v2_api(self, data: ByteString, checksum: str):
         """
         Uploads the chunk to the server.
-        :param mc: MerginClient instance
         """
         # try to lookup the chunk in the cache, if yes use it.
         cached_chunk_id = self.mc.upload_chunks_cache.get_chunk_id(checksum)
@@ -114,9 +108,7 @@ def upload_blocking(self):
         with open(self.file_path, "rb") as file_handle:
             file_handle.seek(self.chunk_index * UPLOAD_CHUNK_SIZE)
             data = file_handle.read(UPLOAD_CHUNK_SIZE)
-            checksum = hashlib.sha1()
-            checksum.update(data)
-            checksum_str = checksum.hexdigest()
+            checksum_str = get_data_checksum(data)
 
             self.mp.log.debug(f"Uploading {self.file_path} part={self.chunk_index}")
 
             attempts = 2
@@ -130,8 +122,7 @@ def upload_blocking(self):
                     self.upload_chunk(data, checksum_str)
                     break  # exit loop if upload was successful
                 except ClientError as e:
-                    if e.http_error == 409 and attempt < attempts - 1:
-                        # retry on conflict, e.g. 
when server has changes that we do not have yet + if attempt < attempts - 1: time.sleep(5) continue raise @@ -145,7 +136,6 @@ class UploadJob: def __init__( self, version: str, changes: LocalChanges, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir ): - # self.project_path = project_path # full project name ("username/projectname") self.version = version self.changes: LocalChanges = changes # dictionary of local changes to the project self.transaction_id = transaction_id # ID of the transaction assigned by the server @@ -163,42 +153,48 @@ def __init__( def dump(self): print("--- JOB ---", self.total_size, "bytes") for item in self.upload_queue_items: - print("- {} {} {}".format(item.file_path, item.chunk_index, item.size)) + print(f"- {item.file_path} {item.chunk_index} {item.size}") print("--- END ---") - def submit_item_to_thread(self, item: UploadQueueItem): + def _submit_item_to_thread(self, item: UploadQueueItem): """Upload a single item in the background""" - feature = self.executor.submit(_do_upload, item, self) - self.futures.append(feature) + future = self.executor.submit(_do_upload, item, self) + self.futures.append(future) - def add_items(self, items: List[UploadQueueItem], total_size: int): + def add_items(self, items: List[UploadQueueItem]): """Add multiple chunks to the upload queue""" - self.total_size = total_size + self.total_size = sum(item.size for item in items) self.upload_queue_items = items - self.mp.log.info(f"will upload {len(items)} items with total size {total_size}") + self.mp.log.info(f"will upload {len(self.upload_queue_items)} items with total size {self.total_size}") # start uploads in background self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) for item in items: if self.transaction_id: item.transaction_id = self.transaction_id - self.submit_item_to_thread(item) + self._submit_item_to_thread(item) def update_chunks_from_items(self): """Update chunks in LocalChanges from the upload queue items.""" - self.changes.update_chunks( - [(item.chunk_id, item.server_chunk_id) for item in self.upload_queue_items] - ) + self.changes.update_chunks([(item.chunk_id, item.server_chunk_id) for item in self.upload_queue_items]) + + +def _do_upload(item: UploadQueueItem, job: UploadJob): + """runs in worker thread, have to be defined here to avoid worker threads repeating this function""" + if job.is_cancelled: + return + + item.upload_blocking() + job.transferred_size += item.size -def create_upload_chunks(mc, mp: MerginProject, local_changes: List[LocalChange]) -> Tuple[List[UploadQueueItem], int]: +def create_upload_chunks(mc, mp: MerginProject, local_changes: List[LocalChange]) -> List[UploadQueueItem]: """ Create a list of UploadQueueItem objects from the changes dictionary and calculate total size of files. This is used to prepare the upload queue for the upload job. """ upload_queue_items = [] - total_size = 0 # prepare file chunks for upload for change in local_changes: file_checksum = change.checksum @@ -222,9 +218,7 @@ def create_upload_chunks(mc, mp: MerginProject, local_changes: List[LocalChange] UploadQueueItem(mc, mp, file_location, size, file_checksum, chunk_id, chunk_index) ) - total_size += file_size - - return upload_queue_items, total_size + return upload_queue_items def create_upload_job( @@ -234,83 +228,56 @@ def create_upload_job( Prepare transaction and create an upload job for the project using the v1 API. 
""" project_path = mp.project_full_name() - local_version = mp.version() - - data = {"version": local_version, "changes": changes.get_server_request()} - try: - resp = mc.post( - f"/v1/project/push/{project_path}", - data, - {"Content-Type": "application/json"}, - ) - except ClientError as err: - mp.log.error("Error starting transaction: " + str(err)) - mp.log.info("--- push aborted") - raise - push_start_resp = json.load(resp) - - upload_changes = changes.get_upload_changes() - transaction_id = push_start_resp["transaction"] if upload_changes else None - job = UploadJob(local_version, changes, transaction_id, mp, mc, tmp_dir) - - if not upload_changes: - mp.log.info("not uploading any files") - job.server_resp = push_start_resp - push_project_finalize(job) - return # all done - no pending job - - mp.log.info(f"got transaction ID {transaction_id}") - - # prepare file chunks for upload - upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_changes) - job.add_items(upload_queue_items, total_size) - return job - - -def create_upload_job_v2_api( - mc, mp: MerginProject, changes: LocalChanges, tmp_dir: tempfile.TemporaryDirectory -) -> Optional[UploadJob]: - """ - Call checkonly endpoint to check compatibility of the project with the server - and prepare an upload job for the project using the v2 API. - """ project_id = mp.project_id() local_version = mp.version() + server_feaures = mc.server_features() - data = {"version": local_version, "changes": changes.get_server_request()} + data = {"version": local_version, "changes": changes.to_server_payload()} + push_start_resp = {} try: - mc.post( - f"/v2/projects/{project_id}/versions", - {**data, "check_only": True}, - {"Content-Type": "application/json"}, - ) + if server_feaures.get("v2_push_enabled"): + mc.post( + f"/v2/projects/{project_id}/versions", + {**data, "check_only": True}, + {"Content-Type": "application/json"}, + ) + else: + resp = mc.post( + f"/v1/project/push/{project_path}", + data, + {"Content-Type": "application/json"}, + ) + push_start_resp = json.load(resp) except ClientError as err: - # ignore 409 error codes, it means that the project is already in a transaction - # but we still want to continue with the upload - if err.server_code not in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists]: - mp.log.error("Error doing push check compatibility: " + str(err)) - mp.log.error("--- push aborted") + if err.server_code not in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists.value]: + mp.log.error("Error starting transaction: " + str(err)) + mp.log.info("--- push aborted") raise else: mp.log.info("Project version exists or another upload is running, continuing with upload.") upload_changes = changes.get_upload_changes() - job = UploadJob(local_version, changes, None, mp, mc, tmp_dir) - upload_queue_items, total_size = create_upload_chunks(mc, mp, upload_changes) - job.add_items(upload_queue_items, total_size) + transaction_id = push_start_resp.get("transaction") if upload_changes else None + job = UploadJob(local_version, changes, transaction_id, mp, mc, tmp_dir) + if transaction_id: + mp.log.info(f"got transaction ID {transaction_id}") + + # prepare file chunks for upload + upload_queue_items = create_upload_chunks(mc, mp, upload_changes) if not upload_queue_items: mp.log.info("not uploading any files") + if push_start_resp: + job.server_resp = push_start_resp push_project_finalize(job) - return None # all done - no pending job + return # all done - no pending job 
mp.log.info(f"Starting upload chunks for project {project_id}") - - # prepare file chunks for upload + job.add_items(upload_queue_items) return job -def push_project_async(mc, directory) -> Optional[UploadJob]: +def push_project_async(mc, directory, check_version=False) -> Optional[UploadJob]: """Starts push of a project and returns pending upload job""" mp = MerginProject(directory) @@ -342,7 +309,7 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: raise ClientError(f"You do not seem to have write access to the project (username '{username}')") # DISCUSSION: do we want to check if the project is up to date before pushing? For now, we removed this part - if local_version != server_version: + if check_version and local_version != server_version: mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") raise ClientError( "There is a new version of the project on the server. Please update your local copy." @@ -370,19 +337,12 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: if mp.is_versioned_file(f["path"]): mp.copy_versioned_file_for_upload(f, tmp_dir.name) - server_feaure_flags = mc.server_features() - job = None local_changes = LocalChanges( added=[LocalChange(**change) for change in changes["added"]], updated=[LocalChange(**change) for change in changes["updated"]], removed=[LocalChange(**change) for change in changes["removed"]], ) - if server_feaure_flags.get("v2_push_enabled"): - # v2 push uses a different endpoint - job = create_upload_job_v2_api(mc, mp, local_changes, tmp_dir) - else: - # v1 push uses the old endpoint - job = create_upload_job(mc, mp, local_changes, tmp_dir) + job = create_upload_job(mc, mp, local_changes, tmp_dir) return job @@ -433,8 +393,8 @@ def push_project_finalize(job: UploadJob): raise future.exception() if job.transferred_size != job.total_size: - error_msg = "Transferred size ({}) and expected total size ({}) do not match!".format( - job.transferred_size, job.total_size + error_msg = ( + f"Transferred size ({job.transferred_size}) and expected total size ({job.total_size}) do not match!" ) job.mp.log.error("--- push finish failed! 
" + error_msg) raise ClientError("Upload error: " + error_msg) @@ -448,14 +408,14 @@ def push_project_finalize(job: UploadJob): f"/v2/projects/{job.mp.project_id()}/versions", data={ "version": job.version, - "changes": job.changes.get_server_request(), + "changes": job.changes.to_server_payload(), }, headers={"Content-Type": "application/json"}, ) project_info = job.mc.project_info(job.mp.project_id()) job.server_resp = project_info except ClientError as err: - if err.server_code in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists]: + if err.server_code in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists.value]: err.sync_retry = True else: job.mc.upload_chunks_cache.clear() # clear the upload chunks cache, as we are getting fatal from server diff --git a/mergin/common.py b/mergin/common.py index fc9c3565..8ba32584 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -9,6 +9,15 @@ # size of the log file part to send (if file is larger only this size from end will be sent) MAX_LOG_FILE_SIZE_TO_SEND = 5 * 1024 * 1024 +# number of attempts to push changes (in case of network issues etc) +PUSH_ATTEMPTS = 12 + +# seconds to wait between attempts +PUSH_ATTEMPT_WAIT = 5 + + # seconds to wait between sync callback calls +SYNC_CALLBACK_WAIT = 0.01 + # default URL for submitting logs MERGIN_DEFAULT_LOGS_URL = "https://g4pfq226j0.execute-api.eu-west-1.amazonaws.com/mergin_client_log_submit" diff --git a/mergin/local_changes.py b/mergin/local_changes.py index ccb0a08d..511960cd 100644 --- a/mergin/local_changes.py +++ b/mergin/local_changes.py @@ -2,6 +2,7 @@ from datetime import datetime from typing import Dict, Optional, List, Tuple + @dataclass class BaseLocalChange: path: str @@ -9,11 +10,12 @@ class BaseLocalChange: size: int mtime: datetime + @dataclass class LocalChange(BaseLocalChange): origin_checksum: Optional[str] = None chunks: List[str] = field(default_factory=list) - diff: Optional[dict] = None + diff: Optional[dict] = None upload_file: Optional[str] = None # some functions (MerginProject.compare_file_sets) are adding version to the change from project info version: Optional[str] = None @@ -28,11 +30,10 @@ def get_diff(self) -> Optional[BaseLocalChange]: path=self.diff.get("path", ""), checksum=self.diff.get("checksum", ""), size=self.diff.get("size", 0), - mtime=self.diff.get("mtime", datetime.now()) + mtime=self.diff.get("mtime", datetime.now()), ) - return None - def get_server_data(self) -> dict: + def to_server_data(self) -> dict: result = { "path": self.path, "checksum": self.checksum, @@ -43,30 +44,31 @@ def get_server_data(self) -> dict: result["diff"] = { "path": self.diff.get("path", ""), "checksum": self.diff.get("checksum", ""), - "size": self.diff.get("size", 0) + "size": self.diff.get("size", 0), } return result + @dataclass class LocalChanges: added: List[LocalChange] = field(default_factory=list) updated: List[LocalChange] = field(default_factory=list) removed: List[LocalChange] = field(default_factory=list) - def get_server_request(self) -> dict: + def to_server_payload(self) -> dict: return { - "added": [change.get_server_data() for change in self.added], - "updated": [change.get_server_data() for change in self.updated], - "removed": [change.get_server_data() for change in self.removed], + "added": [change.to_server_data() for change in self.added], + "updated": [change.to_server_data() for change in self.updated], + "removed": [change.to_server_data() for change in self.removed], } - + def get_upload_changes(self) -> 
List[LocalChange]: """ Get all changes that need to be uploaded. This includes added and updated files. """ return self.added + self.updated - + def _map_unique_chunks(self, change_chunks: List[str], server_chunks: List[Tuple[str, str]]) -> List[str]: """ Helper function to map and deduplicate chunk ids for a single change. @@ -93,4 +95,4 @@ def update_chunks(self, server_chunks: List[Tuple[str, str]]) -> None: change.chunks = self._map_unique_chunks(change.chunks, server_chunks) for change in self.updated: - change.chunks = self._map_unique_chunks(change.chunks, server_chunks) \ No newline at end of file + change.chunks = self._map_unique_chunks(change.chunks, server_chunks) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index d1d4831a..cdb79e20 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -271,15 +271,14 @@ def test_create_remote_project_from_local(mc): with pytest.raises(Exception, match="Project directory already exists"): mc.download_project(project, download_dir) + def test_new_project_sync_v1_api(mc): """ Create a new project, download it, add a file and then do sync - it should not fail This is using v1 endpoint. """ server_features = mc.server_features() - mc._server_features = { - "v2_push_enabled": False - } + mc._server_features = {"v2_push_enabled": False} test_project = "test_new_project_sync_v1" project = API_USER + "/" + test_project project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates @@ -304,6 +303,7 @@ def test_new_project_sync_v1_api(mc): assert not local_changes["added"] and not local_changes["removed"] and not local_changes["updated"] mc._server_features = server_features + def test_new_project_sync_v2_api(mc): """ Create a new project, download it, add a file and then do sync - it should not fail @@ -332,6 +332,7 @@ def test_new_project_sync_v2_api(mc): local_changes = mp.get_push_changes() assert not local_changes["added"] and not local_changes["removed"] and not local_changes["updated"] + def test_push_pull_changes(mc): test_project = "test_push" project = API_USER + "/" + test_project @@ -427,6 +428,7 @@ def test_push_pull_changes(mc): ) assert generate_checksum(os.path.join(project_dir_2, f_updated)) == f_remote_checksum + def test_sync_remove(mc): """ Integration test for sync process, where just files are removed. 
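
The cache exercised by test_uploaded_chunks_cache below is keyed by the SHA-1
checksum of the chunk data, so identical content is uploaded once and the
cached server chunk id is reused. A minimal sketch of that lookup flow (the
server chunk id here is a made-up placeholder; in the client it comes from the
response to POST /v2/projects/{id}/chunks):

    import hashlib

    from mergin.client_push import UploadChunksCache

    cache = UploadChunksCache()
    data = b"chunk payload"
    checksum = hashlib.sha1(data).hexdigest()

    if cache.get_chunk_id(checksum) is None:
        cache.add(checksum, "chunk-id-from-server")  # placeholder id

    assert cache.get_chunk_id(checksum) == "chunk-id-from-server"
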
@@ -3043,9 +3045,10 @@ def test_validate_auth(mc: MerginClient): with pytest.raises(LoginError): mc_auth_token_login_wrong_password.validate_auth() + def test_uploaded_chunks_cache(mc): """Create a new project, download it, add a file and then do sync - it should not fail""" - + test_project = "test_uploaded_chunks_cache" project = API_USER + "/" + test_project project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates @@ -3079,6 +3082,7 @@ def test_uploaded_chunks_cache(mc): assert job.upload_queue_items[1].server_chunk_id == chunk_id assert mc.upload_chunks_cache.cache == {} + def test_client_push_project_async(mc): """ Integration tests for low level client_push functions @@ -3106,6 +3110,7 @@ def test_client_push_project_async(mc): push_project_finalize(job) assert not os.path.exists(job.tmp_dir.name) + def test_client_pull_project_async(mc): """ Integration tests for low level client_pull functions @@ -3132,6 +3137,7 @@ def test_client_pull_project_async(mc): pull_project_finalize(job) assert not os.path.exists(job.temp_dir) + def test_client_project_sync(mc): test_project = "test_client_project_sync" project = API_USER + "/" + test_project @@ -3150,4 +3156,4 @@ def test_client_project_sync(mc): os.path.join(TEST_DATA_DIR, "inserted_1_A.gpkg"), os.path.join(project_dir_2, "base.gpkg"), ) - mc.sync_project(project_dir_2) \ No newline at end of file + mc.sync_project(project_dir_2) diff --git a/mergin/test/test_local_changes.py b/mergin/test/test_local_changes.py index c6b31654..6fe18388 100644 --- a/mergin/test/test_local_changes.py +++ b/mergin/test/test_local_changes.py @@ -2,35 +2,32 @@ from ..local_changes import LocalChange, LocalChanges + def test_local_changes_from_dict(): """Test generating LocalChanges from a dictionary.""" changes_dict = { - "added": [ - {"path": "file1.txt", "checksum": "abc123", "size": 1024, "mtime": datetime.now()} - ], - "updated": [ - {"path": "file2.txt", "checksum": "xyz789", "size": 2048, "mtime": datetime.now()} - ], + "added": [{"path": "file1.txt", "checksum": "abc123", "size": 1024, "mtime": datetime.now()}], + "updated": [{"path": "file2.txt", "checksum": "xyz789", "size": 2048, "mtime": datetime.now()}], "removed": [ { "checksum": "a4e7e35d1e8c203b5d342ecd9676adbebc924c84", "diff": None, "history": { "v1": { - "change": "added", - "checksum": "a4e7e35d1e8c203b5d342ecd9676adbebc924c84", - "expiration": None, - "path": "base.gpkg", - "size": 98304 + "change": "added", + "checksum": "a4e7e35d1e8c203b5d342ecd9676adbebc924c84", + "expiration": None, + "path": "base.gpkg", + "size": 98304, } }, "location": "v1/base.gpkg", "mtime": "2025-08-15T09:04:21.690590Z", "path": "base.gpkg", "size": 98304, - "version": "v1" + "version": "v1", } - ] + ], } # Convert dictionary to LocalChanges @@ -59,7 +56,7 @@ def test_local_change_get_diff(): checksum="abc123", size=1024, mtime=datetime.now(), - diff={"path": "file1-diff", "checksum": "diff123", "size": 512, "mtime": datetime.now()} + diff={"path": "file1-diff", "checksum": "diff123", "size": 512, "mtime": datetime.now()}, ) diff = local_change.get_diff() @@ -69,20 +66,14 @@ def test_local_change_get_diff(): assert diff.size == 512 -def test_local_changes_get_server_request(): - """Test the get_server_request method of LocalChanges.""" - added = [ - LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()) - ] - updated = [ - LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now()) - ] - removed = [ - LocalChange(path="file3.txt", 
checksum="lmn456", size=512, mtime=datetime.now()) - ] +def test_local_changes_to_server_payload(): + """Test the to_server_payload method of LocalChanges.""" + added = [LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now())] + updated = [LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())] + removed = [LocalChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())] local_changes = LocalChanges(added=added, updated=updated, removed=removed) - server_request = local_changes.get_server_request() + server_request = local_changes.to_server_payload() assert "added" in server_request assert "updated" in server_request @@ -91,15 +82,14 @@ def test_local_changes_get_server_request(): assert server_request["updated"][0]["path"] == "file2.txt" assert server_request["removed"][0]["path"] == "file3.txt" + def test_local_changes_update_chunks(): """Test the update_chunks method of LocalChanges.""" added = [ - LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now(),chunks=["abc123"]), - LocalChange(path="file2.txt", checksum="abc123", size=1024, mtime=datetime.now(),chunks=["abc123"]) - ] - updated = [ - LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now(), chunks=["xyz789"]) + LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["abc123"]), + LocalChange(path="file2.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["abc123"]), ] + updated = [LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now(), chunks=["xyz789"])] local_changes = LocalChanges(added=added, updated=updated) chunks = [("abc123", "chunk1"), ("abc123", "chunk1"), ("xyz789", "chunk2")] @@ -110,18 +100,13 @@ def test_local_changes_update_chunks(): assert local_changes.added[1].chunks == ["chunk1"] assert local_changes.updated[0].chunks == ["chunk2"] + def test_local_changes_get_upload_changes(): """Test the get_upload_changes method of LocalChanges.""" # Create sample LocalChange instances - added = [ - LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now()) - ] - updated = [ - LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now()) - ] - removed = [ - LocalChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now()) - ] + added = [LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now())] + updated = [LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())] + removed = [LocalChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())] # Initialize LocalChanges with added, updated, and removed changes local_changes = LocalChanges(added=added, updated=updated, removed=removed) @@ -132,4 +117,4 @@ def test_local_changes_get_upload_changes(): # Assertions assert len(upload_changes) == 2 # Only added and updated should be included assert upload_changes[0].path == "file1.txt" # First change is from added - assert upload_changes[1].path == "file2.txt" # Second change is from updated \ No newline at end of file + assert upload_changes[1].path == "file2.txt" # Second change is from updated diff --git a/mergin/utils.py b/mergin/utils.py index 7b6c9f9f..c7ddf55c 100644 --- a/mergin/utils.py +++ b/mergin/utils.py @@ -7,6 +7,7 @@ from datetime import datetime from pathlib import Path from .common import ClientError +from typing import ByteString def generate_checksum(file, chunk_size=4096): @@ -294,3 +295,14 @@ def 
bytes_to_human_size(bytes: int): return f"{round( bytes / 1024.0 / 1024.0 / 1024.0, precision )} GB" else: return f"{round( bytes / 1024.0 / 1024.0 / 1024.0 / 1024.0, precision )} TB" + +def get_data_checksum(data: ByteString) -> str: + """ + Generate sha1 checksum for given data. + + :param data: data to calculate checksum + :return: sha1 checksum + """ + checksum = hashlib.sha1() + checksum.update(data) + return checksum.hexdigest() From df88376447952dc1942ead92e2278d6223b0fb73 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Wed, 3 Sep 2025 15:31:46 +0200 Subject: [PATCH 10/24] fix uploading 0 size files --- mergin/client_push.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mergin/client_push.py index d9fe1e78..7f0c563f 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -259,18 +259,19 @@ def create_upload_job( upload_changes = changes.get_upload_changes() transaction_id = push_start_resp.get("transaction") if upload_changes else None job = UploadJob(local_version, changes, transaction_id, mp, mc, tmp_dir) + if not upload_changes: + mp.log.info("not uploading any files") + if push_start_resp: + job.server_resp = push_start_resp + push_project_finalize(job) + return # all done - no pending job + if transaction_id: mp.log.info(f"got transaction ID {transaction_id}") # prepare file chunks for upload upload_queue_items = create_upload_chunks(mc, mp, upload_changes) - if not upload_queue_items: - mp.log.info("not uploading any files") - if push_start_resp: - job.server_resp = push_start_resp - push_project_finalize(job) - return # all done - no pending job mp.log.info(f"Starting upload chunks for project {project_id}") job.add_items(upload_queue_items) From 28aa757dfd41fb850e780cbd47aeef22ad9e4422 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Wed, 3 Sep 2025 16:04:24 +0200 Subject: [PATCH 11/24] handle project info directly from v2 projects versions --- mergin/client_push.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mergin/client_push.py index 7f0c563f..37089364 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -265,7 +265,6 @@ def create_upload_job( job.server_resp = push_start_resp push_project_finalize(job) return # all done - no pending job - if transaction_id: mp.log.info(f"got transaction ID {transaction_id}") @@ -405,7 +404,7 @@ def push_project_finalize(job: UploadJob): try: job.update_chunks_from_items() job.mp.log.info(f"Finishing transaction for project {job.mp.project_full_name()}") - job.mc.post( + resp = job.mc.post( f"/v2/projects/{job.mp.project_id()}/versions", data={ "version": job.version, @@ -413,7 +412,7 @@ def push_project_finalize(job: UploadJob): }, headers={"Content-Type": "application/json"}, ) - project_info = job.mc.project_info(job.mp.project_id()) + project_info = json.load(resp) job.server_resp = project_info From efd7ed685df8fe6646507e7d14572d1a82f597b0 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Wed, 3 Sep 2025 20:04:30 +0200 Subject: [PATCH 12/24] Add unit tests and tuning of retry mechanism --- mergin/client.py | 7 +++--- mergin/client_push.py | 18 +++++++-------- mergin/common.py | 45 ++++++++++++++++++++++++++++++++----- mergin/test/test_client.py | 26 +++++++++++++++++++++ mergin/test/test_common.py | 46 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 122 
insertions(+), 20 deletions(-) create mode 100644 mergin/test/test_common.py diff --git a/mergin/client.py index a76db253..377d8680 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -25,7 +25,6 @@ PUSH_ATTEMPTS, SYNC_CALLBACK_WAIT, ClientError, - ErrorCode, LoginError, WorkspaceRole, ProjectRole, @@ -807,7 +806,7 @@ def download_project(self, project_path, directory, version=None): def user_info(self): server_type = self.server_type() if server_type == ServerType.OLD: - resp = self.get("/v1/user/" + self.username()) + resp = self.get(f"/v1/user/{self.username()}") else: resp = self.get("/v1/user/profile") return json.load(resp) @@ -1526,11 +1525,11 @@ def sync_project(self, project_directory, progress_callback=None): sleep(SYNC_CALLBACK_WAIT) current_size = job.transferred_size progress_callback(current_size - last_size, job) # call callback with transferred size increment - last = current_size + last_size = current_size push_project_finalize(job) _, has_changes = get_push_changes_batch(self, mp, job.server_resp) except ClientError as e: - if e.sync_retry and server_conflict_attempts <= PUSH_ATTEMPTS: + if e.is_retryable_sync() and server_conflict_attempts < PUSH_ATTEMPTS - 1: # retry on conflict, e.g. when server has changes that we do not have yet mp.log.info( f"Restarting sync process (conflict on server) - {server_conflict_attempts + 1}/{PUSH_ATTEMPTS}" ) diff --git a/mergin/client_push.py index 37089364..1fe4e2d7 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -24,7 +24,7 @@ from .local_changes import LocalChange, LocalChanges -from .common import UPLOAD_CHUNK_SIZE, ClientError, ErrorCode +from .common import UPLOAD_CHUNK_ATTEMPT_WAIT, UPLOAD_CHUNK_ATTEMPTS, UPLOAD_CHUNK_SIZE, ClientError, ErrorCode from .merginproject import MerginProject from .editor import filter_changes from .utils import get_data_checksum @@ -111,8 +111,7 @@ def upload_blocking(self): checksum_str = get_data_checksum(data) self.mp.log.debug(f"Uploading {self.file_path} part={self.chunk_index}") - attempts = 2 - for attempt in range(attempts): + for attempt in range(UPLOAD_CHUNK_ATTEMPTS): try: if self.mc.server_features().get("v2_push_enabled"): # use v2 API for uploading chunks @@ -122,8 +121,8 @@ def upload_blocking(self): self.upload_chunk(data, checksum_str) break # exit loop if upload was successful except ClientError as e: - if attempt < attempts - 1: - time.sleep(5) + if attempt < UPLOAD_CHUNK_ATTEMPTS - 1: + time.sleep(UPLOAD_CHUNK_ATTEMPT_WAIT) continue raise @@ -249,7 +248,7 @@ def create_upload_job( ) push_start_resp = json.load(resp) except ClientError as err: - if err.server_code not in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists.value]: + if not err.is_blocking_sync(): mp.log.error("Error starting transaction: " + str(err)) mp.log.info("--- push aborted") raise @@ -415,10 +414,9 @@ def push_project_finalize(job: UploadJob): project_info = json.load(resp) job.server_resp = project_info except ClientError as err: - if err.server_code in [ErrorCode.AnotherUploadRunning.value, ErrorCode.ProjectVersionExists.value]: - err.sync_retry = True - else: - job.mc.upload_chunks_cache.clear() # clear the upload chunks cache, as we are getting fatal from server + if not err.is_retryable_sync(): + # clear the upload chunks cache, as we got a fatal error from the server + job.mc.upload_chunks_cache.clear() job.mp.log.error("--- push finish failed! 
" + str(err)) raise err elif with_upload_of_files: diff --git a/mergin/common.py b/mergin/common.py index 8ba32584..4705bad8 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -6,16 +6,22 @@ # there is an upper limit for chunk size on server, ideally should be requested from there once implemented UPLOAD_CHUNK_SIZE = 10 * 1024 * 1024 +# number of attempts to upload a chunk +UPLOAD_CHUNK_ATTEMPTS = 2 + +# seconds to wait between attempts of uploading a chunk +UPLOAD_CHUNK_ATTEMPT_WAIT = 5 + # size of the log file part to send (if file is larger only this size from end will be sent) MAX_LOG_FILE_SIZE_TO_SEND = 5 * 1024 * 1024 # number of attempts to push changes (in case of network issues etc) -PUSH_ATTEMPTS = 12 +PUSH_ATTEMPTS = 10 -# seconds to wait between attempts +# seconds to wait between attempts to push changes PUSH_ATTEMPT_WAIT = 5 - # seconds to wait between sync callback calls +# seconds to wait between sync callback calls SYNC_CALLBACK_WAIT = 0.01 # default URL for submitting logs @@ -33,7 +39,7 @@ class ErrorCode(Enum): class ClientError(Exception): - def __init__(self, detail, url=None, server_code=None, server_response=None, http_error=None, http_method=None): + def __init__(self, detail: str, url=None, server_code=None, server_response=None, http_error=None, http_method=None): self.detail = detail self.url = url self.http_error = http_error @@ -43,8 +49,6 @@ def __init__(self, detail, url=None, server_code=None, server_response=None, htt self.server_response = server_response self.extra = None - # Param to mark error as candidate for retry sync process - self.sync_retry = False def __str__(self): string_res = f"Detail: {self.detail}\n" @@ -60,6 +64,35 @@ def __str__(self): string_res += f"{self.extra}\n" return string_res + def is_rate_limit(self) -> bool: + """Check if error is rate limit error based on server code""" + return self.http_error == 429 + + def is_blocking_sync(self) -> bool: + """ + Check if error is blocking sync based on server code. + Blocking sync means, that the sync is blocked by another user in server. + """ + return self.server_code in [ + ErrorCode.AnotherUploadRunning.value, + ErrorCode.ProjectVersionExists.value, + ] + + def is_retryable_sync(self) -> bool: + # Check if error is retryable based on server code + if self.is_blocking_sync() or self.is_rate_limit(): + return True + + if ( + self.http_error + and self.detail + and self.http_error == 400 + and ("Another process" in self.detail or "Version mismatch" in self.detail) + ): + return True + + return False + class LoginError(Exception): pass diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index cdb79e20..7aedbbba 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -11,6 +11,7 @@ import pytz import sqlite3 import glob +from unittest.mock import patch, Mock from .. 
import InvalidProject from ..client import ( @@ -3157,3 +3158,28 @@ def test_client_project_sync(mc): os.path.join(project_dir_2, "base.gpkg"), ) mc.sync_project(project_dir_2) + + +def test_client_project_sync_retry(mc): + test_project = "test_client_project_sync_retry" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) + cleanup(mc, project, [project_dir]) + mc.create_project(test_project) + mc.download_project(project, project_dir) + shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir) + with patch("mergin.client.push_project_finalize", autospec=True) as mock_push_project_finalize: + mock_push_project_finalize.side_effect = ClientError("test error") + with pytest.raises(ClientError, match="test error"): + mc.sync_project(project_dir) + + with patch("mergin.client.push_project_finalize") as mock_push_project_finalize, patch( + "mergin.client.PUSH_ATTEMPTS", 2 + ): + mock_push_project_finalize.side_effect = ClientError( + detail="", + server_code=ErrorCode.AnotherUploadRunning.value, + ) + with pytest.raises(ClientError): + mc.sync_project(project_dir) + assert mock_push_project_finalize.call_count == 2 diff --git a/mergin/test/test_common.py new file mode 100644 index 00000000..d86229e1 --- /dev/null +++ b/mergin/test/test_common.py @@ -0,0 +1,46 @@ +from ..common import ClientError, ErrorCode + +def test_client_error_is_blocking_sync(): + """Test the is_blocking_sync method of ClientError.""" + error = ClientError(detail="", server_code=None) + assert error.is_blocking_sync() is False + error.server_code = ErrorCode.ProjectsLimitHit.value + assert error.is_blocking_sync() is False + + error.server_code = ErrorCode.AnotherUploadRunning.value + assert error.is_blocking_sync() is True + error.server_code = ErrorCode.ProjectVersionExists.value + assert error.is_blocking_sync() is True + +def test_client_error_is_rate_limit(): + """Test the is_rate_limit method of ClientError.""" + error = ClientError(detail="", http_error=None) + assert error.is_rate_limit() is False + error.http_error = 500 + assert error.is_rate_limit() is False + error.http_error = 429 + assert error.is_rate_limit() is True + +def test_client_error_is_retryable_sync(): + """Test the is_retryable_sync method of ClientError.""" + error = ClientError(detail="", server_code=None, http_error=None) + assert error.is_retryable_sync() is False + + error.server_code = ErrorCode.ProjectsLimitHit.value + assert error.is_retryable_sync() is False + error.server_code = ErrorCode.AnotherUploadRunning.value + assert error.is_retryable_sync() is True + + error.server_code = None + error.http_error = 400 + error.detail = "Some other error" + assert error.is_retryable_sync() is False + error.detail = "Another process" + assert error.is_retryable_sync() is True + error.detail = "Version mismatch" + assert error.is_retryable_sync() is True + + error.http_error = 500 + assert error.is_retryable_sync() is False + error.http_error = 429 + assert error.is_retryable_sync() is True \ No newline at end of file From 29292daf2dd84b9a68cd63b721ad1fe26ad57887 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Fri, 5 Sep 2025 11:30:33 +0200 Subject: [PATCH 13/24] Update test call count measurement - comment @varmar05 --- mergin/client.py | 2 ++ mergin/common.py | 1 + mergin/test/test_client.py | 1 + 3 files changed, 4 insertions(+) diff --git a/mergin/client.py index 377d8680..34db3220 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ 
-1538,3 +1538,5 @@ def sync_project(self, project_directory, progress_callback=None): sleep(PUSH_ATTEMPT_WAIT) continue raise e + else: + server_conflict_attempts = 0 diff --git a/mergin/common.py index 4705bad8..25b58c48 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -83,6 +83,7 @@ def is_retryable_sync(self) -> bool: if self.is_blocking_sync() or self.is_rate_limit(): return True + # Check retryable based on http error and message from v1 sync API endpoint if ( self.http_error and self.detail diff --git a/mergin/test/test_client.py index 7aedbbba..7c33c01e 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -3172,6 +3172,7 @@ def test_client_project_sync_retry(mc): mock_push_project_finalize.side_effect = ClientError("test error") with pytest.raises(ClientError, match="test error"): mc.sync_project(project_dir) + assert mock_push_project_finalize.call_count == 1 with patch("mergin.client.push_project_finalize") as mock_push_project_finalize, patch( "mergin.client.PUSH_ATTEMPTS", 2 From c6dafca49eddf2ab5530df28b67c7e20aa2cce30 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Fri, 5 Sep 2025 13:51:23 +0200 Subject: [PATCH 14/24] add test also for v1 - v1 raises the error straight from push start (push_project_async), because there are no chunk caches --- mergin/test/test_client.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mergin/test/test_client.py index 7c33c01e..0b9c8388 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -3174,13 +3174,25 @@ def test_client_project_sync_retry(mc): mc.sync_project(project_dir) assert mock_push_project_finalize.call_count == 1 - with patch("mergin.client.push_project_finalize") as mock_push_project_finalize, patch( + with patch("mergin.client.push_project_async") as mock_push_project_async, patch( "mergin.client.PUSH_ATTEMPTS", 2 ): - mock_push_project_finalize.side_effect = ClientError( + mock_push_project_async.side_effect = ClientError( detail="", server_code=ErrorCode.AnotherUploadRunning.value, ) with pytest.raises(ClientError): mc.sync_project(project_dir) - assert mock_push_project_finalize.call_count == 2 + assert mock_push_project_async.call_count == 2 + + # for v1 endpoints the retry error is raised right at push start + with patch("mergin.client.push_project_async") as mock_push_project_async, patch( + "mergin.client.PUSH_ATTEMPTS", 2 + ): + mock_push_project_async.side_effect = ClientError( + detail="Another process is running. 
Please try later.", + http_error=400, + ) + with pytest.raises(ClientError): + mc.sync_project(project_dir) + assert mock_push_project_async.call_count == 2 From e54bde8fde64dd57b0c7f7806d8dcef21e35ea89 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Tue, 9 Sep 2025 16:49:17 +0200 Subject: [PATCH 15/24] Drop versions and permissions check --- mergin/client.py | 2 +- mergin/client_push.py | 35 ++++++----------------------------- mergin/editor.py | 7 +++---- mergin/merginproject.py | 4 ++++ mergin/test/test_client.py | 8 +++----- 5 files changed, 17 insertions(+), 39 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 34db3220..ba6b7589 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -918,7 +918,7 @@ def push_project(self, directory): :param directory: Project's directory :type directory: String """ - job = push_project_async(self, directory, check_version=True) + job = push_project_async(self, directory) if job is None: return # there is nothing to push (or we only deleted some files) push_project_wait(job) diff --git a/mergin/client_push.py b/mergin/client_push.py index 1fe4e2d7..a5aaabd3 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -276,7 +276,7 @@ def create_upload_job( return job -def push_project_async(mc, directory, check_version=False) -> Optional[UploadJob]: +def push_project_async(mc, directory) -> Optional[UploadJob]: """Starts push of a project and returns pending upload job""" mp = MerginProject(directory) @@ -289,34 +289,10 @@ def push_project_async(mc, directory, check_version=False) -> Optional[UploadJob mp.log.info("--- version: " + mc.user_agent_info()) mp.log.info(f"--- start push {project_path}") - try: - project_info = mc.project_info(project_path) - except ClientError as err: - mp.log.error("Error getting project info: " + str(err)) - mp.log.info("--- push aborted") - raise - server_version = project_info["version"] if project_info["version"] else "v0" - - mp.log.info(f"got project info: local version {local_version} / server version {server_version}") - - username = mc.username() - # permissions field contains information about update, delete and upload privileges of the user - # on a specific project. This is more accurate information then "writernames" field, as it takes - # into account namespace privileges. So we have to check only "permissions", namely "upload" one - if not mc.has_writing_permissions(project_path): - mp.log.error(f"--- push {project_path} - username {username} does not have write access") - raise ClientError(f"You do not seem to have write access to the project (username '{username}')") - - # DISCUSSION: do we want to check if the project is up to date before pushing? For now, we removed this part - if check_version and local_version != server_version: - mp.log.error(f"--- push {project_path} - not up to date (local {local_version} vs server {server_version})") - raise ClientError( - "There is a new version of the project on the server. Please update your local copy." 
- + f"\n\nLocal version: {local_version}\nServer version: {server_version}" - ) + mp.log.info(f"got project info: local version {local_version}") tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") - changes, changes_len = get_push_changes_batch(mc, mp, project_info) + changes, changes_len = get_push_changes_batch(mc, mp) if not changes_len: mp.log.info(f"--- push {project_path} - nothing to do") return @@ -489,11 +465,12 @@ def remove_diff_files(job: UploadJob) -> None: os.remove(diff_file) -def get_push_changes_batch(mc, mp: MerginProject, project_info: dict) -> Tuple[dict, int]: +def get_push_changes_batch(mc, mp: MerginProject) -> Tuple[dict, int]: """ Get changes that need to be pushed to the server. """ changes = mp.get_push_changes() - changes = filter_changes(mc, project_info, changes) + project_role = mp.project_role() + changes = filter_changes(mc, project_role, changes) return changes, sum(len(v) for v in changes.values()) diff --git a/mergin/editor.py b/mergin/editor.py index 237b0ea1..df03f58b 100644 --- a/mergin/editor.py +++ b/mergin/editor.py @@ -14,12 +14,11 @@ _disallowed_changes: Callable[[dict], bool] = lambda change: is_qgis_file(change["path"]) -def is_editor_enabled(mc, project_info: dict) -> bool: +def is_editor_enabled(mc, project_role: str) -> bool: """ The function checks if the server supports editor access, and if the current user's project role matches the expected role name for editors. """ server_support = mc.has_editor_support() - project_role = project_info.get("role") return server_support and project_role == EDITOR_ROLE_NAME @@ -40,7 +39,7 @@ def _apply_editor_filters(changes: Dict[str, List[dict]]) -> Dict[str, List[dict return changes -def filter_changes(mc, project_info: dict, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: +def filter_changes(mc, project_role: str, changes: Dict[str, List[dict]]) -> Dict[str, List[dict]]: """ Filters the given changes dictionary based on the editor's enabled state. @@ -52,7 +51,7 @@ def filter_changes(mc, project_info: dict, changes: Dict[str, List[dict]]) -> Di Returns: dict[str, list[dict]]: The filtered changes dictionary. 
""" - if not is_editor_enabled(mc, project_info): + if not is_editor_enabled(mc, project_role): return changes return _apply_editor_filters(changes) diff --git a/mergin/merginproject.py b/mergin/merginproject.py index dba70d45..f630a593 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -155,6 +155,10 @@ def workspace_name(self) -> str: full_name = self.project_full_name() slash_index = full_name.index("/") return full_name[:slash_index] + + def project_role(self) -> str: + self._read_metadata() + return self._metadata.get("role") def project_id(self) -> str: """Returns ID of the project (UUID using 8-4-4-4-12 formatting without braces) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 0b9c8388..6bfd615c 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -2639,14 +2639,12 @@ def test_download_failure(mc): def test_editor(mc: MerginClient): """Test editor handler class and push with editor""" - project_info = {"role": "editor"} if not mc.has_editor_support(): - assert is_editor_enabled(mc, project_info) is False + assert is_editor_enabled(mc, EDITOR_ROLE_NAME) is False return # mock that user is editor - project_info["role"] = EDITOR_ROLE_NAME - assert is_editor_enabled(mc, project_info) is True + assert is_editor_enabled(mc, EDITOR_ROLE_NAME) is True # unit test for editor methods qgs_changeset = { @@ -2654,7 +2652,7 @@ def test_editor(mc: MerginClient): "updated": [{"path": "/folder/project.updated.Qgs"}], "removed": [{"path": "/folder/project.removed.qgs"}], } - qgs_changeset = filter_changes(mc, project_info, qgs_changeset) + qgs_changeset = filter_changes(mc, EDITOR_ROLE_NAME, qgs_changeset) assert sum(len(v) for v in qgs_changeset.values()) == 2 From 57e854b77347a927cc0f4a6f1b48c3daa3b5aee9 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Tue, 9 Sep 2025 17:11:49 +0200 Subject: [PATCH 16/24] get rid of project info from editor --- mergin/client.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index ba6b7589..c483026b 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1524,10 +1524,12 @@ def sync_project(self, project_directory, progress_callback=None): while push_project_is_running(job): sleep(SYNC_CALLBACK_WAIT) current_size = job.transferred_size - progress_callback(current_size - last_size, job) # call callback with transferred size increment + progress_callback( + current_size - last_size, job + ) # call callback with transferred size increment last_size = current_size push_project_finalize(job) - _, has_changes = get_push_changes_batch(self, mp, job.server_resp) + _, has_changes = get_push_changes_batch(self, mp) except ClientError as e: if e.is_retryable_sync() and server_conflict_attempts < PUSH_ATTEMPTS - 1: # retry on conflict, e.g. 
when server has changes that we do not have yet From 747ea46daabe2a8a421927bd48164c5459ff1a34 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Tue, 9 Sep 2025 17:38:41 +0200 Subject: [PATCH 17/24] black prevent conflicted copy fix --- mergin/cli.py | 2 ++ mergin/editor.py | 4 ++-- mergin/merginproject.py | 5 +++-- mergin/test/test_client.py | 10 +++------- mergin/utils.py | 3 ++- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mergin/cli.py index d55d4dab..397ecedb 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -468,6 +468,7 @@ def pull(ctx): except Exception as e: _print_unhandled_exception() + @cli.command() @click.pass_context def sync(ctx): @@ -478,6 +479,7 @@ def sync(ctx): directory = os.getcwd() upload_job = None try: + def on_progress(increment, push_job): nonlocal upload_job upload_job = push_job diff --git a/mergin/editor.py index df03f58b..b1dac863 100644 --- a/mergin/editor.py +++ b/mergin/editor.py @@ -56,7 +56,7 @@ def filter_changes(mc, project_role: str, changes: Dict[str, List[dict]]) -> Dic return _apply_editor_filters(changes) -def prevent_conflicted_copy(path: str, mc, project_info: dict) -> bool: +def prevent_conflicted_copy(path: str, mc, project_role: str) -> bool: """ Decides whether a file path should be blocked from creating a conflicted copy. Note: This is used when the editor is active and attempting to modify files (e.g., .qgs) that are also updated on the server, preventing unnecessary conflict file creation. @@ -69,4 +69,4 @@ Returns: bool: True if the file path should be prevented from creating conflicted copy, False otherwise. """ - return is_editor_enabled(mc, project_info) and any([is_qgis_file(path)]) + return is_editor_enabled(mc, project_role) and any([is_qgis_file(path)]) diff --git a/mergin/merginproject.py index f630a593..44b36520 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -34,6 +34,7 @@ except (ImportError, ModuleNotFoundError): import pygeodiff + class MerginProject: """Base class for Mergin Maps local projects. 
@@ -155,7 +156,7 @@ def workspace_name(self) -> str: full_name = self.project_full_name() slash_index = full_name.index("/") return full_name[:slash_index] - + def project_role(self) -> str: self._read_metadata() return self._metadata.get("role") @@ -555,7 +556,7 @@ def apply_pull_changes(self, changes, temp_dir, server_project, mc): if ( path in modified_local_paths and item["checksum"] != local_files_map[path]["checksum"] - and not prevent_conflicted_copy(path, mc, server_project) + and not prevent_conflicted_copy(path, mc, server_project.get("role")) ): conflict = self.create_conflicted_copy(path, mc.username()) conflicts.append(conflict) diff --git a/mergin/test/test_client.py index 6bfd615c..01234c87 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -406,7 +406,7 @@ def test_push_pull_changes(mc): f_conflict_checksum = generate_checksum(os.path.join(project_dir_2, f_updated)) # not at latest server version - with pytest.raises(ClientError, match="Please update your local copy"): + with pytest.raises(ClientError, match="Version mismatch"): mc.push_project(project_dir_2) # check changes in project_dir_2 before applied @@ -3172,9 +3172,7 @@ def test_client_project_sync_retry(mc): mc.sync_project(project_dir) assert mock_push_project_finalize.call_count == 1 - with patch("mergin.client.push_project_async") as mock_push_project_async, patch( - "mergin.client.PUSH_ATTEMPTS", 2 - ): + with patch("mergin.client.push_project_async") as mock_push_project_async, patch("mergin.client.PUSH_ATTEMPTS", 2): mock_push_project_async.side_effect = ClientError( detail="", server_code=ErrorCode.AnotherUploadRunning.value, ) @@ -3184,9 +3182,7 @@ # for v1 endpoints the retry error is raised right at push start - with patch("mergin.client.push_project_async") as mock_push_project_async, patch( - "mergin.client.PUSH_ATTEMPTS", 2 - ): + with patch("mergin.client.push_project_async") as mock_push_project_async, patch("mergin.client.PUSH_ATTEMPTS", 2): mock_push_project_async.side_effect = ClientError( detail="Another process is running. Please try later.", http_error=400, ) diff --git a/mergin/utils.py index c7ddf55c..e3625a1f 100644 --- a/mergin/utils.py +++ b/mergin/utils.py @@ -295,7 +295,8 @@ def bytes_to_human_size(bytes: int): return f"{round( bytes / 1024.0 / 1024.0 / 1024.0, precision )} GB" else: return f"{round( bytes / 1024.0 / 1024.0 / 1024.0 / 1024.0, precision )} TB" - + + def get_data_checksum(data: ByteString) -> str: """ Generate sha1 checksum for given data. 
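The sha1 helper above is also what keys the upload chunks cache used by the v2 push. A usage sketch, assuming mc is a MerginClient instance; upload_chunk_to_server is an illustrative placeholder, not the client's actual API:

    import hashlib

    data = b"chunk payload"                     # one UPLOAD_CHUNK_SIZE slice in practice
    checksum = hashlib.sha1(data).hexdigest()   # same digest as get_data_checksum(data)

    chunk_id = mc.upload_chunks_cache.get_chunk_id(checksum)  # None on a cache miss
    if chunk_id is None:
        chunk_id = upload_chunk_to_server(data)  # illustrative placeholder
        mc.upload_chunks_cache.add(checksum, chunk_id)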
From b6cfa3e08ad1966043fc9d7bf081e28079a49b16 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Wed, 10 Sep 2025 19:11:28 +0200 Subject: [PATCH 18/24] temp_dir fix --- mergin/client_pull.py | 4 ++-- mergin/client_push.py | 2 +- mergin/test/test_client.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mergin/client_pull.py b/mergin/client_pull.py index a1528d77..13323df8 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -145,7 +145,7 @@ def download_project_async(mc, project_path, directory, project_version=None): mp.log.info("--- version: " + mc.user_agent_info()) mp.log.info(f"--- start download {project_path}") - tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-", ignore_cleanup_errors=True, delete=True) + tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") try: # check whether we download the latest version or not @@ -424,7 +424,7 @@ def pull_project_async(mc, directory) -> PullJob: # then we just download the whole file _pulling_file_with_diffs = lambda f: "diffs" in f and len(f["diffs"]) != 0 - tmp_dir = tempfile.TemporaryDirectory(prefix="mm-pull-", ignore_cleanup_errors=True, delete=True) + tmp_dir = tempfile.TemporaryDirectory(prefix="mm-pull-") pull_changes = mp.get_pull_changes(server_info["files"]) mp.log.debug("pull changes:\n" + pprint.pformat(pull_changes)) fetch_files = [] diff --git a/mergin/client_push.py b/mergin/client_push.py index 03e8a425..2400c959 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -297,7 +297,7 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: return mp.log.debug("push changes:\n" + pprint.pformat(changes)) - tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-", ignore_cleanup_errors=True, delete=True) + tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-") # If there are any versioned files (aka .gpkg) that are not updated through a diff, # we need to make a temporary copy somewhere to be sure that we are uploading full content. 
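The temp_dir fix above drops the ignore_cleanup_errors and delete keyword arguments, which appears to be a compatibility fix: ignore_cleanup_errors exists only since Python 3.10 and delete only since 3.12. A sketch of a version-guarded alternative, if best-effort cleanup is still wanted on newer runtimes (make_tmp_dir is an illustrative helper, not part of the client):

    import sys
    import tempfile

    def make_tmp_dir(prefix: str) -> tempfile.TemporaryDirectory:
        # keep robust cleanup where the runtime supports it
        if sys.version_info >= (3, 10):
            return tempfile.TemporaryDirectory(prefix=prefix, ignore_cleanup_errors=True)
        return tempfile.TemporaryDirectory(prefix=prefix)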
diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 8a6368f1..fdd988a3 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -3155,7 +3155,7 @@ def test_client_pull_project_async(mc): pull_project_wait(job) assert job.total_size == job.transferred_size pull_project_finalize(job) - assert not os.path.exists(job.temp_dir) + assert not os.path.exists(job.tmp_dir.name) def test_client_project_sync(mc): From 77b82e9fc122850631cc2a356406cbf47b21003e Mon Sep 17 00:00:00 2001 From: Marcel Kocisek Date: Fri, 12 Sep 2025 10:57:11 +0200 Subject: [PATCH 19/24] Close connection to make runnable test in new python and skip test for chunks cache on v1 supported server (#276) --- mergin/test/test_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index fdd988a3..d366b25a 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1364,6 +1364,7 @@ def _create_test_table(db_file): cursor.execute("CREATE TABLE test (fid SERIAL, txt TEXT);") cursor.execute("INSERT INTO test VALUES (123, 'hello');") cursor.execute("COMMIT;") + con.close() def _create_spatial_table(db_file): @@ -3069,6 +3070,8 @@ def test_validate_auth(mc: MerginClient): def test_uploaded_chunks_cache(mc): """Create a new project, download it, add a file and then do sync - it should not fail""" + if not mc.server_features().get("v2_push_enabled"): + pytest.skip("Server does not support v2 push") test_project = "test_uploaded_chunks_cache" project = API_USER + "/" + test_project project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates From e36ae8d1dda115432b8344971a4739d97fecd86e Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Wed, 17 Sep 2025 11:16:05 +0200 Subject: [PATCH 20/24] address comments without progress bar @wonder-sk --- mergin/client.py | 6 ++++-- mergin/client_push.py | 36 ++++++++++++++++++------------- mergin/editor.py | 2 +- mergin/local_changes.py | 33 +++++++++++++++++----------- mergin/test/test_local_changes.py | 36 +++++++++++++++---------------- 5 files changed, 65 insertions(+), 48 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index e11627cb..9ee1e1ff 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1519,6 +1519,9 @@ def sync_project(self, project_directory, progress_callback=None): 2. Get local changes 3. Push first change batch Repeat if there are more local changes. + + :param project_directory: Project's directory + :param progress_callback: Optional callback function to report progress during push. """ mp = MerginProject(project_directory) has_changes = True @@ -1546,6 +1549,7 @@ def sync_project(self, project_directory, progress_callback=None): last_size = current_size push_project_finalize(job) _, has_changes = get_push_changes_batch(self, mp) + server_conflict_attempts = 0 except ClientError as e: if e.is_retryable_sync() and server_conflict_attempts < PUSH_ATTEMPTS - 1: # retry on conflict, e.g. 
when server has changes that we do not have yet @@ -1556,5 +1560,3 @@ def sync_project(self, project_directory, progress_callback=None): sleep(PUSH_ATTEMPT_WAIT) continue raise e - else: - server_conflict_attempts = 0 diff --git a/mergin/client_push.py index 2400c959..48877842 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -22,7 +22,7 @@ import time from typing import List, Tuple, Optional, ByteString -from .local_changes import LocalChange, LocalChanges +from .local_changes import FileChange, LocalPojectChanges from .common import UPLOAD_CHUNK_ATTEMPT_WAIT, UPLOAD_CHUNK_ATTEMPTS, UPLOAD_CHUNK_SIZE, ClientError, ErrorCode from .merginproject import MerginProject @@ -66,7 +66,7 @@ def __init__( self._request_headers = {"Content-Type": "application/octet-stream"} - def upload_chunk(self, data: ByteString, checksum: str): + def upload_chunk_v1_api(self, data: ByteString, checksum: str): """ Uploads the chunk to the server. """ @@ -118,7 +118,7 @@ def upload_blocking(self): self.upload_chunk_v2_api(data, checksum_str) else: # use v1 API for uploading chunks - self.upload_chunk(data, checksum_str) + self.upload_chunk_v1_api(data, checksum_str) break # exit loop if upload was successful except ClientError as e: if attempt < UPLOAD_CHUNK_ATTEMPTS - 1: @@ -133,10 +133,10 @@ class UploadJob: """Keeps all the important data about a pending upload job""" def __init__( - self, version: str, changes: LocalChanges, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir + self, version: str, changes: LocalPojectChanges, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir ): self.version = version - self.changes: LocalChanges = changes # dictionary of local changes to the project + self.changes: LocalPojectChanges = changes # dictionary of local changes to the project self.transaction_id = transaction_id # ID of the transaction assigned by the server self.total_size = 0 # size of data to upload (in bytes) self.transferred_size = 0 # size of data already uploaded (in bytes) @@ -160,8 +160,8 @@ def _submit_item_to_thread(self, item: UploadQueueItem): future = self.executor.submit(_do_upload, item, self) self.futures.append(future) - def add_items(self, items: List[UploadQueueItem]): - """Add multiple chunks to the upload queue""" + def start(self, items: List[UploadQueueItem]): + """Start the upload in the background with multiple upload items (UploadQueueItem)""" self.total_size = sum(item.size for item in items) self.upload_queue_items = items @@ -175,7 +175,10 @@ def add_items(self, items: List[UploadQueueItem]): self._submit_item_to_thread(item) def update_chunks_from_items(self): - """Update chunks in LocalChanges from the upload queue items.""" + """ + Update chunks in LocalChanges from the upload queue items. + Used just before finalizing the transaction to set the server_chunk_id in v2 API. + """ self.changes.update_chunks([(item.chunk_id, item.server_chunk_id) for item in self.upload_queue_items]) @@ -188,7 +191,7 @@ def _do_upload(item: UploadQueueItem, job: UploadJob): job.transferred_size += item.size -def create_upload_chunks(mc, mp: MerginProject, local_changes: List[LocalChange]) -> List[UploadQueueItem]: +def create_upload_chunks(mc, mp: MerginProject, local_changes: List[FileChange]) -> List[UploadQueueItem]: """ Create a list of UploadQueueItem objects from the changes dictionary and calculate total size of files. This is used to prepare the upload queue for the upload job. 
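The slicing behind create_upload_chunks() is plain ceiling division over UPLOAD_CHUNK_SIZE. A standalone sketch of the arithmetic (plan_chunks is an illustrative helper, not client API; real queue items also carry the file path and checksum):

    import math
    import uuid

    UPLOAD_CHUNK_SIZE = 10 * 1024 * 1024  # 10 MiB, as defined in mergin/common.py

    def plan_chunks(file_size: int):
        # yield one (chunk_id, chunk_index, chunk_size) tuple per slice
        for index in range(math.ceil(file_size / UPLOAD_CHUNK_SIZE)):
            size = min(UPLOAD_CHUNK_SIZE, file_size - index * UPLOAD_CHUNK_SIZE)
            yield (str(uuid.uuid4()), index, size)

    # e.g. a 25 MiB file yields chunk sizes of 10 MiB, 10 MiB and 5 MiB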
@@ -221,7 +224,7 @@ def create_upload_chunks(mc, mp: MerginProject, local_changes: List[LocalChange] def create_upload_job( - mc, mp: MerginProject, changes: LocalChanges, tmp_dir: tempfile.TemporaryDirectory + mc, mp: MerginProject, changes: LocalPojectChanges, tmp_dir: tempfile.TemporaryDirectory ) -> Optional[UploadJob]: """ Prepare transaction and create an upload job for the project using the v1 API. @@ -261,18 +264,20 @@ def create_upload_job( if not upload_changes: mp.log.info("not uploading any files") if push_start_resp: + # This is related just to v1 API job.server_resp = push_start_resp push_project_finalize(job) return # all done - no pending job if transaction_id: + # This is related just to v1 API mp.log.info(f"got transaction ID {transaction_id}") # prepare file chunks for upload upload_queue_items = create_upload_chunks(mc, mp, upload_changes) mp.log.info(f"Starting upload chunks for project {project_id}") - job.add_items(upload_queue_items) + job.start(upload_queue_items) return job @@ -312,10 +317,10 @@ def push_project_async(mc, directory) -> Optional[UploadJob]: if mp.is_versioned_file(f["path"]): mp.copy_versioned_file_for_upload(f, tmp_dir.name) - local_changes = LocalChanges( - added=[LocalChange(**change) for change in changes["added"]], - updated=[LocalChange(**change) for change in changes["updated"]], - removed=[LocalChange(**change) for change in changes["removed"]], + local_changes = LocalPojectChanges( + added=[FileChange(**change) for change in changes["added"]], + updated=[FileChange(**change) for change in changes["updated"]], + removed=[FileChange(**change) for change in changes["removed"]], ) job = create_upload_job(mc, mp, local_changes, tmp_dir) return job @@ -449,6 +454,7 @@ def push_project_cancel(job: UploadJob): job.executor.shutdown(wait=True) if not job.transaction_id: + # If not v2 api endpoint with transaction, nothing to cancel on server job.mp.log.info("--- push cancelled") return try: diff --git a/mergin/editor.py b/mergin/editor.py index b1dac863..759991e3 100644 --- a/mergin/editor.py +++ b/mergin/editor.py @@ -64,7 +64,7 @@ def prevent_conflicted_copy(path: str, mc, project_role: str) -> bool: Args: path (str): The file path to check. mc: The Mergin client object. - project_info (dict): Information about the Mergin project from server. + project_role: Current project role. Returns: bool: True if the file path should be prevented from ceating conflicted copy, False otherwise. 
diff --git a/mergin/local_changes.py b/mergin/local_changes.py index 511960cd..f718ea08 100644 --- a/mergin/local_changes.py +++ b/mergin/local_changes.py @@ -4,7 +4,7 @@ @dataclass -class BaseLocalChange: +class FileDiffChange: path: str checksum: str size: int @@ -12,9 +12,20 @@ class BaseLocalChange: @dataclass -class LocalChange(BaseLocalChange): +class FileChange: + # path to the file relative to the project root + path: str + # file checksum + checksum: str + # file size in bytes + size: int + # file modification time + mtime: datetime + # original file checksum used for compairison origin_checksum: Optional[str] = None + # list of chunk ids that make up this file chunks: List[str] = field(default_factory=list) + # optional diff information for geopackage files with geodiff metadata diff: Optional[dict] = None upload_file: Optional[str] = None # some functions (MerginProject.compare_file_sets) are adding version to the change from project info @@ -24,9 +35,9 @@ class LocalChange(BaseLocalChange): # some functions (MerginProject.compare_file_sets) are adding location dict to the change from project info location: Optional[str] = None - def get_diff(self) -> Optional[BaseLocalChange]: + def get_diff(self) -> Optional[FileDiffChange]: if self.diff: - return BaseLocalChange( + return FileDiffChange( path=self.diff.get("path", ""), checksum=self.diff.get("checksum", ""), size=self.diff.get("size", 0), @@ -50,10 +61,10 @@ def to_server_data(self) -> dict: @dataclass -class LocalChanges: - added: List[LocalChange] = field(default_factory=list) - updated: List[LocalChange] = field(default_factory=list) - removed: List[LocalChange] = field(default_factory=list) +class LocalPojectChanges: + added: List[FileChange] = field(default_factory=list) + updated: List[FileChange] = field(default_factory=list) + removed: List[FileChange] = field(default_factory=list) def to_server_payload(self) -> dict: return { @@ -62,7 +73,7 @@ def to_server_payload(self) -> dict: "removed": [change.to_server_data() for change in self.removed], } - def get_upload_changes(self) -> List[LocalChange]: + def get_upload_changes(self) -> List[FileChange]: """ Get all changes that need to be uploaded. This includes added and updated files. 
@@ -76,9 +87,7 @@ def _map_unique_chunks(self, change_chunks: List[str], server_chunks: List[Tuple mapped = [] seen = set() for chunk in change_chunks: - for server_chunk in server_chunks: - chunk_id = server_chunk[0] - server_chunk_id = server_chunk[1] + for chunk_id, server_chunk_id in server_chunks: if chunk_id == chunk and server_chunk_id not in seen: mapped.append(server_chunk_id) seen.add(server_chunk_id) diff --git a/mergin/test/test_local_changes.py b/mergin/test/test_local_changes.py index 6fe18388..250bf26f 100644 --- a/mergin/test/test_local_changes.py +++ b/mergin/test/test_local_changes.py @@ -1,6 +1,6 @@ from datetime import datetime -from ..local_changes import LocalChange, LocalChanges +from ..local_changes import FileChange, LocalPojectChanges def test_local_changes_from_dict(): @@ -31,11 +31,11 @@ def test_local_changes_from_dict(): } # Convert dictionary to LocalChanges - added = [LocalChange(**file) for file in changes_dict["added"]] - updated = [LocalChange(**file) for file in changes_dict["updated"]] - removed = [LocalChange(**file) for file in changes_dict["removed"]] + added = [FileChange(**file) for file in changes_dict["added"]] + updated = [FileChange(**file) for file in changes_dict["updated"]] + removed = [FileChange(**file) for file in changes_dict["removed"]] - local_changes = LocalChanges(added=added, updated=updated, removed=removed) + local_changes = LocalPojectChanges(added=added, updated=updated, removed=removed) # Assertions assert len(local_changes.added) == 1 @@ -51,7 +51,7 @@ def test_local_changes_from_dict(): def test_local_change_get_diff(): """Test the get_diff method of LocalChange.""" - local_change = LocalChange( + local_change = FileChange( path="file1.txt", checksum="abc123", size=1024, @@ -68,11 +68,11 @@ def test_local_change_get_diff(): def test_local_changes_to_server_payload(): """Test the to_server_payload method of LocalChanges.""" - added = [LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now())] - updated = [LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())] - removed = [LocalChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())] + added = [FileChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now())] + updated = [FileChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())] + removed = [FileChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())] - local_changes = LocalChanges(added=added, updated=updated, removed=removed) + local_changes = LocalPojectChanges(added=added, updated=updated, removed=removed) server_request = local_changes.to_server_payload() assert "added" in server_request @@ -86,12 +86,12 @@ def test_local_changes_to_server_payload(): def test_local_changes_update_chunks(): """Test the update_chunks method of LocalChanges.""" added = [ - LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["abc123"]), - LocalChange(path="file2.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["abc123"]), + FileChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["abc123"]), + FileChange(path="file2.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["abc123"]), ] - updated = [LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now(), chunks=["xyz789"])] + updated = [FileChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now(), chunks=["xyz789"])] - 
local_changes = LocalChanges(added=added, updated=updated) + local_changes = LocalPojectChanges(added=added, updated=updated) chunks = [("abc123", "chunk1"), ("abc123", "chunk1"), ("xyz789", "chunk2")] local_changes.update_chunks(chunks) @@ -104,12 +104,12 @@ def test_local_changes_update_chunks(): def test_local_changes_get_upload_changes(): """Test the get_upload_changes method of LocalChanges.""" # Create sample LocalChange instances - added = [LocalChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now())] - updated = [LocalChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())] - removed = [LocalChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())] + added = [FileChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now())] + updated = [FileChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())] + removed = [FileChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())] # Initialize LocalChanges with added, updated, and removed changes - local_changes = LocalChanges(added=added, updated=updated, removed=removed) + local_changes = LocalPojectChanges(added=added, updated=updated, removed=removed) # Call get_upload_changes upload_changes = local_changes.get_upload_changes() From 0119b7d4b32e31f3c4ba387b786740dc5a951d74 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Thu, 18 Sep 2025 09:57:42 +0200 Subject: [PATCH 21/24] First version for sync progress bar --- mergin/cli.py | 43 +++++++++++++++++++++++++++++++------------ mergin/client.py | 10 ++++------ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/mergin/cli.py b/mergin/cli.py index 397ecedb..e2731699 100755 --- a/mergin/cli.py +++ b/mergin/cli.py @@ -477,17 +477,36 @@ def sync(ctx): if mc is None: return directory = os.getcwd() - upload_job = None + current_job = None + current_bar = None try: - - def on_progress(increment, push_job): - nonlocal upload_job - upload_job = push_job - - # run pull & push cycles until there are no local changes - mc.sync_project(directory, progress_callback=on_progress) - - click.secho("Sync complete.", fg="green") + # Iterate over the generator to get updates + for size_change, job in mc.sync_project(directory, upload_progress=True): + # Check if this is a new job (a new push operation) + if job and job != current_job: + # If a previous bar exists, close it + if current_bar: + current_bar.finish() + + # A new push job has started. Initialize a new progress bar. 
+ click.echo(f"Starting upload") + current_job = job + + # The length of the progress bar should be the total size of the job + # You'll need to get this from your job object (e.g., job.total_size) + total_size = job.total_size + current_bar = click.progressbar( + length=total_size, + label=f"Uploading project", + ) + + # Update the current progress bar with the size increment + current_bar.update(size_change) + + # After the loop finishes, make sure to close the final progress bar + if current_bar: + current_bar.finish() + click.secho("\nProject synced successfully", fg="green") except InvalidProject as e: click.secho("Invalid project directory ({})".format(str(e)), fg="red") @@ -496,8 +515,8 @@ def on_progress(increment, push_job): return except KeyboardInterrupt: click.secho("Cancelling...") - if upload_job: - push_project_cancel(upload_job) + if current_job: + push_project_cancel(current_job) except Exception as e: _print_unhandled_exception() diff --git a/mergin/client.py b/mergin/client.py index 9ee1e1ff..3981b0e2 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1512,7 +1512,7 @@ def create_invitation(self, workspace_id: int, email: str, workspace_role: Works ws_inv = self.post(f"v2/workspaces/{workspace_id}/invitations", params, json_headers) return json.load(ws_inv) - def sync_project(self, project_directory, progress_callback=None): + def sync_project(self, project_directory, upload_progress=False): """ Syncs project by loop with these steps: 1. Pull server version @@ -1521,7 +1521,7 @@ def sync_project(self, project_directory, progress_callback=None): Repeat if there are more local changes. :param project_directory: Project's directory - :param progress_callback: Optional callback function to report progress during push. + :param upload_progress: If True, the method will be a generator yielding upload progress as (size_change, job) tuples. 
""" mp = MerginProject(project_directory) has_changes = True @@ -1536,16 +1536,14 @@ def sync_project(self, project_directory, progress_callback=None): job = push_project_async(self, project_directory) if not job: break - if not progress_callback: + if not upload_progress: push_project_wait(job) else: last_size = 0 while push_project_is_running(job): sleep(SYNC_CALLBACK_WAIT) current_size = job.transferred_size - progress_callback( - current_size - last_size, job - ) # call callback with transferred size increment + yield (current_size - last_size, job) # Yields the size change and the job object last_size = current_size push_project_finalize(job) _, has_changes = get_push_changes_batch(self, mp) From 7dbed14f66dea185986a3d868f6ddce96abb48c7 Mon Sep 17 00:00:00 2001 From: "marcel.kocisek" Date: Thu, 18 Sep 2025 11:12:57 +0200 Subject: [PATCH 22/24] fix typo and some docstrings for localProjectChanges --- mergin/client_push.py | 12 ++++++------ mergin/local_changes.py | 8 +++++--- mergin/test/test_local_changes.py | 18 +++++++++--------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/mergin/client_push.py b/mergin/client_push.py index 48877842..c8fb4205 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -22,7 +22,7 @@ import time from typing import List, Tuple, Optional, ByteString -from .local_changes import FileChange, LocalPojectChanges +from .local_changes import FileChange, LocalProjectChanges from .common import UPLOAD_CHUNK_ATTEMPT_WAIT, UPLOAD_CHUNK_ATTEMPTS, UPLOAD_CHUNK_SIZE, ClientError, ErrorCode from .merginproject import MerginProject @@ -133,10 +133,10 @@ class UploadJob: """Keeps all the important data about a pending upload job""" def __init__( - self, version: str, changes: LocalPojectChanges, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir + self, version: str, changes: LocalProjectChanges, transaction_id: Optional[str], mp: MerginProject, mc, tmp_dir ): self.version = version - self.changes: LocalPojectChanges = changes # dictionary of local changes to the project + self.changes: LocalProjectChanges = changes # dictionary of local changes to the project self.transaction_id = transaction_id # ID of the transaction assigned by the server self.total_size = 0 # size of data to upload (in bytes) self.transferred_size = 0 # size of data already uploaded (in bytes) @@ -179,7 +179,7 @@ def update_chunks_from_items(self): Update chunks in LocalChanges from the upload queue items. Used just before finalizing the transaction to set the server_chunk_id in v2 API. """ - self.changes.update_chunks([(item.chunk_id, item.server_chunk_id) for item in self.upload_queue_items]) + self.changes.update_chunk_ids([(item.chunk_id, item.server_chunk_id) for item in self.upload_queue_items]) def _do_upload(item: UploadQueueItem, job: UploadJob): @@ -224,7 +224,7 @@ def create_upload_chunks(mc, mp: MerginProject, local_changes: List[FileChange]) def create_upload_job( - mc, mp: MerginProject, changes: LocalPojectChanges, tmp_dir: tempfile.TemporaryDirectory + mc, mp: MerginProject, changes: LocalProjectChanges, tmp_dir: tempfile.TemporaryDirectory ) -> Optional[UploadJob]: """ Prepare transaction and create an upload job for the project using the v1 API. 
@@ -317,7 +317,7 @@ def push_project_async(mc, directory) -> Optional[UploadJob]:
         if mp.is_versioned_file(f["path"]):
             mp.copy_versioned_file_for_upload(f, tmp_dir.name)
 
-    local_changes = LocalPojectChanges(
+    local_changes = LocalProjectChanges(
         added=[FileChange(**change) for change in changes["added"]],
         updated=[FileChange(**change) for change in changes["updated"]],
         removed=[FileChange(**change) for change in changes["removed"]],
diff --git a/mergin/local_changes.py b/mergin/local_changes.py
index f718ea08..c0fb980f 100644
--- a/mergin/local_changes.py
+++ b/mergin/local_changes.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Dict, Optional, List, Tuple
+from typing import Optional, List, Tuple
 
 
 @dataclass
@@ -27,6 +27,7 @@ class FileChange:
     chunks: List[str] = field(default_factory=list)
     # optional diff information for geopackage files with geodiff metadata
     diff: Optional[dict] = None
+    # file path used to read the file content when uploading it in chunks
     upload_file: Optional[str] = None
     # some functions (MerginProject.compare_file_sets) are adding version to the change from project info
     version: Optional[str] = None
@@ -45,6 +46,7 @@ def get_diff(self) -> Optional[FileDiffChange]:
         )
 
     def to_server_data(self) -> dict:
+        """Convert the FileChange instance to a dictionary format suitable for server payload."""
         result = {
             "path": self.path,
             "checksum": self.checksum,
@@ -61,7 +63,7 @@ def to_server_data(self) -> dict:
 
 
 @dataclass
-class LocalPojectChanges:
+class LocalProjectChanges:
     added: List[FileChange] = field(default_factory=list)
     updated: List[FileChange] = field(default_factory=list)
     removed: List[FileChange] = field(default_factory=list)
@@ -93,7 +95,7 @@ def _map_unique_chunks(self, change_chunks: List[str], server_chunks: List[Tuple
                 seen.add(server_chunk_id)
         return mapped
 
-    def update_chunks(self, server_chunks: List[Tuple[str, str]]) -> None:
+    def update_chunk_ids(self, server_chunks: List[Tuple[str, str]]) -> None:
         """
         Map chunk ids to chunks returned from server (server_chunk_id).
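
For orientation, this is roughly how a FileChange serializes for the server (a sketch based only on the fields visible above; the optional chunks and diff entries are assumed to be included only when set):

    from datetime import datetime

    # hypothetical values for illustration
    change = FileChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["chunk1"])
    payload = change.to_server_data()
    # payload is roughly {"path": "file1.txt", "checksum": "abc123", "size": 1024, "chunks": ["chunk1"]}
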
diff --git a/mergin/test/test_local_changes.py b/mergin/test/test_local_changes.py
index 250bf26f..32b195d8 100644
--- a/mergin/test/test_local_changes.py
+++ b/mergin/test/test_local_changes.py
@@ -1,10 +1,10 @@
 from datetime import datetime
 
-from ..local_changes import FileChange, LocalPojectChanges
+from ..local_changes import FileChange, LocalProjectChanges
 
 
 def test_local_changes_from_dict():
-    """Test generating LocalChanges from a dictionary."""
+    """Test generating LocalProjectChanges from a dictionary."""
     changes_dict = {
         "added": [{"path": "file1.txt", "checksum": "abc123", "size": 1024, "mtime": datetime.now()}],
         "updated": [{"path": "file2.txt", "checksum": "xyz789", "size": 2048, "mtime": datetime.now()}],
@@ -35,7 +35,7 @@ def test_local_changes_from_dict():
     updated = [FileChange(**file) for file in changes_dict["updated"]]
     removed = [FileChange(**file) for file in changes_dict["removed"]]
 
-    local_changes = LocalPojectChanges(added=added, updated=updated, removed=removed)
+    local_changes = LocalProjectChanges(added=added, updated=updated, removed=removed)
 
     # Assertions
     assert len(local_changes.added) == 1
@@ -72,7 +72,7 @@ def test_local_changes_to_server_payload():
     updated = [FileChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())]
     removed = [FileChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())]
 
-    local_changes = LocalPojectChanges(added=added, updated=updated, removed=removed)
+    local_changes = LocalProjectChanges(added=added, updated=updated, removed=removed)
 
     server_request = local_changes.to_server_payload()
     assert "added" in server_request
@@ -83,7 +83,7 @@ def test_local_changes_to_server_payload():
     assert server_request["removed"][0]["path"] == "file3.txt"
 
 
-def test_local_changes_update_chunks():
-    """Test the update_chunks method of LocalChanges."""
+def test_local_changes_update_chunk_ids():
+    """Test the update_chunk_ids method of LocalProjectChanges."""
     added = [
         FileChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now(), chunks=["abc123"]),
     ]
     updated = [FileChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now(), chunks=["xyz789"])]
 
-    local_changes = LocalPojectChanges(added=added, updated=updated)
+    local_changes = LocalProjectChanges(added=added, updated=updated)
 
     chunks = [("abc123", "chunk1"), ("abc123", "chunk1"), ("xyz789", "chunk2")]
-    local_changes.update_chunks(chunks)
+    local_changes.update_chunk_ids(chunks)
 
     assert local_changes.added[0].chunks == ["chunk1"]
     assert local_changes.added[1].chunks == ["chunk1"]
@@ -102,14 +102,14 @@ def test_local_changes_update_chunks():
 
 def test_local_changes_get_upload_changes():
-    """Test the get_upload_changes method of LocalChanges."""
+    """Test the get_upload_changes method of LocalProjectChanges."""
     # Create sample LocalChange instances
     added = [FileChange(path="file1.txt", checksum="abc123", size=1024, mtime=datetime.now())]
     updated = [FileChange(path="file2.txt", checksum="xyz789", size=2048, mtime=datetime.now())]
     removed = [FileChange(path="file3.txt", checksum="lmn456", size=512, mtime=datetime.now())]
 
     # Initialize LocalChanges with added, updated, and removed changes
-    local_changes = LocalPojectChanges(added=added, updated=updated, removed=removed)
+    local_changes = LocalProjectChanges(added=added, updated=updated, removed=removed)
 
     # Call get_upload_changes
     upload_changes = local_changes.get_upload_changes()

From 2e38b8420ec5565290541067ca50b0385ce97a18 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Thu, 18 Sep 2025 13:44:34 +0200
Subject: [PATCH 23/24] Create generator function yielding sync progress

---
 mergin/cli.py    |  2 +-
 mergin/client.py | 53 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/mergin/cli.py b/mergin/cli.py
index e2731699..3451e012 100755
--- a/mergin/cli.py
+++ b/mergin/cli.py
@@ -481,7 +481,7 @@ def sync(ctx):
     current_bar = None
     try:
         # Iterate over the generator to get updates
-        for size_change, job in mc.sync_project(directory, upload_progress=True):
+        for size_change, job in mc._sync_project_generator(directory):
             # Check if this is a new job (a new push operation)
             if job and job != current_job:
                 # If a previous bar exists, close it
diff --git a/mergin/client.py b/mergin/client.py
index 3981b0e2..509d7b93 100644
--- a/mergin/client.py
+++ b/mergin/client.py
@@ -1512,7 +1512,42 @@ def create_invitation(self, workspace_id: int, email: str, workspace_role: Works
         ws_inv = self.post(f"v2/workspaces/{workspace_id}/invitations", params, json_headers)
         return json.load(ws_inv)
 
-    def sync_project(self, project_directory, upload_progress=False):
+    def _sync_project_generator(self, project_directory):
+        """
+        See `sync_project` for details. This method is a generator yielding upload progress as (size_change, job) tuples.
+
+        :param project_directory: Project's directory
+        """
+        mp = MerginProject(project_directory)
+        has_changes = True
+        server_conflict_attempts = 0
+        while has_changes:
+            self.pull_project(project_directory)
+            try:
+                job = push_project_async(self, project_directory)
+                if not job:
+                    break
+                last_size = 0
+                while push_project_is_running(job):
+                    sleep(SYNC_CALLBACK_WAIT)
+                    current_size = job.transferred_size
+                    yield (current_size - last_size, job)  # Yields the size change and the job object
+                    last_size = current_size
+                push_project_finalize(job)
+                _, has_changes = get_push_changes_batch(self, mp)
+                server_conflict_attempts = 0
+            except ClientError as e:
+                if e.is_retryable_sync() and server_conflict_attempts < PUSH_ATTEMPTS - 1:
+                    # retry on conflict, e.g. when server has changes that we do not have yet
+                    mp.log.info(
+                        f"Restarting sync process (conflict on server) - {server_conflict_attempts + 1}/{PUSH_ATTEMPTS}"
+                    )
+                    server_conflict_attempts += 1
+                    sleep(PUSH_ATTEMPT_WAIT)
+                    continue
+                raise e
+
+    def sync_project(self, project_directory):
         """
         Syncs project by loop with these steps:
         1. Pull server version
@@ -1527,24 +1562,12 @@ def sync_project(self, project_directory, upload_progress=False):
         has_changes = True
         server_conflict_attempts = 0
         while has_changes:
-            pull_job = pull_project_async(self, project_directory)
-            if pull_job:
-                pull_project_wait(pull_job)
-                pull_project_finalize(pull_job)
-
+            self.pull_project(project_directory)
             try:
                 job = push_project_async(self, project_directory)
                 if not job:
                     break
-                if not upload_progress:
-                    push_project_wait(job)
-                else:
-                    last_size = 0
-                    while push_project_is_running(job):
-                        sleep(SYNC_CALLBACK_WAIT)
-                        current_size = job.transferred_size
-                        yield (current_size - last_size, job)  # Yields the size change and the job object
-                        last_size = current_size
+                push_project_wait(job)
                 push_project_finalize(job)
                 _, has_changes = get_push_changes_batch(self, mp)
                 server_conflict_attempts = 0

From 3d006314def2e57a2d8d3b9ba9c3dcb03209eab8 Mon Sep 17 00:00:00 2001
From: "marcel.kocisek"
Date: Thu, 18 Sep 2025 14:24:21 +0200
Subject: [PATCH 24/24] Introduce generator for reporting sync progress

---
 mergin/cli.py              |  2 +-
 mergin/client.py           | 43 +++++++++++---------------------------
 mergin/common.py           |  4 +++-
 mergin/test/test_common.py |  5 ++++-
 4 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/mergin/cli.py b/mergin/cli.py
index 3451e012..4256f567 100755
--- a/mergin/cli.py
+++ b/mergin/cli.py
@@ -481,7 +481,7 @@ def sync(ctx):
     current_bar = None
     try:
         # Iterate over the generator to get updates
-        for size_change, job in mc._sync_project_generator(directory):
+        for size_change, job in mc.sync_project_generator(directory):
             # Check if this is a new job (a new push operation)
             if job and job != current_job:
                 # If a previous bar exists, close it
diff --git a/mergin/client.py b/mergin/client.py
index 509d7b93..72edf8ba 100644
--- a/mergin/client.py
+++ b/mergin/client.py
@@ -1512,9 +1512,13 @@ def create_invitation(self, workspace_id: int, email: str, workspace_role: Works
         ws_inv = self.post(f"v2/workspaces/{workspace_id}/invitations", params, json_headers)
         return json.load(ws_inv)
 
-    def _sync_project_generator(self, project_directory):
+    def sync_project_generator(self, project_directory):
         """
-        See `sync_project` for details. This method is a generator yielding upload progress as (size_change, job) tuples.
+        Syncs the project in a loop with these steps:
+        1. Pull server version
+        2. Get local changes
+        3. Push first change batch
+        Repeat if there are more local changes.
 
         :param project_directory: Project's directory
         """
@@ -1527,6 +1531,7 @@ def _sync_project_generator(self, project_directory):
             job = push_project_async(self, project_directory)
             if not job:
                 break
+            # poll the running push job and yield progress increments
             last_size = 0
             while push_project_is_running(job):
                 sleep(SYNC_CALLBACK_WAIT)
@@ -1549,35 +1554,11 @@
 
     def sync_project(self, project_directory):
         """
-        Syncs project by loop with these steps:
-        1. Pull server version
-        2. Get local changes
-        3. Push first change batch
-        Repeat if there are more local changes.
+        See description of sync_project_generator().
 
         :param project_directory: Project's directory
-        :param upload_progress: If True, the method will be a generator yielding upload progress as (size_change, job) tuples.
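+        Unlike sync_project_generator(), it does not report progress to the caller.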
""" - mp = MerginProject(project_directory) - has_changes = True - server_conflict_attempts = 0 - while has_changes: - self.pull_project(project_directory) - try: - job = push_project_async(self, project_directory) - if not job: - break - push_project_wait(job) - push_project_finalize(job) - _, has_changes = get_push_changes_batch(self, mp) - server_conflict_attempts = 0 - except ClientError as e: - if e.is_retryable_sync() and server_conflict_attempts < PUSH_ATTEMPTS - 1: - # retry on conflict, e.g. when server has changes that we do not have yet - mp.log.info( - f"Restarting sync process (conflict on server) - {server_conflict_attempts + 1}/{PUSH_ATTEMPTS}" - ) - server_conflict_attempts += 1 - sleep(PUSH_ATTEMPT_WAIT) - continue - raise e + # walk through the generator to perform the sync + # in this method we do not yield anything to the caller + for _ in self.sync_project_generator(project_directory): + pass diff --git a/mergin/common.py b/mergin/common.py index 25b58c48..296f7de0 100644 --- a/mergin/common.py +++ b/mergin/common.py @@ -39,7 +39,9 @@ class ErrorCode(Enum): class ClientError(Exception): - def __init__(self, detail: str, url=None, server_code=None, server_response=None, http_error=None, http_method=None): + def __init__( + self, detail: str, url=None, server_code=None, server_response=None, http_error=None, http_method=None + ): self.detail = detail self.url = url self.http_error = http_error diff --git a/mergin/test/test_common.py b/mergin/test/test_common.py index d86229e1..7a1dbbdf 100644 --- a/mergin/test/test_common.py +++ b/mergin/test/test_common.py @@ -1,5 +1,6 @@ from ..common import ClientError, ErrorCode + def test_client_error_is_blocked_sync(): """Test the is_blocked_sync method of ClientError.""" error = ClientError(detail="", server_code=None) @@ -12,6 +13,7 @@ def test_client_error_is_blocked_sync(): error.server_code = ErrorCode.ProjectVersionExists.value assert error.is_blocking_sync() is True + def test_client_error_is_rate_limit(): """Test the is_rate_limit method of ClientError.""" error = ClientError(detail="", http_error=None) @@ -21,6 +23,7 @@ def test_client_error_is_rate_limit(): error.http_error = 429 assert error.is_rate_limit() is True + def test_client_error_is_retryable_sync(): """Test the is_retryable_sync method of ClientError.""" error = ClientError(detail="", server_code=None, http_error=None) @@ -43,4 +46,4 @@ def test_client_error_is_retryable_sync(): error.http_error = 500 assert error.is_retryable_sync() is False error.http_error = 429 - assert error.is_retryable_sync() is True \ No newline at end of file + assert error.is_retryable_sync() is True