Skip to content

Commit be84ccd

Browse files
varmar05wonder-sk
authored andcommitted
Make sure all gpkg files and versions are reported
download diffs functionality was split for higher flexibility of usage
1 parent 96f7211 commit be84ccd

File tree

4 files changed

+64
-62
lines changed

4 files changed

+64
-62
lines changed

mergin/client.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import math
33
import os
44
import json
5+
import shutil
56
import zlib
67
import base64
78
import urllib.parse
@@ -757,9 +758,8 @@ def download_file(self, project_dir, file_path, output_filename, version=None):
757758
pull_project_wait(job)
758759
download_file_finalize(job)
759760

760-
def get_file_diff(self, project_dir, file_path, output_diff, version_from, version_to, keep_diffs=False):
761+
def get_file_diff(self, project_dir, file_path, output_diff, version_from, version_to):
761762
""" Create concatenated diff for project file diffs between versions version_from and version_to.
762-
Downloaded diffs can be kept if needed.
763763
764764
:param project_dir: project local directory
765765
:type project_dir: String
@@ -771,12 +771,39 @@ def get_file_diff(self, project_dir, file_path, output_diff, version_from, versi
771771
:type version_from: String
772772
:param version_to: ending project version tag for getting diff
773773
:type version_to: String
774-
:param keep_diffs: whether to keep downloaded diff files
775-
:type keep_diffs: Boolean
776774
"""
777-
job = download_diffs_async(self, project_dir, file_path, version_from, version_to)
775+
mp = MerginProject(project_dir)
776+
project_path = mp.metadata["name"]
777+
file_history = self.project_file_history_info(project_path, file_path)
778+
versions_to_fetch = get_versions_with_file_changes(
779+
self, project_path, file_path, version_from=version_from, version_to=version_to, file_history=file_history
780+
)
781+
diffs = self.download_file_diffs(project_dir, file_path, versions_to_fetch[1:])
782+
# concatenate diffs, if needed
783+
output_dir = os.path.dirname(output_diff)
784+
if len(diffs) >= 1:
785+
os.makedirs(output_dir, exist_ok=True)
786+
if len(diffs) > 1:
787+
mp.geodiff.concat_changes(diffs, output_diff)
788+
elif len(diffs) == 1:
789+
shutil.copy(diffs[0], output_diff)
790+
791+
def download_file_diffs(self, project_dir, file_path, versions):
792+
""" Download file diffs for specified versions.
793+
794+
:param project_dir: project local directory
795+
:type project_dir: String
796+
:param file_path: relative path of file to download in the project directory
797+
:type file_path: String
798+
:param versions: list of versions to download diffs for, for example ['v3', 'v5']
799+
:type versions: List(String)
800+
:returns: list of downloaded diffs (their actual locations on disk)
801+
:rtype: List(String)
802+
"""
803+
job = download_diffs_async(self, project_dir, file_path, versions)
778804
pull_project_wait(job)
779-
download_diffs_finalize(job, output_diff, keep_diffs)
805+
diffs = download_diffs_finalize(job)
806+
return diffs
780807

781808
def has_unfinished_pull(self, directory):
782809
"""

mergin/client_pull.py

Lines changed: 14 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def download_file_finalize(job):
672672
shutil.rmtree(temp_dir)
673673

674674

675-
def download_diffs_async(mc, project_directory, file_path, version_from, version_to):
675+
def download_diffs_async(mc, project_directory, file_path, versions):
676676
"""
677677
Starts background download project file diffs for specified versions.
678678
Returns handle to the pending download.
@@ -681,20 +681,16 @@ def download_diffs_async(mc, project_directory, file_path, version_from, version
681681
mc (MerginClient): MerginClient instance.
682682
project_directory (str): local project directory.
683683
file_path (str): file path relative to Mergin project root.
684-
version_from (str): starting project version tag for getting diff, for example 'v3'.
685-
version_to (str): ending project version tag for getting diff.
684+
versions (list): list of versions to download diffs for, e.g. ['v1', 'v2'].
686685
687686
Returns:
688687
PullJob/None: a handle for the pending download.
689688
"""
690689
mp = MerginProject(project_directory)
691690
project_path = mp.metadata["name"]
692691
file_history = mc.project_file_history_info(project_path, file_path)
693-
versions_to_fetch = get_versions_with_file_changes(
694-
mc, project_path, file_path, version_from=version_from, version_to=version_to, file_history=file_history
695-
)
696692
mp.log.info(f"--- version: {mc.user_agent_info()}")
697-
mp.log.info(f"--- start download diffs for {file_path} of {project_path}, versions: {[v for v in versions_to_fetch]}")
693+
mp.log.info(f"--- start download diffs for {file_path} of {project_path}, versions: {[v for v in versions]}")
698694

699695
try:
700696
server_info = mc.project_info(project_path)
@@ -709,7 +705,7 @@ def download_diffs_async(mc, project_directory, file_path, version_from, version
709705
os.makedirs(download_dir, exist_ok=True)
710706
fetch_files = []
711707

712-
for version in versions_to_fetch[1:]:
708+
for version in versions:
713709
version_data = file_history["history"][version]
714710
diff_data = copy.deepcopy(version_data)
715711
diff_data['version'] = version
@@ -721,7 +717,7 @@ def download_diffs_async(mc, project_directory, file_path, version_from, version
721717
total_size = 0
722718
for file in fetch_files:
723719
items = _download_items(file, download_dir, diff_only=True)
724-
dest_file_path = os.path.normpath(os.path.join(download_dir, file["version"] + "-" + os.path.basename(file['diff']['path'])))
720+
dest_file_path = os.path.normpath(os.path.join(download_dir, file["version"] + "-" + file['diff']['path']))
725721
if os.path.exists(dest_file_path):
726722
continue
727723
files_to_merge.append(FileToMerge(dest_file_path, items))
@@ -744,8 +740,12 @@ def download_diffs_async(mc, project_directory, file_path, version_from, version
744740
return job
745741

746742

747-
def download_diffs_finalize(job, output_diff, keep_diffs=False):
748-
""" To be called after download_diffs_async """
743+
def download_diffs_finalize(job):
744+
""" To be called after download_diffs_async
745+
746+
Returns:
747+
diffs: list of downloaded diffs (their actual locations on disk)
748+
"""
749749

750750
job.executor.shutdown(wait=True)
751751

@@ -757,34 +757,17 @@ def download_diffs_finalize(job, output_diff, keep_diffs=False):
757757
raise future.exception()
758758

759759
job.mp.log.info("finalizing diffs pull")
760+
diffs = []
760761

761762
# merge downloaded chunks
762763
try:
763764
for file_to_merge in job.files_to_merge:
764765
file_to_merge.merge()
766+
diffs.append(file_to_merge.dest_file)
765767
except ClientError as err:
766768
job.mp.log.error("Error merging chunks of downloaded file: " + str(err))
767769
job.mp.log.info("--- diffs pull aborted")
768770
raise
769771

770772
job.mp.log.info("--- diffs pull finished")
771-
772-
# Collect and finally concatenate diffs, if needed
773-
diffs = []
774-
for file_to_merge in job.files_to_merge:
775-
diffs.append(file_to_merge.dest_file)
776-
777-
output_dir = os.path.dirname(output_diff)
778-
temp_dir = None
779-
if len(diffs) >= 1:
780-
os.makedirs(output_dir, exist_ok=True)
781-
temp_dir = os.path.dirname(diffs[0])
782-
if len(diffs) > 1:
783-
job.mp.geodiff.concat_changes(diffs, output_diff)
784-
elif len(diffs) == 1:
785-
shutil.copy(diffs[0], output_diff)
786-
787-
if not keep_diffs:
788-
# remove the diffs download temporary directory
789-
if temp_dir is not None:
790-
shutil.rmtree(temp_dir)
773+
return diffs

mergin/report.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from collections import defaultdict
66
from itertools import groupby
77

8-
from . import ClientError
9-
from .merginproject import MerginProject, pygeodiff
10-
from .utils import int_version
8+
from mergin import ClientError
9+
from mergin.merginproject import MerginProject, pygeodiff
10+
from mergin.utils import int_version
1111

1212

1313
# inspired by C++ implementation https://github.com/lutraconsulting/geodiff/blob/master/geodiff/src/drivers/sqliteutils.cpp
@@ -123,7 +123,7 @@ def __init__(self, changeset_reader, schema):
123123
# get geometry index in both gpkg schema and diffs values
124124
geom_idx = next((index for (index, col) in enumerate(schema_table["columns"]) if col["type"] == "geometry"),
125125
None)
126-
if not geom_idx:
126+
if geom_idx is None:
127127
continue
128128

129129
geom_col = schema_table["columns"][geom_idx]["geometry"]
@@ -169,24 +169,25 @@ def create_report(mc, directory, project, since, to, out_dir=tempfile.gettempdir
169169
headers = ["file", "table", "author", "timestamp", "version", "quantity_type", "quantity"]
170170
records = []
171171
info = mc.project_info(project, since=since)
172+
num_since = int_version(since)
173+
num_to = int_version(to)
172174
# filter only .gpkg files
173175
files = [f for f in info["files"] if mp.is_versioned_file(f["path"])]
174176
for f in files:
175177
mp.log.debug(f"analyzing {f['path']} ...")
176-
diff_file = os.path.join(mp.meta_dir, f["path"] + ".diff")
177178
try:
178179
if "history" not in f:
180+
mp.log.debug(f"no history field, skip")
179181
continue
180182

181-
# download diffs in desired range, in case versions are not within history,
182-
# pass unknown, and it will be determined in download function
183-
# it can be different for each file
184-
since_ = since if since in f["history"] else None
185-
to_ = to if to in f["history"] else None
186-
if not (since_ and to_):
187-
continue # no change at all for particular file in desired range
183+
# get version list (keys) within range
184+
history_keys = list(filter(lambda v: (num_since <= int_version(v) <= num_to), f["history"].keys()))
185+
if not history_keys:
186+
mp.log.debug(f"no file history within range, skip")
187+
continue
188188

189-
mc.get_file_diff(directory, f["path"], diff_file, since_, to_, True)
189+
# download diffs
190+
mc.download_file_diffs(directory, f["path"], history_keys)
190191

191192
# download full gpkg in "to" version to analyze its schema to determine which col is geometry
192193
full_gpkg = os.path.join(mp.meta_dir, ".cache", f["path"])
@@ -201,13 +202,9 @@ def create_report(mc, directory, project, since, to, out_dir=tempfile.gettempdir
201202
schema = json.load(sf).get("geodiff_schema")
202203

203204
# add records for every version (diff) and all tables within geopackage
204-
for idx in range(int_version(since) + 1, int_version(to) + 1):
205-
version = "v" + str(idx)
206-
# skip version if there was no diff file
207-
if version not in f['history']:
208-
continue
205+
for version in history_keys:
209206
v_diff_file = os.path.join(mp.meta_dir, '.cache',
210-
version + "-" + os.path.basename(f['history'][version]['diff']['path']))
207+
version + "-" + f['history'][version]['diff']['path'])
211208

212209
version_data = versions_map[version]
213210
cr = mp.geodiff.read_changeset(v_diff_file)

mergin/test/test_client.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def test_push_pull_changes(mc):
196196
assert len(project_info['files']) == len(mp.inspect_files())
197197
project_versions = mc.project_versions(project)
198198
assert len(project_versions) == 2
199-
f_change = next((f for f in project_versions[0]['changes']['updated'] if f['path'] == f_updated), None)
199+
f_change = next((f for f in project_versions[-1]['changes']['updated'] if f['path'] == f_updated), None)
200200
assert 'origin_checksum' not in f_change # internal client info
201201

202202
# test parallel changes
@@ -1642,8 +1642,3 @@ def test_report(mc):
16421642
assert "v3,update_count,2" in content
16431643
# files not edited are not in reports
16441644
assert "inserted_1_A.gpkg" not in content
1645-
1646-
# test some failure, e.g. wrong version range, which would fail on getting version list
1647-
with pytest.raises(ClientError) as e:
1648-
create_report(mc, directory, project, to, since, TMP_DIR)
1649-
assert "The requested URL was not found on the server." in str(e.value)

0 commit comments

Comments
 (0)