From 279c6caa50a35c2fe10f3631b867bdfd30d4a4f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20M=C3=B6nch?= Date: Fri, 17 Mar 2023 13:02:33 +0100 Subject: [PATCH 1/4] improve dataset file handling tests This commit modifies test_dataset.test_file_handling to be able to test with remote paths that contain directories. There is still a possible improvement: support files with a non-".txt" suffix. Those are currently not supported because replacing a file fails due to "changing file types". --- datalad_dataverse/tests/test_dataset.py | 111 +++++++++--------------- 1 file changed, 40 insertions(+), 71 deletions(-) diff --git a/datalad_dataverse/tests/test_dataset.py b/datalad_dataverse/tests/test_dataset.py index 110aaea..d507901 100644 --- a/datalad_dataverse/tests/test_dataset.py +++ b/datalad_dataverse/tests/test_dataset.py @@ -19,30 +19,42 @@ def test_file_handling( dataverse_dataaccess_api, dataverse_dataset, ): - # the starting point of `dataverse_dataset` is a freshly - # created, non-published dataset in draft mode, with no prior - # version odd = ODD(dataverse_admin_api, dataverse_dataset) - fcontent = 'some_content' - fpath = tmp_path / 'dummy.txt' - fpath.write_text(fcontent) - src_md5 = md5sum(fpath) - - fileid = check_upload(odd, fcontent, fpath, src_md5) - - check_download(odd, fileid, tmp_path / 'downloaded.txt', src_md5) - - check_file_metadata_update(dataverse_admin_api, dataverse_dataset, odd, - fileid, fpath) - - fileid = check_replace_file(odd, fileid, tmp_path) - - check_rename_file(odd, fileid) + paths = ( + tmp_path / '.Ö-dot-Ö-in-front' / '!-üö-.txt', + tmp_path / '.dot-in-front' / 'c1.txt', + tmp_path / ' space-in-front' / 'c2.txt', + tmp_path / '-minus-in-front' / 'c3.txt', + tmp_path / 'Ö-in-front' / 'überflüssiger_fuß.txt', + tmp_path / ' Ö-space-Ö-in-front' / 'a.€🔆.txt', + tmp_path / 'dummy.txt', + ) - check_remove(odd, fileid, PurePosixPath(fpath.name)) + path_info = dict() + for path in paths: + if path.parent != tmp_path: + path.parent.mkdir() + 
relative_path = path.relative_to(tmp_path) + fcontent = 'content of: ' + str(relative_path) + path.write_text(fcontent) + src_md5 = md5sum(path) + fileid = check_upload(odd, fcontent, relative_path, tmp_path, src_md5) + path_info[path] = (src_md5, fileid) - check_duplicate_file_deposition(odd, tmp_path) + for path, (src_md5, fileid) in path_info.items(): + relative_path = path.relative_to(tmp_path) + check_download(odd, fileid, tmp_path / 'downloaded.txt', src_md5) + check_file_metadata_update( + dataverse_admin_api, dataverse_dataset, odd, fileid, path + ) + path_to_check = relative_path.parent / 'mykey' + fileid = check_replace_file(odd, fileid, path_to_check, tmp_path) + check_rename_file( + odd, fileid, name=str(relative_path.parent / ('ren' + path.name)) + ) + check_remove(odd, fileid, PurePosixPath(path.name)) + check_duplicate_file_deposition(odd, tmp_path) def check_rename_file(odd, fileid, name='place.txt'): @@ -54,14 +66,14 @@ def check_rename_file(odd, fileid, name='place.txt'): assert odd.has_path_in_latest_version(new_path) -def check_replace_file(odd, fileid, tmp_path): - fpath = tmp_path / 'replace_source.txt' - fpath.write_text('some_new_content') - remote_path = PurePosixPath('downstairs') / fpath.name +def check_replace_file(odd, fileid, relative_fpath, tmp_path): + new_path = tmp_path / relative_fpath.parent / ('replace_' + relative_fpath.name) + new_path.write_text('some_new_content') + remote_path = PurePosixPath(relative_fpath.parent) / new_path.name odd.has_fileid(fileid) # we replace the file AND give it a new name at the same time - new_fileid = odd.upload_file(fpath, remote_path, fileid) + new_fileid = odd.upload_file(new_path, remote_path, fileid) assert fileid != new_fileid assert not odd.has_fileid(fileid) assert odd.has_fileid(new_fileid) @@ -92,10 +104,10 @@ def check_duplicate_file_deposition(odd, tmp_path): assert odd.has_path(PurePosixPath(fpaths[1].name)) -def check_upload(odd, fcontent, fpath, src_md5): +def check_upload(odd, 
fcontent, fpath, base_path, src_md5): # the simplest possible upload, just a source file name - remote_path = PurePosixPath(fpath.name) - file_id = odd.upload_file(fpath, remote_path) + remote_path = PurePosixPath(fpath) + file_id = odd.upload_file(base_path / fpath, remote_path) # internal consistency assert odd.has_fileid(file_id) assert odd.has_fileid_in_latest_version(file_id) @@ -198,46 +210,3 @@ def _update_md(fid, rec, mdid=None): assert info['label'] == info['dataFile']['filename'] == 'mykey' old = [m for m in mm if m['label'] == mangle_path(fpath.name)] assert old == [] - - -def test_name_mangling( - tmp_path, - dataverse_admin_api, - dataverse_dataaccess_api, - dataverse_dataset, -): - odd = ODD(dataverse_admin_api, dataverse_dataset) - - paths = ( - tmp_path / ".dot-in-front" 'c1.txt', - tmp_path / " space-in-front" 'c2.txt', - tmp_path / "-minus-in-front" 'c3.txt', - tmp_path / "Ö-in-front" 'c4.txt', - tmp_path / ".Ö-dot-Ö-in-front" 'c5.txt', - tmp_path / " Ö-space-Ö-in-front" 'c6.txt', - ) - - path_info = dict() - for path in paths: - if path.parent != tmp_path: - path.parent.mkdir() - fcontent = path.name - path.write_text(path.name) - src_md5 = md5sum(path) - fileid = check_upload(odd, fcontent, path, src_md5) - path_info[path] = (src_md5, fileid) - - for path, (src_md5, fileid) in path_info.items(): - check_download(odd, fileid, tmp_path / 'downloaded.txt', src_md5) - - check_file_metadata_update( - dataverse_admin_api, - dataverse_dataset, - odd, - fileid, - path) - - fileid = check_replace_file(odd, fileid, tmp_path) - check_rename_file(odd, fileid, name="ren" + path.name) - check_remove(odd, fileid, PurePosixPath(path.name)) - check_duplicate_file_deposition(odd, tmp_path) From 3193bcd27b23c36ed27c9f384e47e0915092aaf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20M=C3=B6nch?= Date: Fri, 17 Mar 2023 16:46:38 +0100 Subject: [PATCH 2/4] Update test_dataset.py --- datalad_dataverse/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/datalad_dataverse/tests/test_dataset.py b/datalad_dataverse/tests/test_dataset.py index d507901..0b0d9f6 100644 --- a/datalad_dataverse/tests/test_dataset.py +++ b/datalad_dataverse/tests/test_dataset.py @@ -27,7 +27,7 @@ def test_file_handling( tmp_path / ' space-in-front' / 'c2.txt', tmp_path / '-minus-in-front' / 'c3.txt', tmp_path / 'Ö-in-front' / 'überflüssiger_fuß.txt', - tmp_path / ' Ö-space-Ö-in-front' / 'a.€🔆.txt', + tmp_path / ' Ö-space-Ö-in-front' / 'a.txt', tmp_path / 'dummy.txt', ) From f73b17fc1b057dd3a7eb36b0a309ecce52577e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20M=C3=B6nch?= Date: Fri, 17 Mar 2023 20:34:24 +0100 Subject: [PATCH 3/4] convert windows paths properly into posix path --- datalad_dataverse/tests/test_dataset.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datalad_dataverse/tests/test_dataset.py b/datalad_dataverse/tests/test_dataset.py index 0b0d9f6..491a03e 100644 --- a/datalad_dataverse/tests/test_dataset.py +++ b/datalad_dataverse/tests/test_dataset.py @@ -4,7 +4,10 @@ At least as long as we are using that API layer. 
""" -from pathlib import PurePosixPath +from pathlib import ( + Path, + PurePosixPath, +) import json from datalad_next.tests.utils import md5sum @@ -58,7 +61,7 @@ def test_file_handling( def check_rename_file(odd, fileid, name='place.txt'): - new_path = PurePosixPath('fresh') / name + new_path = PurePosixPath(*(('fresh',) + Path(name).parts)) assert not odd.has_path(new_path) assert odd.has_fileid(fileid) odd.rename_file(new_path, fileid) From fae7605c54345fcf10edf36afe64ce0d95ee9dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20M=C3=B6nch?= Date: Fri, 17 Mar 2023 21:07:43 +0100 Subject: [PATCH 4/4] improve code co-authored by @adswa, @bpoldrack Use suggestions by Ben and Adina to improve the code Rebased on main --- datalad_dataverse/tests/test_dataset.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/datalad_dataverse/tests/test_dataset.py b/datalad_dataverse/tests/test_dataset.py index 491a03e..52261c8 100644 --- a/datalad_dataverse/tests/test_dataset.py +++ b/datalad_dataverse/tests/test_dataset.py @@ -4,10 +4,7 @@ At least as long as we are using that API layer. 
""" -from pathlib import ( - Path, - PurePosixPath, -) +from pathlib import PurePosixPath import json from datalad_next.tests.utils import md5sum @@ -36,8 +33,7 @@ def test_file_handling( path_info = dict() for path in paths: - if path.parent != tmp_path: - path.parent.mkdir() + path.parent.mkdir(parents=True, exist_ok=True) relative_path = path.relative_to(tmp_path) fcontent = 'content of: ' + str(relative_path) path.write_text(fcontent) @@ -54,14 +50,15 @@ def test_file_handling( path_to_check = relative_path.parent / 'mykey' fileid = check_replace_file(odd, fileid, path_to_check, tmp_path) check_rename_file( - odd, fileid, name=str(relative_path.parent / ('ren' + path.name)) + odd, fileid, + name=PurePosixPath(relative_path.parent / ('ren' + path.name)) ) check_remove(odd, fileid, PurePosixPath(path.name)) check_duplicate_file_deposition(odd, tmp_path) -def check_rename_file(odd, fileid, name='place.txt'): - new_path = PurePosixPath(*(('fresh',) + Path(name).parts)) +def check_rename_file(odd, fileid, name=PurePosixPath('place.txt')): + new_path = PurePosixPath('fresh' / name) assert not odd.has_path(new_path) assert odd.has_fileid(fileid) odd.rename_file(new_path, fileid)