From a021ff794392c4df8c2cf9b34ff9fa0ec2957556 Mon Sep 17 00:00:00 2001 From: Yoni Schirris Date: Thu, 17 Nov 2022 18:31:39 +0100 Subject: [PATCH 1/5] add slidescore url and slidescore study id to download manifest. - save manifest as json - when starting the download, add slidescore_url and slidescore_study_id - add layer of slidescore_url and slidescore_study_id keys, might a user download multiple studies into a single directory, so as to not overwrite the initial values of `slidescore_study_id` if a second study is downloaded to the same directory. --- .pre-commit-config.yaml | 2 +- slidescore_api/cli.py | 84 +++++++++++++++++++++++++++++++++++++---- tests/test_tests.py | 52 ++++++++++++++++++++++++- 3 files changed, 127 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7058f46..57a584e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: name: isort (pyi) types: [pyi] - repo: https://github.com/psf/black - rev: 21.6b0 + rev: 22.3.0 hooks: - id: black - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/slidescore_api/cli.py b/slidescore_api/cli.py index 24b2c32..dda632f 100644 --- a/slidescore_api/cli.py +++ b/slidescore_api/cli.py @@ -5,6 +5,9 @@ This module contains the CLI utilities that can be used with slidescore in python. 
""" + +# pylint: disable=duplicate-code + import argparse import csv import json @@ -15,7 +18,7 @@ from collections import defaultdict from enum import Enum from pathlib import Path -from typing import Iterable, Optional +from typing import Iterable, List, Optional, Union import shapely.geometry from tqdm import tqdm @@ -70,6 +73,7 @@ def parse_api_token(data: Optional[Path] = None) -> str: def _shapely_to_slidescore(shapely_object): + # pylint: disable=too-many-branches shapely_type = type(shapely_object) if shapely_type == shapely.geometry.Polygon: if len(shapely_object.interiors) != 0: @@ -351,22 +355,78 @@ def _download_labels(args: argparse.Namespace) -> None: ) -def append_to_manifest(save_dir: pathlib.Path, image_id: int, filename: pathlib.Path) -> None: +def append_to_manifest(save_dir: pathlib.Path, keys: List[str], value: Union[pathlib.Path, int, str]) -> None: """ - Create a manifest mapping image id to the filename. + Generic method to append a hierarchical key structure with one value to a dictionary in a json file to fix + missing functionality in python dict classes + + Works as desired: + >> {}['a'] = 1 + {'a': 1'} + + Does not work as desired in this case + >> {}['a', 'b'] = 1 + {('a', 'b'): 1} + + The following is not possible in a python dict, and throws an error + >> {}[['a', 'b']] = 1 + TypeError: unhashable type: 'list' + + We wish to get + >> {}[['a', 'b']] = 1 + {'a': {'b': 1}} + + Used to create a manifest mapping filename to slidescore ID Created when downloading WSIs from slidescores. + + Will end up with something like + { + 'slidescore_url': str, + 'slidescore_study_id': int, + 'slide_filename_to_id_mapping': { + str: int + ... + } + } Parameters ---------- save_dir : pathlib.Path - image_id : int - filename : pathlib.Path + keys : List[Union[str, pathlib.Path]], sets the hierarchical keys to be set. + E.g. 
['slide_filename_id_mapping', 'filename'] + sets manifest['slide_filename_id_mapping']['filename'] to the given slidescore ID + value : Union[int, pathlib.Path, str], value to be set. Generally either a URL, a path, or an integer ID Returns ------- None """ - with open(save_dir / "slidescore_mapping.txt", "a", encoding="utf-8") as file: - file.write(f"{image_id} {filename.name}\n") + # Make dir if it doesn't exist. Usage in the CLI, however, places it in an existing directory + if not save_dir.is_dir(): + save_dir.mkdir(parents=True) + + value = value.name if isinstance(value, pathlib.Path) else value + config_filepath = save_dir / "download_config.json" + + try: + # Read file if it exists + with open(config_filepath, mode="r", encoding="utf-8") as file: + obj = json.load(file) + except FileNotFoundError: + # Otherwise create new object + obj = {} + + new_obj = obj # Create a pointer that we can update + for idx, key in enumerate(keys): + if idx == len(keys) - 1: # At the last node + new_obj[key] = value # Set the leaf + else: + if key not in new_obj.keys(): + new_obj[key] = {} # Not at the last node and the key does not exist; make a subdict + new_obj = new_obj[key] # Update pointer + + # Save file, overwriting the old file + with open(config_filepath, mode="w", encoding="utf-8") as file: + json.dump(obj, file, ensure_ascii=False, indent=4) def download_wsis( @@ -399,6 +459,10 @@ def download_wsis( # Collect image metadata images = client.get_images(study_id) + # # Add study details to manifest + append_to_manifest(save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_url"], value=slidescore_url) + append_to_manifest(save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_study_id"], value=study_id) + # Download and save WSIs for image in tqdm(images): image_id = image["id"] @@ -406,7 +470,11 @@ def download_wsis( logger.info("Downloading image for id: %s", image_id) filename = client.download_slide(study_id, image, save_dir=save_dir) 
logger.info("Image with id %s has been saved to %s.", image_id, filename) - append_to_manifest(save_dir, image_id, filename) + append_to_manifest( + save_dir=save_dir, + keys=[slidescore_url, str(study_id), "slide_filename_to_study_image_id_mapping", filename.name], + value=image_id, + ) def _download_wsi(args: argparse.Namespace): diff --git a/tests/test_tests.py b/tests/test_tests.py index 3953d5e..63308d0 100644 --- a/tests/test_tests.py +++ b/tests/test_tests.py @@ -1,4 +1,52 @@ # coding=utf-8 # Placeholder -def test_test(): - assert True +""" +Unit tests +""" + +# pylint: disable=duplicate-code + +import json +from pathlib import Path + +from slidescore_api.cli import append_to_manifest + + +def test_append_to_manifest(): + # Ignores similar lines + """ + Tests the function of slidescore_api.cli.append_to_manifest + """ + if Path("./test_append_to_manifest/download_config.json").is_file(): + Path("./test_append_to_manifest/download_config.json").unlink() # Remove file + + append_to_manifest( + save_dir=Path("./test_append_to_manifest"), keys=["slidescore_url", "1234", "slidescore_url"], value="https" + ) # -> {"slidescore_url": {"1234": {"slidescore_url": "https}}} + + append_to_manifest( + save_dir=Path("./test_append_to_manifest"), keys=["slidescore_url", "1234", "slidescore_id"], value=1234 + ) # -> {"slidescore_url": {"1234": {"slidescore_url": "https, "slidescore_id": 1234}}} + + append_to_manifest( + save_dir=Path("./test_append_to_manifest"), keys=["slidescore_url", "1234", "mapping", "pathname"], value=1234 + ) + # -> {"slidescore_url": {"1234": {"slidescore_url": "https, "slidescore_id": 1234, + # "mapping": {"pathname": 1234} }}} + + expected_object = { + "slidescore_url": {"1234": {"slidescore_url": "https", "slidescore_id": 1234, "mapping": {"pathname": 1234}}} + } + + with open("./test_append_to_manifest/download_config.json", "r", encoding="utf-8") as file: + obj = json.load(file) + assert ( + obj == expected_object + ), f"expected object is 
{expected_object}, while the actual object on disk is {obj}" + + Path("./test_append_to_manifest/download_config.json").unlink() # Remove file + Path("./test_append_to_manifest").rmdir() # Remove dir + + +if __name__ == "__main__": + test_append_to_manifest() From 46ebb0a0769b1ccbca64222039b9efa8ad0fe11a Mon Sep 17 00:00:00 2001 From: Yoni Schirris Date: Fri, 18 Nov 2022 12:03:03 +0100 Subject: [PATCH 2/5] update saving of mappings - add option to save mappings without downloading WSIs - add option to save mappings as tsv instead of json - save image name instead of file name, since this is not helpful when we download .mrxs files as zips - flip image name -> image id mapping to image id -> image name mapping, since image name is not necessarily unique --- slidescore_api/cli.py | 126 +++++++++++++++++++++++++++++++++++++----- tests/test_tests.py | 65 +++++++++++++++++----- 2 files changed, 162 insertions(+), 29 deletions(-) diff --git a/slidescore_api/cli.py b/slidescore_api/cli.py index dda632f..d159f8c 100644 --- a/slidescore_api/cli.py +++ b/slidescore_api/cli.py @@ -355,7 +355,35 @@ def _download_labels(args: argparse.Namespace) -> None: ) -def append_to_manifest(save_dir: pathlib.Path, keys: List[str], value: Union[pathlib.Path, int, str]) -> None: +def append_to_tsv_mapping(save_dir: pathlib.Path, items: List[str]) -> None: + """ + Create a manifest mapping image id to image name + + Creates a file that looks like + ```txt + # + # + + ... 
+ ``` + + Parameters + ---------- + save_dir: pathlib.Path + items: List[str] + + Returns + ------- + None + """ + if not save_dir.is_dir(): + save_dir.mkdir(parents=True) + tab = "\t" + with open(save_dir / "slidescore_mapping.tsv", "a+", encoding="utf-8") as file: + file.write(f"{tab.join(items)}\n") + + +def append_to_json_mapping(save_dir: pathlib.Path, keys: List[str], value: Union[pathlib.Path, int, str]) -> None: """ Generic method to append a hierarchical key structure with one value to a dictionary in a json file to fix missing functionality in python dict classes @@ -405,7 +433,7 @@ def append_to_manifest(save_dir: pathlib.Path, keys: List[str], value: Union[pat save_dir.mkdir(parents=True) value = value.name if isinstance(value, pathlib.Path) else value - config_filepath = save_dir / "download_config.json" + config_filepath = save_dir / "slidescore_mapping.json" try: # Read file if it exists @@ -429,12 +457,15 @@ def append_to_manifest(save_dir: pathlib.Path, keys: List[str], value: Union[pat json.dump(obj, file, ensure_ascii=False, indent=4) +# pylint: disable=too-many-arguments def download_wsis( slidescore_url: str, api_token: str, study_id: int, save_dir: pathlib.Path, disable_certificate_check: bool = False, + disable_download: bool = False, + mapping_format: str = "json", ) -> None: """ Download all WSIs for a given study from SlideScore @@ -446,6 +477,9 @@ def download_wsis( study_id : int save_dir : pathlib.Path disable_certificate_check : bool + disable_download : bool + mapping_format: str + either of "json" or "tsv" Returns ------- @@ -459,31 +493,56 @@ def download_wsis( # Collect image metadata images = client.get_images(study_id) - # # Add study details to manifest - append_to_manifest(save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_url"], value=slidescore_url) - append_to_manifest(save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_study_id"], value=study_id) + # # Add study details to mapping manifest + 
if mapping_format == "json": + append_to_json_mapping( + save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_url"], value=slidescore_url + ) + append_to_json_mapping( + save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_study_id"], value=study_id + ) + elif mapping_format == "tsv": + append_to_tsv_mapping(save_dir=save_dir, items=[f"# {slidescore_url}"]) + append_to_tsv_mapping(save_dir=save_dir, items=[f"# {study_id}"]) # Download and save WSIs for image in tqdm(images): image_id = image["id"] + image_name = image["name"] + if not disable_download: + logger.info("Downloading image for id: %s", image_id) + filename = client.download_slide(study_id, image, save_dir=save_dir) + logger.info("Image with id %s has been saved to %s.", image_id, filename) + if mapping_format == "json": + append_to_json_mapping( + save_dir=save_dir, + keys=[slidescore_url, str(study_id), "slide_filename_to_study_image_id_mapping", str(image_id)], + value=image_name, + ) + elif mapping_format == "tsv": + append_to_tsv_mapping( + save_dir=save_dir, + items=[str(image_id), image_name], + ) - logger.info("Downloading image for id: %s", image_id) - filename = client.download_slide(study_id, image, save_dir=save_dir) - logger.info("Image with id %s has been saved to %s.", image_id, filename) - append_to_manifest( - save_dir=save_dir, - keys=[slidescore_url, str(study_id), "slide_filename_to_study_image_id_mapping", filename.name], - value=image_id, - ) +def _download_mapping(args: argparse.Namespace): + """Main function that downloads only the mapping from filename to slidescore slide ID -def _download_wsi(args: argparse.Namespace): + Calls _download_wsi while setting `disable_download=True` + """ + _download_wsi(args=args, disable_download=True) + + +def _download_wsi(args: argparse.Namespace, disable_download=False): """Main function that downloads WSIs from SlideScore. Parameters ---------- args: argparse.Namespace The arguments passed from the CLI. 
Run with `-h` to see the required parameters + disable_download: bool + If download is disabled, only the mapping is saved. Can also be used to debug. Returns ------- @@ -497,6 +556,8 @@ def _download_wsi(args: argparse.Namespace): args.study_id, args.output_dir, disable_certificate_check=args.disable_certificate_check, + disable_download=disable_download, + mapping_format=args.mapping_format, ) @@ -513,6 +574,43 @@ def register_parser(parser: argparse._SubParsersAction): download_wsi_parser.set_defaults(subcommand=_download_wsi) + download_wsi_parser.add_argument( + "--mapping-format", + dest="mapping_format", + type=str, + help="Save mapping as either json or tsv", + choices=["tsv", "json"], + required=False, + default="tsv", + ) + + download_mapping_parser = parser.add_parser( + "download-study-slide-mapping", + help="Download the download_config.json" + " with url, study id, and file to " + "slidescore ID mapping from SlideScore" + " without downloading the WSIs. " + "Useful if slides are already on disk," + "but slidescore information is not", + ) + download_mapping_parser.add_argument( + "output_dir", + type=pathlib.Path, + help="Directory to save output too.", + ) + + download_mapping_parser.add_argument( + "--mapping-format", + dest="mapping_format", + type=str, + help="Save mapping as either json or tsv", + choices=["tsv", "json"], + required=False, + default="tsv", + ) + + download_mapping_parser.set_defaults(subcommand=_download_mapping) + download_label_parser = parser.add_parser("download-labels", help="Download labels from SlideScore.") download_label_parser.add_argument( "-q", diff --git a/tests/test_tests.py b/tests/test_tests.py index 63308d0..fa6a2aa 100644 --- a/tests/test_tests.py +++ b/tests/test_tests.py @@ -9,44 +9,79 @@ import json from pathlib import Path -from slidescore_api.cli import append_to_manifest +from slidescore_api.cli import append_to_json_mapping, append_to_tsv_mapping -def test_append_to_manifest(): +def 
test_append_to_json_mapping(): # Ignores similar lines """ Tests the function of slidescore_api.cli.append_to_manifest """ - if Path("./test_append_to_manifest/download_config.json").is_file(): - Path("./test_append_to_manifest/download_config.json").unlink() # Remove file + if Path("./test_append_to_json_mapping/slidescore_mapping.json").is_file(): + Path("./test_append_to_json_mapping/slidescore_mapping.json").unlink() # Remove file - append_to_manifest( - save_dir=Path("./test_append_to_manifest"), keys=["slidescore_url", "1234", "slidescore_url"], value="https" + append_to_json_mapping( + save_dir=Path("./test_append_to_json_mapping"), + keys=["slidescore_url", "1234", "slidescore_url"], + value="https", ) # -> {"slidescore_url": {"1234": {"slidescore_url": "https}}} - append_to_manifest( - save_dir=Path("./test_append_to_manifest"), keys=["slidescore_url", "1234", "slidescore_id"], value=1234 + append_to_json_mapping( + save_dir=Path("./test_append_to_json_mapping"), keys=["slidescore_url", "1234", "slidescore_id"], value=1234 ) # -> {"slidescore_url": {"1234": {"slidescore_url": "https, "slidescore_id": 1234}}} - append_to_manifest( - save_dir=Path("./test_append_to_manifest"), keys=["slidescore_url", "1234", "mapping", "pathname"], value=1234 + append_to_json_mapping( + save_dir=Path("./test_append_to_json_mapping"), + keys=["slidescore_url", "1234", "mapping", "1234"], + value="slidename", ) # -> {"slidescore_url": {"1234": {"slidescore_url": "https, "slidescore_id": 1234, # "mapping": {"pathname": 1234} }}} expected_object = { - "slidescore_url": {"1234": {"slidescore_url": "https", "slidescore_id": 1234, "mapping": {"pathname": 1234}}} + "slidescore_url": { + "1234": {"slidescore_url": "https", "slidescore_id": 1234, "mapping": {"1234": "slidename"}} + } } - with open("./test_append_to_manifest/download_config.json", "r", encoding="utf-8") as file: + with open("./test_append_to_json_mapping/slidescore_mapping.json", "r", encoding="utf-8") as file: obj = 
json.load(file) assert ( obj == expected_object ), f"expected object is {expected_object}, while the actual object on disk is {obj}" - Path("./test_append_to_manifest/download_config.json").unlink() # Remove file - Path("./test_append_to_manifest").rmdir() # Remove dir + Path("./test_append_to_json_mapping/slidescore_mapping.json").unlink() # Remove file + Path("./test_append_to_json_mapping").rmdir() # Remove dir + + +def test_append_to_tsv_mapping(): + # Ignores similar lines + """ + Tests the function of slidescore_api.cli.append_to_tsv_manifest + """ + if Path("./test_append_to_tsv_mapping/slidescore_mapping.tsv").is_file(): + Path("./test_append_to_tsv_mapping/slidescore_mapping.tsv").unlink() # Remove file + + append_to_tsv_mapping(save_dir=Path("./test_append_to_tsv_mapping"), items=["# slidescore_url"]) + # -> {"slidescore_url": {"1234": {"slidescore_url": "https}}} + + append_to_tsv_mapping(save_dir=Path("./test_append_to_tsv_mapping"), items=["# slidescore_study_id"]) + + append_to_tsv_mapping(save_dir=Path("./test_append_to_tsv_mapping"), items=["image_id", "image_name"]) + + lines = [] + with open("./test_append_to_tsv_mapping/slidescore_mapping.tsv", "r", encoding="utf-8") as file: + for line in file: + lines.append(line) + + assert lines[0] == "# slidescore_url\n" + assert lines[1] == "# slidescore_study_id\n" + assert lines[2] == "image_id\timage_name\n" + + Path("./test_append_to_tsv_mapping/slidescore_mapping.tsv").unlink() # Remove file + Path("./test_append_to_tsv_mapping").rmdir() # Remove dir if __name__ == "__main__": - test_append_to_manifest() + test_append_to_json_mapping() + test_append_to_tsv_mapping() From 0a47294101571043cb16efb6c95d199cc5b79988 Mon Sep 17 00:00:00 2001 From: Yoni Schirris Date: Fri, 18 Nov 2022 14:49:12 +0100 Subject: [PATCH 3/5] fix mypy typing errors. duplicate code not found by pycharm, so is ignored for now. 
--- slidescore_api/api.py | 6 ++++-- slidescore_api/utils/annotations.py | 25 +++++++++++++++++-------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/slidescore_api/api.py b/slidescore_api/api.py index e6af792..762eca8 100644 --- a/slidescore_api/api.py +++ b/slidescore_api/api.py @@ -2,6 +2,8 @@ # Copyright (c) slidescore_api contributors """Main module containing the SlideScore API wrapper.""" +# Can't find any duplicate code +# pylint:disable=duplicate-code import io import json import logging @@ -45,7 +47,7 @@ class SlideScoreResult: # pylint: disable=too-many-instance-attributes """Slidescore wrapper class for storing SlideScore server responses.""" - def __init__(self, slide_dict: Dict = None): + def __init__(self, slide_dict: Optional[Dict] = None): """ Parameters ---------- @@ -446,7 +448,7 @@ def get_tile(self, level: int, x_coord: int, y_coord: int) -> Image: Gets tile from WSI for given magnification level. A WSI at any given magnification level is converted into an x by y tile matrix. This method downloads the tile at col (x) and row (y) only as jpeg. Maximum magnification level can be calculated as follows: - max_level = int(np.ceil(math.log(max_dim, 2))), where max_dim is is the maximum of either height or width + max_level = int(np.ceil(math.log(max_dim, 2))), where max_dim is the maximum of either height or width of the slide. This can be requested by calling get_image_metadata. 
Parameters diff --git a/slidescore_api/utils/annotations.py b/slidescore_api/utils/annotations.py index 799fb49..724c27a 100644 --- a/slidescore_api/utils/annotations.py +++ b/slidescore_api/utils/annotations.py @@ -1,12 +1,14 @@ # coding=utf-8 """Utility file containing parsing modules and functions to save slidescore annotations.""" +# Can't find any duplicate code +# pylint:disable=duplicate-code import json import logging import warnings from enum import Enum from pathlib import Path -from typing import Any, Dict, Iterable, List, NamedTuple, TypedDict, Union +from typing import Any, Dict, Iterable, List, NamedTuple, Optional, TypedDict, Union import numpy as np import shapely.errors @@ -128,9 +130,13 @@ def save_shapely(annotations: ImageAnnotation, save_dir: Path) -> None: # pylin coords = annotations.annotation[ann_id]["points"] if isinstance(coords, (Polygon, MultiPolygon)) and coords.area == 0: + author = annotations.author + slide_name = annotations.slide_name logger.warning( - f"Dismissed polygon for {annotations.author} and {annotations.slide_name} because area = 0." 
+ "Dismissed polygon for %s because area = 0.", + author, ) + logger.warning("^-- the above is related to %s", slide_name) continue dump_list.append(coords) feature_collection = _to_geojson_format( @@ -139,7 +145,10 @@ def save_shapely(annotations: ImageAnnotation, save_dir: Path) -> None: # pylin json.dump(feature_collection, file, indent=2) -def _parse_brush_annotation(annotations: Dict) -> Dict: # pylint:disable=logging-fstring-interpolation +# pylint:disable=logging-fstring-interpolation, too-many-branches +def _parse_brush_annotation( + annotations: Dict, +) -> Dict: """ Parameters @@ -338,7 +347,7 @@ def _parse_annotation_row(self, row, filter_empty): # pylint:disable=too-many-b try: ann = json.loads(_row["Answer"]) if len(ann) > 0: - # Points dont have type, only x,y; so we use that to distinguish task + # Points don't have type, only x,y; so we use that to distinguish task # Code can be shortened, but is more readable this way if "type" in ann[0]: label_type = "segmentation" @@ -386,9 +395,9 @@ def annotated_images_list(self) -> list: def from_iterable( self, row_iterator: Iterable, - filter_author: str = None, - filter_label: str = None, - filter_empty=True, + filter_author: Optional[str] = None, + filter_label: Optional[str] = None, + filter_empty: bool = True, ) -> Iterable: """ Function to convert slidescore annotations (txt file) to an iterable. @@ -410,7 +419,7 @@ def from_iterable( row_iterator: Iterable An iterable object that holds a single row of annotations and attributes. filter_empty: bool - A binary flag to indicate whether or not empty rows must be filtered. + A binary flag to indicate whether empty rows must be filtered. filter_author: str Email-like string to look for annotations corresponding to a particular annotation author. 
filter_label: From c5e19a62d0f078bb5d1e9f72054320362bd50364 Mon Sep 17 00:00:00 2001 From: Yoni Schirris Date: Fri, 18 Nov 2022 15:06:38 +0100 Subject: [PATCH 4/5] fix comments - update docstring of file structure of slidescore_mapping.json - remove double # - raise valueerror when mapping format is not properly given --- slidescore_api/cli.py | 50 +++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/slidescore_api/cli.py b/slidescore_api/cli.py index d159f8c..1f6a895 100644 --- a/slidescore_api/cli.py +++ b/slidescore_api/cli.py @@ -383,36 +383,37 @@ def append_to_tsv_mapping(save_dir: pathlib.Path, items: List[str]) -> None: file.write(f"{tab.join(items)}\n") -def append_to_json_mapping(save_dir: pathlib.Path, keys: List[str], value: Union[pathlib.Path, int, str]) -> None: +def append_to_json_mapping( + save_dir: pathlib.Path, + keys: List[str], + value: Union[pathlib.Path, int, str], + filename: str = "slidescore_mapping.json", +) -> None: """ Generic method to append a hierarchical key structure with one value to a dictionary in a json file to fix missing functionality in python dict classes - Works as desired: - >> {}['a'] = 1 - {'a': 1'} - - Does not work as desired in this case - >> {}['a', 'b'] = 1 - {('a', 'b'): 1} - - The following is not possible in a python dict, and throws an error - >> {}[['a', 'b']] = 1 - TypeError: unhashable type: 'list' - We wish to get >> {}[['a', 'b']] = 1 {'a': {'b': 1}} - Used to create a manifest mapping filename to slidescore ID Created when downloading WSIs from slidescores. + But this is not possible in a python dict, and throws an error + + This function is used to create a slidescore_mapping.json file, mapping slidescore ID (unique) to image name, + created when downloading WSIs from slidescores, or when just creating the slidescore_mapping. 
- Will end up with something like + Will end up with a file like { - 'slidescore_url': str, - 'slidescore_study_id': int, - 'slide_filename_to_id_mapping': { - str: int - ... + "url": { + "study_id": { + "slidescore_url": "url", + "slidescore_study_id": study_id, + "slide_filename_to_study_image_id_mapping": { + "image_id": "image_name", + ... + ... + } + } } } @@ -423,6 +424,7 @@ def append_to_json_mapping(save_dir: pathlib.Path, keys: List[str], value: Union E.g. ['slide_filename_id_mapping', 'filename'] sets manifest['slide_filename_id_mapping']['filename'] to the given slidescore ID value : Union[int, pathlib.Path, str], value to be set. Generally either a URL, a path, or an integer ID + filename : filename for json file Returns ------- @@ -433,7 +435,7 @@ def append_to_json_mapping(save_dir: pathlib.Path, keys: List[str], value: Union save_dir.mkdir(parents=True) value = value.name if isinstance(value, pathlib.Path) else value - config_filepath = save_dir / "slidescore_mapping.json" + config_filepath = save_dir / filename try: # Read file if it exists @@ -457,7 +459,7 @@ def append_to_json_mapping(save_dir: pathlib.Path, keys: List[str], value: Union json.dump(obj, file, ensure_ascii=False, indent=4) -# pylint: disable=too-many-arguments +# pylint: disable=too-many-arguments, too-many-branches def download_wsis( slidescore_url: str, api_token: str, @@ -493,7 +495,7 @@ def download_wsis( # Collect image metadata images = client.get_images(study_id) - # # Add study details to mapping manifest + # Add study details to mapping manifest if mapping_format == "json": append_to_json_mapping( save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_url"], value=slidescore_url @@ -504,6 +506,8 @@ def download_wsis( elif mapping_format == "tsv": append_to_tsv_mapping(save_dir=save_dir, items=[f"# {slidescore_url}"]) append_to_tsv_mapping(save_dir=save_dir, items=[f"# {study_id}"]) + else: + raise ValueError(f"mapping_format should be either 'tsv' or 'json', but is 
{mapping_format}") # Download and save WSIs for image in tqdm(images): From 2790c9176aa0e7f80ae648be83494f2b779666d1 Mon Sep 17 00:00:00 2001 From: Yoni Schirris Date: Fri, 18 Nov 2022 15:19:38 +0100 Subject: [PATCH 5/5] refactor setting of slidescore url and study id details into function --- slidescore_api/cli.py | 48 ++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/slidescore_api/cli.py b/slidescore_api/cli.py index 1f6a895..c480f30 100644 --- a/slidescore_api/cli.py +++ b/slidescore_api/cli.py @@ -355,6 +355,39 @@ def _download_labels(args: argparse.Namespace) -> None: ) +def set_study_details_to_mapping( + save_dir: pathlib.Path, mapping_format: str, slidescore_url: str, study_id: int +) -> None: + """ + Sets the slidescore study details to the mapping file + + Parameters + ---------- + + save_dir: pathlib.Path + mapping_format: str + slidescore_url: str + study_id: int + + Returns + ------- + + None + """ + if mapping_format == "json": + append_to_json_mapping( + save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_url"], value=slidescore_url + ) + append_to_json_mapping( + save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_study_id"], value=study_id + ) + elif mapping_format == "tsv": + append_to_tsv_mapping(save_dir=save_dir, items=[f"# {slidescore_url}"]) + append_to_tsv_mapping(save_dir=save_dir, items=[f"# {study_id}"]) + else: + raise ValueError(f"mapping_format should be either 'tsv' or 'json', but is {mapping_format}") + + def append_to_tsv_mapping(save_dir: pathlib.Path, items: List[str]) -> None: """ Create a manifest mapping image id to image name @@ -496,18 +529,9 @@ def download_wsis( images = client.get_images(study_id) # Add study details to mapping manifest - if mapping_format == "json": - append_to_json_mapping( - save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_url"], value=slidescore_url - ) - append_to_json_mapping( - 
save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_study_id"], value=study_id - ) - elif mapping_format == "tsv": - append_to_tsv_mapping(save_dir=save_dir, items=[f"# {slidescore_url}"]) - append_to_tsv_mapping(save_dir=save_dir, items=[f"# {study_id}"]) - else: - raise ValueError(f"mapping_format should be either 'tsv' or 'json', but is {mapping_format}") + set_study_details_to_mapping( + save_dir=save_dir, mapping_format=mapping_format, slidescore_url=slidescore_url, study_id=study_id + ) # Download and save WSIs for image in tqdm(images):