Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ repos:
name: isort (pyi)
types: [pyi]
- repo: https://github.com/psf/black
rev: 21.6b0
rev: 22.3.0
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-mypy
Expand Down
6 changes: 4 additions & 2 deletions slidescore_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# Copyright (c) slidescore_api contributors
"""Main module containing the SlideScore API wrapper."""

# Can't find any duplicate code
# pylint:disable=duplicate-code
import io
import json
import logging
Expand Down Expand Up @@ -45,7 +47,7 @@ class SlideScoreResult:
# pylint: disable=too-many-instance-attributes
"""Slidescore wrapper class for storing SlideScore server responses."""

def __init__(self, slide_dict: Dict = None):
def __init__(self, slide_dict: Optional[Dict] = None):
"""
Parameters
----------
Expand Down Expand Up @@ -446,7 +448,7 @@ def get_tile(self, level: int, x_coord: int, y_coord: int) -> Image:
Gets tile from WSI for given magnification level.
A WSI at any given magnification level is converted into an x by y tile matrix. This method downloads the tile
at col (x) and row (y) only as jpeg. Maximum magnification level can be calculated as follows:
max_level = int(np.ceil(math.log(max_dim, 2))), where max_dim is is the maximum of either height or width
max_level = int(np.ceil(math.log(max_dim, 2))), where max_dim is the maximum of either height or width
of the slide. This can be requested by calling get_image_metadata.

Parameters
Expand Down
218 changes: 206 additions & 12 deletions slidescore_api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
This module contains the CLI utilities that can be used with slidescore in python.

"""

# pylint: disable=duplicate-code

import argparse
import csv
import json
Expand All @@ -15,7 +18,7 @@
from collections import defaultdict
from enum import Enum
from pathlib import Path
from typing import Iterable, Optional
from typing import Iterable, List, Optional, Union

import shapely.geometry
from tqdm import tqdm
Expand Down Expand Up @@ -70,6 +73,7 @@ def parse_api_token(data: Optional[Path] = None) -> str:


def _shapely_to_slidescore(shapely_object):
# pylint: disable=too-many-branches
shapely_type = type(shapely_object)
if shapely_type == shapely.geometry.Polygon:
if len(shapely_object.interiors) != 0:
Expand Down Expand Up @@ -351,30 +355,152 @@ def _download_labels(args: argparse.Namespace) -> None:
)


def append_to_manifest(save_dir: pathlib.Path, image_id: int, filename: pathlib.Path) -> None:
def set_study_details_to_mapping(
    save_dir: pathlib.Path, mapping_format: str, slidescore_url: str, study_id: int
) -> None:
    """
    Record the SlideScore study details (URL and study id) in the mapping file.

    For the "json" format the URL and the study id are stored as the values of the
    "slidescore_url" and "slidescore_study_id" keys, nested under
    ``[slidescore_url][str(study_id)]``. For the "tsv" format they are appended as two
    lines prefixed with "# ".

    Parameters
    ----------
    save_dir : pathlib.Path
        Directory in which the mapping file is written.
    mapping_format : str
        Either "json" or "tsv".
    slidescore_url : str
    study_id : int

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `mapping_format` is neither "json" nor "tsv".
    """
    if mapping_format == "json":
        # Both entries live under the same <url>/<study_id> node of the json document
        study_keys = [slidescore_url, str(study_id)]
        append_to_json_mapping(save_dir=save_dir, keys=[*study_keys, "slidescore_url"], value=slidescore_url)
        append_to_json_mapping(save_dir=save_dir, keys=[*study_keys, "slidescore_study_id"], value=study_id)
    elif mapping_format == "tsv":
        # Study details go in as comment-style header lines, one per call
        append_to_tsv_mapping(save_dir=save_dir, items=[f"# {slidescore_url}"])
        append_to_tsv_mapping(save_dir=save_dir, items=[f"# {study_id}"])
    else:
        raise ValueError(f"mapping_format should be either 'tsv' or 'json', but is {mapping_format}")


def append_to_tsv_mapping(save_dir: pathlib.Path, items: List[str]) -> None:
    """
    Append one tab-separated line to `slidescore_mapping.tsv` inside `save_dir`.

    Repeated calls build up a manifest mapping image id to image name that looks like
    ```txt
    # <slidescore_url>
    # <slidescore_study_id>
    <image_id_1><TAB><image_name_1>
    ...
    ```

    Parameters
    ----------
    save_dir : pathlib.Path
        Directory holding the mapping file; created (including parents) when missing.
    items : List[str]
        Fields of the line to append; they are joined with a tab character.

    Returns
    -------
    None
    """
    # exist_ok avoids the check-then-create race of a separate is_dir() test
    save_dir.mkdir(parents=True, exist_ok=True)
    # Plain append mode is sufficient: the file is never read back here
    with open(save_dir / "slidescore_mapping.tsv", "a", encoding="utf-8") as file:
        file.write("\t".join(items) + "\n")


def append_to_json_mapping(
    save_dir: pathlib.Path,
    keys: List[str],
    value: Union[pathlib.Path, int, str],
    filename: str = "slidescore_mapping.json",
) -> None:
    """
    Set one value at a hierarchical key path inside a json file, creating the file if needed.

    Generic method to append a hierarchical key structure with one value to a dictionary in a json file to fix
    missing functionality in python dict classes

    We wish to get
    >> {}[['a', 'b']] = 1
    {'a': {'b': 1}}

    But this is not possible in a python dict, and throws an error

    This function is used to create a slidescore_mapping.json file, mapping slidescore ID (unique) to image name,
    created when downloading WSIs from slidescore, or when just creating the slidescore_mapping.

    Will end up with a file like
    {
        "url": {
            "study_id": {
                "slidescore_url": "url",
                "slidescore_study_id": study_id,
                "slide_filename_to_study_image_id_mapping": {
                    "image_id": "image_name",
                    ...
                }
            }
        }
    }

    Parameters
    ----------
    save_dir : pathlib.Path
        Directory holding the json file; created (including parents) when missing.
    keys : List[str], sets the hierarchical keys to be set.
        E.g. ['slide_filename_id_mapping', 'filename']
        sets manifest['slide_filename_id_mapping']['filename'] to the given value.
        An empty list leaves the stored object unchanged.
    value : Union[int, pathlib.Path, str], value to be set. Generally either a URL, a path, or an integer ID.
        For a pathlib.Path only its final component (`.name`) is stored.
    filename : filename for the json file

    Returns
    -------
    None
    """
    # Make dir if it doesn't exist. Usage in the CLI, however, places it in an existing directory
    save_dir.mkdir(parents=True, exist_ok=True)

    value = value.name if isinstance(value, pathlib.Path) else value
    config_filepath = save_dir / filename

    try:
        # Read file if it exists
        with open(config_filepath, mode="r", encoding="utf-8") as file:
            obj = json.load(file)
    except FileNotFoundError:
        # Otherwise create new object
        obj = {}

    if keys:
        *parent_keys, leaf_key = keys
        node = obj  # Walks down the nested dicts; mutations are visible through `obj`
        for key in parent_keys:
            # Descend, creating an empty sub-dict where the key does not exist yet
            node = node.setdefault(key, {})
        node[leaf_key] = value  # Set the leaf

    # Save file, overwriting the old file
    with open(config_filepath, mode="w", encoding="utf-8") as file:
        json.dump(obj, file, ensure_ascii=False, indent=4)


# pylint: disable=too-many-arguments, too-many-branches
def download_wsis(
slidescore_url: str,
api_token: str,
study_id: int,
save_dir: pathlib.Path,
disable_certificate_check: bool = False,
disable_download: bool = False,
mapping_format: str = "json",
Comment on lines +502 to +503
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there no other way to get the mapping only?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not that I can think of without duplicating a lot of code or entirely refactoring the function into less coherent parts.

) -> None:
"""
Download all WSIs for a given study from SlideScore
Expand All @@ -386,6 +512,9 @@ def download_wsis(
study_id : int
save_dir : pathlib.Path
disable_certificate_check : bool
disable_download : bool
mapping_format: str
either of "json" or "tsv"

Returns
-------
Expand All @@ -399,23 +528,49 @@ def download_wsis(
# Collect image metadata
images = client.get_images(study_id)

# Add study details to mapping manifest
set_study_details_to_mapping(
save_dir=save_dir, mapping_format=mapping_format, slidescore_url=slidescore_url, study_id=study_id
)

# Download and save WSIs
for image in tqdm(images):
image_id = image["id"]
image_name = image["name"]
if not disable_download:
logger.info("Downloading image for id: %s", image_id)
filename = client.download_slide(study_id, image, save_dir=save_dir)
logger.info("Image with id %s has been saved to %s.", image_id, filename)
if mapping_format == "json":
append_to_json_mapping(
save_dir=save_dir,
keys=[slidescore_url, str(study_id), "slide_filename_to_study_image_id_mapping", str(image_id)],
value=image_name,
)
elif mapping_format == "tsv":
append_to_tsv_mapping(
save_dir=save_dir,
items=[str(image_id), image_name],
)
Comment on lines +550 to +554
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If not one of these, it's better to raise an error

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Thought this was done by the argparser, but raising error here makes the function more generic

    else:
        raise ValueError(f"mapping_format should be either 'tsv' or 'json', but is {mapping_format}")



logger.info("Downloading image for id: %s", image_id)
filename = client.download_slide(study_id, image, save_dir=save_dir)
logger.info("Image with id %s has been saved to %s.", image_id, filename)
append_to_manifest(save_dir, image_id, filename)
def _download_mapping(args: argparse.Namespace):
    """Main function that downloads only the mapping from filename to slidescore slide ID.

    Delegates to `_download_wsi` with `disable_download=True`, so that only the mapping is saved.

    Parameters
    ----------
    args: argparse.Namespace
        The arguments passed from the CLI.
    """
    _download_wsi(args, disable_download=True)


def _download_wsi(args: argparse.Namespace):
def _download_wsi(args: argparse.Namespace, disable_download=False):
"""Main function that downloads WSIs from SlideScore.

Parameters
----------
args: argparse.Namespace
The arguments passed from the CLI. Run with `-h` to see the required parameters
disable_download: bool
If download is disabled, only the mapping is saved. Can also be used to debug.

Returns
-------
Expand All @@ -429,6 +584,8 @@ def _download_wsi(args: argparse.Namespace):
args.study_id,
args.output_dir,
disable_certificate_check=args.disable_certificate_check,
disable_download=disable_download,
mapping_format=args.mapping_format,
)


Expand All @@ -445,6 +602,43 @@ def register_parser(parser: argparse._SubParsersAction):

download_wsi_parser.set_defaults(subcommand=_download_wsi)

download_wsi_parser.add_argument(
"--mapping-format",
dest="mapping_format",
type=str,
help="Save mapping as either json or tsv",
choices=["tsv", "json"],
required=False,
default="tsv",
)

download_mapping_parser = parser.add_parser(
"download-study-slide-mapping",
help="Download the download_config.json"
" with url, study id, and file to "
"slidescore ID mapping from SlideScore"
" without downloading the WSIs. "
"Useful if slides are already on disk,"
"but slidescore information is not",
)
download_mapping_parser.add_argument(
"output_dir",
type=pathlib.Path,
help="Directory to save output too.",
)

download_mapping_parser.add_argument(
"--mapping-format",
dest="mapping_format",
type=str,
help="Save mapping as either json or tsv",
choices=["tsv", "json"],
required=False,
default="tsv",
)

download_mapping_parser.set_defaults(subcommand=_download_mapping)

download_label_parser = parser.add_parser("download-labels", help="Download labels from SlideScore.")
download_label_parser.add_argument(
"-q",
Expand Down
Loading