Skip to content
This repository was archived by the owner on Oct 19, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion ahcore/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dlup.backends.common import AbstractSlideBackend
from dlup.types import PathLike

from ahcore.readers import StitchingMode, ZarrFileImageReader
from ahcore.readers import StitchingMode, ZarrFileImageReader, H5FileImageReader


class ZarrSlide(AbstractSlideBackend):
Expand Down Expand Up @@ -42,3 +42,39 @@ def read_region(self, coordinates: tuple[int, int], level: int, size: tuple[int,

def close(self) -> None:
    """Release the underlying reader and any resources it holds open."""
    self._reader.close()

class H5Slide(AbstractSlideBackend):
    """dlup slide backend serving a single-level image from an H5 cache file.

    All pixel access is delegated to :class:`H5FileImageReader`; the file is
    exposed as a one-level pyramid at the reader's native mpp.
    """

    def __init__(self, filename: PathLike, stitching_mode: StitchingMode | str = StitchingMode.CROP) -> None:
        super().__init__(filename)
        self._reader: H5FileImageReader = H5FileImageReader(filename, stitching_mode=stitching_mode)
        # Isotropic spacing: the reader reports a single mpp value.
        mpp = self._reader.mpp
        self._spacings = [(mpp, mpp)]

    @property
    def size(self):
        """Dimensions of the (only) level, as reported by the reader."""
        return self._reader.size

    @property
    def level_dimensions(self) -> tuple[tuple[int, int], ...]:
        """One-level pyramid: only the native size exists."""
        return (self.size,)

    @property
    def level_downsamples(self) -> tuple[float, ...]:
        """Single level, hence a single downsample factor of 1.0."""
        return (1.0,)

    @property
    def vendor(self) -> str:
        """Identifier of the backing reader implementation."""
        return "H5FileImageReader"

    @property
    def properties(self) -> dict[str, Any]:
        """Metadata stored alongside the image data in the H5 file."""
        return self._reader.metadata

    @property
    def magnification(self):
        """Objective magnification is not recorded in H5 caches."""
        return None

    def read_region(self, coordinates: tuple[int, int], level: int, size: tuple[int, int]) -> pyvips.Image:
        """Read a ``size`` region at ``coordinates``; delegated to the reader."""
        return self._reader.read_region(coordinates, level, size)

    def close(self):
        """Close the underlying H5 reader."""
        self._reader.close()
123 changes: 119 additions & 4 deletions ahcore/cli/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Module to write copy manifests files over to SCRATCH directory"""

from __future__ import annotations

import argparse
import hashlib
import os
Expand Down Expand Up @@ -44,8 +43,18 @@ def copy_data(args: argparse.Namespace) -> None:
for patient in all_records:
for image in patient.images:
image_fn = image.filename

get_from = base_dir / image_fn
write_to = Path(target_dir) / dataset_name / image_fn
write_to = Path(target_dir) / dataset_name / "images" / image_fn

accompanying_folder_write_to, accompanying_folder_get_from = None, None
if get_from.suffix == ".mrxs":
accompanying_folder_get_from = get_from.parent / get_from.stem
if not accompanying_folder_get_from.is_dir():
raise ValueError(
f"Image {image_fn} does not have an accompanying folder, which is expected for mrxs images"
)
accompanying_folder_write_to = write_to.parent / write_to.stem

write_to.parent.mkdir(parents=True, exist_ok=True)
if write_to.exists():
Expand All @@ -54,16 +63,93 @@ def copy_data(args: argparse.Namespace) -> None:
new_hash = _quick_hash(get_from)
if old_hash == new_hash:
# Skip if they are the same
progress.console.log("Skipping file as it already exists: {}".format(image_fn))
progress.console.log("Skipping (image) file as it already exists: {}".format(image_fn))
progress.update(task, advance=1)
continue

total_size += get_from.stat().st_size

# Copy file from get_from to write_to
shutil.copy(get_from, write_to)
if accompanying_folder_get_from is not None:
shutil.copytree(accompanying_folder_get_from, accompanying_folder_write_to, dirs_exist_ok=True)
total_size += accompanying_folder_get_from.stat().st_size
progress.update(task, advance=1)

# copy mask and annotations
if args.mask_dir is not None or args.annotations_dir is not None:
for mask in image.masks:
mask_fn = mask.filename
get_from = args.mask_dir / mask_fn
write_to = Path(target_dir) / dataset_name / "masks" / mask_fn
write_to.parent.mkdir(parents=True, exist_ok=True)
if write_to.exists():
# compute the hash of previous and new file
old_hash = _quick_hash(write_to)
new_hash = _quick_hash(get_from)
if old_hash == new_hash:
# Skip if they are the same
progress.console.log(
"Skipping (mask) file as it already exists: {}".format(mask_fn)
)
progress.update(task, advance=1)
continue

total_size += get_from.stat().st_size
shutil.copy(get_from, write_to)
progress.update(task, advance=1)

for annotation in image.annotations:
annotation_fn = annotation.filename
get_from = args.annotations_dir / annotation_fn
write_to = Path(target_dir) / dataset_name / "annotations" / annotation_fn
write_to.parent.mkdir(parents=True, exist_ok=True)
if write_to.exists():
# compute the hash of previous and new file
old_hash = _quick_hash(write_to)
new_hash = _quick_hash(get_from)
if old_hash == new_hash:
# Skip if they are the same
progress.console.log(
"Skipping (annotation) file as it already exists: {}".format(annotation_fn)
)
progress.update(task, advance=1)
continue

total_size += get_from.stat().st_size
shutil.copy(get_from, write_to)
progress.update(task, advance=1)

# copy features
if args.features_dir is not None:
if args.feature_version is not None:
feature = dm.get_image_features_by_image_and_feature_version(
image.id, args.feature_version
) # there should only be one
feature_fn = feature.filename
get_from = args.features_dir / feature_fn
write_to = Path(target_dir) / dataset_name / "features" / feature_fn
write_to.parent.mkdir(parents=True, exist_ok=True)
if write_to.exists():
# compute the hash of previous and new file
old_hash = _quick_hash(write_to)
new_hash = _quick_hash(get_from)
if old_hash == new_hash:
# Skip if they are the same
progress.console.log(
"Skipping (feature) file as it already exists: {}".format(feature_fn)
)
progress.update(task, advance=1)
continue

total_size += get_from.stat().st_size
shutil.copy(get_from, write_to)
progress.update(task, advance=1)
else:
raise ValueError(
"Feature version is not provided, but features directory is provided. Please provide both to copy features."
)

progress.console.log("Total data size copied: {:.2f} GB".format(total_size / 1024**3))


Expand All @@ -85,7 +171,7 @@ def register_parser(
_parser.add_argument(
"manifest_uri",
type=str,
help="URI that refers to the sqlalchemy supported database path.",
help="URI that refers to the sqlalchemy supported database path. If using an sqlite database this looks like 'sqlite:///your_database_path'.",
)
_parser.add_argument(
"manifest_name",
Expand All @@ -107,4 +193,33 @@ def register_parser(
type=str,
help="Name of the dataset to copy the data to. The data will be copied over to $SCRATCH / DATASET_NAME",
)

_parser.add_argument(
    "--mask_dir",
    type=dir_path(require_writable=False),
    help="Directory to which the masks paths defined in the manifest are relative to.",
    default=None,
)

_parser.add_argument(
    "--annotations_dir",
    type=dir_path(require_writable=False),
    help="Directory to which the annotations paths defined in the manifest are relative to.",
    default=None,
)

_parser.add_argument(
    "--features_dir",
    type=dir_path(require_writable=False),
    help="Directory to which the features paths defined in the manifest are relative to.",
    default=None,
)

_parser.add_argument(
    "--feature_version",
    # BUG FIX: a feature version is an identifier string (matched against
    # FeatureDescription.version), not a filesystem path; validating it with
    # dir_path() would reject every legitimate value.
    type=str,
    help="Version of the features to copy.",
    default=None,
)

_parser.set_defaults(subcommand=copy_data)
14 changes: 10 additions & 4 deletions ahcore/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,16 @@ def metadata(self) -> dict[str, Any]:
assert self._metadata
return self._metadata

def _decompress_data(self, tile: GenericNumberArray) -> GenericNumberArray:
def _decompress_and_reshape_data(self, tile: GenericNumberArray) -> GenericNumberArray:
if self._is_binary:
with PIL.Image.open(io.BytesIO(tile)) as img:
return np.array(img).transpose(2, 0, 1)
return np.array(img).transpose(2, 0, 1) # fixme: this also shouldn't work because the thing is flattened and doesn't have 3 dimensions
else:
# If handling features, we need to expand dimensions to match the expected shape.
if tile.ndim == 1: # fixme: is this the correct location for this
if not self._tile_size == [1, 1]:
raise NotImplementedError(f"Tile is single dimensional and {self._tile_size=} should be [1, 1], other cases have not been considered and cause unwanted behaviour.")
return tile.reshape(self._num_channels, *self._tile_size)
return tile

def read_region(self, location: tuple[int, int], level: int, size: tuple[int, int]) -> pyvips.Image:
Expand Down Expand Up @@ -201,7 +206,7 @@ def read_region(self, location: tuple[int, int], level: int, size: tuple[int, in
total_rows = math.ceil((self._size[1] - self._tile_overlap[1]) / self._stride[1])
total_cols = math.ceil((self._size[0] - self._tile_overlap[0]) / self._stride[0])

assert total_rows * total_cols == num_tiles
assert total_rows * total_cols == num_tiles # Equality only holds if features were created without a mask

x, y = location
w, h = size
Expand Down Expand Up @@ -230,7 +235,7 @@ def read_region(self, location: tuple[int, int], level: int, size: tuple[int, in
tile = (
self._empty_tile()
if tile_index_in_image_dataset == -1
else self._decompress_data(image_dataset[tile_index_in_image_dataset])
else self._decompress_and_reshape_data(image_dataset[tile_index_in_image_dataset])
)
start_y = i * self._stride[1] - y
end_y = start_y + self._tile_size[1]
Expand All @@ -242,6 +247,7 @@ def read_region(self, location: tuple[int, int], level: int, size: tuple[int, in
img_start_x = max(0, start_x)
img_end_x = min(w, end_x)


if self._stitching_mode == StitchingMode.CROP:
crop_start_y = img_start_y - start_y
crop_end_y = img_end_y - start_y
Expand Down
5 changes: 3 additions & 2 deletions ahcore/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ class DataDescription(BaseModel):
manifest_database_uri: str
manifest_name: str
split_version: str
feature_version: Optional[str] = None
annotations_dir: Path
training_grid: GridDescription
inference_grid: GridDescription
training_grid: Optional[GridDescription] = None
inference_grid: Optional[GridDescription] = None
index_map: Optional[Dict[str, int]]
remap_labels: Optional[Dict[str, str]] = None
use_class_weights: Optional[bool] = False
Expand Down
41 changes: 41 additions & 0 deletions ahcore/utils/database_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class Image(Base):
annotations: Mapped[List["ImageAnnotations"]] = relationship("ImageAnnotations", back_populates="image")
labels: Mapped[List["ImageLabels"]] = relationship("ImageLabels", back_populates="image")
caches: Mapped[List["ImageCache"]] = relationship("ImageCache", back_populates="image")
features: Mapped[List["ImageFeature"]] = relationship("ImageFeature", back_populates="image")


class ImageCache(Base):
Expand Down Expand Up @@ -115,6 +116,46 @@ class CacheDescription(Base):
cache: Mapped["ImageCache"] = relationship("ImageCache", back_populates="description")


class ImageFeature(Base):
    """Per-image feature-file record.

    Each row points at one extracted-features file belonging to a single
    :class:`Image` (one image may have several feature files).
    """

    __tablename__ = "image_feature"
    id = Column(Integer, primary_key=True)
    # pylint: disable=E1102
    created = Column(DateTime(timezone=True), default=func.now())
    last_updated = Column(DateTime(timezone=True), default=func.now(), onupdate=func.now())
    # Path of the feature file; unique across all images.
    filename = Column(String, unique=True, nullable=False)
    # Name of the reader used to open the feature file.
    reader = Column(String)
    num_tiles = Column(Integer)
    image_id = Column(Integer, ForeignKey("image.id"), nullable=False)

    image: Mapped["Image"] = relationship("Image", back_populates="features")
    # NOTE(review): back_populates="image_feature" requires a matching
    # `image_feature` relationship on FeatureDescription, and no foreign key
    # linking image_feature to feature_description is visible in this diff —
    # confirm the mapper actually configures.
    description: Mapped["FeatureDescription"] = relationship("FeatureDescription", back_populates="image_feature")

class FeatureDescription(Base):
    """Describes how a set of features was produced (model, tiling, version).

    The unique ``version`` string is the handle used to select which feature
    set to load (see the ``feature_version`` lookups in the CLI/data config).
    """

    __tablename__ = "feature_description"

    id = Column(Integer, primary_key=True)
    # pylint: disable=E1102
    created = Column(DateTime(timezone=True), default=func.now())
    last_updated = Column(DateTime(timezone=True), default=func.now(), onupdate=func.now())

    # Tiling parameters used during feature extraction.
    mpp = Column(Float)
    tile_size_width = Column(Integer)
    tile_size_height = Column(Integer)
    tile_overlap_width = Column(Integer)
    tile_overlap_height = Column(Integer)
    # Free-form human-readable description of this feature set.
    description = Column(String)

    version = Column(String, unique=True, nullable=False)  # use this to select which features we want to use

    # Provenance of the extractor model.
    model_name = Column(String)
    model_path = Column(String)
    feature_dimension = Column(Integer)
    # NOTE(review): ImageFeature.description declares
    # back_populates="image_feature", but no `image_feature` relationship (nor
    # a foreign key to image_feature) is defined here — the mapping looks
    # incomplete; confirm before use.
    image_transforms_description = Column(String)  # it would be nice to have a way to track which transforms the feature extractors used, but maybe this is not the best way to do it

class Mask(Base):
"""Mask table."""

Expand Down
Loading