From 70debd3078485e4708757926eabed2e149594870 Mon Sep 17 00:00:00 2001 From: gerchowl Date: Wed, 25 Feb 2026 22:29:24 +0100 Subject: [PATCH] feat(cli): add fd5 ingest CLI subcommand group Add fd5 ingest {raw,csv,nifti,dicom,list} CLI commands. Each command wraps the corresponding ingest loader. Closes #113 --- src/fd5/cli.py | 214 +++++++++++++++++ tests/test_ingest_cli.py | 505 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 719 insertions(+) create mode 100644 tests/test_ingest_cli.py diff --git a/src/fd5/cli.py b/src/fd5/cli.py index 64217e4..40df521 100644 --- a/src/fd5/cli.py +++ b/src/fd5/cli.py @@ -5,11 +5,13 @@ import json import sys from pathlib import Path +from typing import Any import click import h5py from fd5.hash import verify +from fd5.ingest._base import discover_loaders from fd5.manifest import write_manifest from fd5.quality import check_descriptions from fd5.rocrate import write as write_rocrate @@ -219,6 +221,218 @@ def check_descriptions_cmd(file: str) -> None: sys.exit(1) +# --------------------------------------------------------------------------- +# fd5 ingest — subcommand group +# --------------------------------------------------------------------------- + +_ALL_LOADER_NAMES = ("raw", "csv", "nifti", "dicom") + + +def _ingest_binary( + binary_path: Path, + output_dir: Path, + **kwargs: Any, +) -> Path: + """Thin wrapper for lazy import — patchable in tests.""" + from fd5.ingest.raw import ingest_binary + + return ingest_binary(binary_path, output_dir, **kwargs) + + +def _get_nifti_loader(): # type: ignore[no-untyped-def] + """Lazy-import the NiftiLoader so missing nibabel is caught at call time.""" + from fd5.ingest.nifti import NiftiLoader + + return NiftiLoader() + + +def _get_dicom_loader(): # type: ignore[no-untyped-def] + """Lazy-import a DICOM loader (not yet implemented).""" + from fd5.ingest.dicom import DicomLoader # type: ignore[import-not-found] + + return DicomLoader() + + +@cli.group() +def ingest() -> None: + """Ingest external data formats into sealed fd5 files.""" + + +@ingest.command("list") +def ingest_list() -> None: + """List available ingest loaders and their dependency status.""" + available = discover_loaders() + click.echo("Available loaders:") + for name in _ALL_LOADER_NAMES: + if name in available: + click.echo(f" {name:<10} \u2713") + else: + click.echo(f" {name:<10} \u2717 (dependency not installed)") + + +@ingest.command("raw") +@click.argument("source", type=click.Path(exists=True, dir_okay=False)) +@click.option( + "--output", "-o", type=click.Path(), required=True, help="Output directory." +) +@click.option("--name", required=True, help="Human-readable name.") +@click.option("--description", required=True, help="Description for AI-readability.") +@click.option("--product", required=True, help="Product type (e.g. recon).") +@click.option("--dtype", required=True, help="NumPy dtype (e.g. float32).") +@click.option("--shape", required=True, help="Comma-separated shape (e.g. 128,128,64).") +@click.option("--timestamp", default=None, help="Override ISO-8601 timestamp.") +def ingest_raw( + source: str, + output: str, + name: str, + description: str, + product: str, + dtype: str, + shape: str, + timestamp: str | None, +) -> None: + """Ingest a raw binary file into a sealed fd5 file.""" + shape_tuple = tuple(int(s.strip()) for s in shape.split(",")) + try: + result = _ingest_binary( + Path(source), + Path(output), + dtype=dtype, + shape=shape_tuple, + product=product, + name=name, + description=description, + timestamp=timestamp, + ) + click.echo(f"Ingested {Path(source).name} \u2192 {result}") + except (ValueError, FileNotFoundError) as exc: + click.echo(f"Error: {exc}", err=True) + sys.exit(1) + + +@ingest.command("csv") +@click.argument("source", type=click.Path(exists=True, dir_okay=False)) +@click.option( + "--output", "-o", type=click.Path(), required=True, help="Output directory." +) +@click.option("--name", required=True, help="Human-readable name.") +@click.option("--description", required=True, help="Description for AI-readability.") +@click.option("--product", required=True, help="Product type (e.g. spectrum).") +@click.option("--delimiter", default=",", help="Column delimiter (default: comma).") +@click.option("--timestamp", default=None, help="Override ISO-8601 timestamp.") +def ingest_csv( + source: str, + output: str, + name: str, + description: str, + product: str, + delimiter: str, + timestamp: str | None, +) -> None: + """Ingest a CSV/TSV file into a sealed fd5 file.""" + from fd5.ingest.csv import CsvLoader + + loader = CsvLoader() + try: + result = loader.ingest( + Path(source), + Path(output), + product=product, + name=name, + description=description, + timestamp=timestamp, + delimiter=delimiter, + ) + click.echo(f"Ingested {Path(source).name} \u2192 {result}") + except (ValueError, FileNotFoundError) as exc: + click.echo(f"Error: {exc}", err=True) + sys.exit(1) + + +@ingest.command("nifti") +@click.argument("source", type=click.Path(exists=True)) +@click.option( + "--output", "-o", type=click.Path(), required=True, help="Output directory." +) +@click.option("--name", required=True, help="Human-readable name.") +@click.option("--description", required=True, help="Description for AI-readability.") +@click.option("--product", default="recon", help="Product type (default: recon).") +@click.option("--timestamp", default=None, help="Override ISO-8601 timestamp.") +def ingest_nifti( + source: str, + output: str, + name: str, + description: str, + product: str, + timestamp: str | None, +) -> None: + """Ingest a NIfTI file (.nii / .nii.gz) into a sealed fd5 file.""" + try: + loader = _get_nifti_loader() + except ImportError: + click.echo( + "Error: nibabel is not installed. Install with: pip install 'fd5[nifti]'", + err=True, + ) + sys.exit(1) + + try: + result = loader.ingest( + Path(source), + Path(output), + product=product, + name=name, + description=description, + timestamp=timestamp, + ) + click.echo(f"Ingested {Path(source).name} \u2192 {result}") + except (ValueError, FileNotFoundError) as exc: + click.echo(f"Error: {exc}", err=True) + sys.exit(1) + + +@ingest.command("dicom") +@click.argument("source", type=click.Path(exists=True)) +@click.option( + "--output", "-o", type=click.Path(), required=True, help="Output directory." +) +@click.option("--name", required=True, help="Human-readable name.") +@click.option("--description", required=True, help="Description for AI-readability.") +@click.option("--product", default="recon", help="Product type (default: recon).") +@click.option("--timestamp", default=None, help="Override ISO-8601 timestamp.") +def ingest_dicom( + source: str, + output: str, + name: str, + description: str, + product: str, + timestamp: str | None, +) -> None: + """Ingest a DICOM series directory into a sealed fd5 file.""" + try: + loader = _get_dicom_loader() + except ImportError: + click.echo( + "Error: pydicom is not installed. Install with: pip install 'fd5[dicom]'", + err=True, + ) + sys.exit(1) + + try: + result = loader.ingest( + Path(source), + Path(output), + product=product, + name=name, + description=description, + timestamp=timestamp, + ) + click.echo(f"Ingested {Path(source).name} \u2192 {result}") + except (ValueError, FileNotFoundError) as exc: + click.echo(f"Error: {exc}", err=True) + sys.exit(1) + + # --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- diff --git a/tests/test_ingest_cli.py b/tests/test_ingest_cli.py new file mode 100644 index 0000000..f37e0b6 --- /dev/null +++ b/tests/test_ingest_cli.py @@ -0,0 +1,505 @@ +"""Tests for fd5 ingest CLI commands — issue #113.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest +from click.testing import CliRunner + +from fd5.cli import cli + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture() +def runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture() +def binary_file(tmp_path: Path) -> Path: + """Create a small raw binary file (float32, 4x4x4).""" + arr = np.ones((4, 4, 4), dtype=np.float32) + path = tmp_path / "data.bin" + arr.tofile(path) + return path + + +@pytest.fixture() +def csv_file(tmp_path: Path) -> Path: + """Create a minimal CSV file with energy + counts columns.""" + path = tmp_path / "spectrum.csv" + path.write_text("energy,counts\n1.0,100\n2.0,200\n3.0,300\n") + return path + + +# --------------------------------------------------------------------------- +# fd5 ingest --help +# --------------------------------------------------------------------------- + + +class TestIngestHelp: + def test_ingest_help_exits_zero(self, runner: CliRunner): + result = runner.invoke(cli, ["ingest", "--help"]) + assert result.exit_code == 0 + + def test_ingest_help_lists_subcommands(self, runner: CliRunner): + result = runner.invoke(cli, ["ingest", "--help"]) + for sub in ("raw", "nifti", "csv", "list"): + assert sub in result.output + + def test_ingest_appears_in_main_help(self, runner: CliRunner): + result = runner.invoke(cli, ["--help"]) + assert "ingest" in result.output + + +# --------------------------------------------------------------------------- +# fd5 ingest list +# --------------------------------------------------------------------------- + + +class TestIngestList: + def test_exits_zero(self, runner: CliRunner): + result = runner.invoke(cli, ["ingest", "list"]) + assert result.exit_code == 0 + + def test_shows_header(self, runner: CliRunner): + result = runner.invoke(cli, ["ingest", "list"]) + assert "available" in result.output.lower() or "loader" in result.output.lower() + + def test_shows_raw_loader(self, runner: CliRunner): + with patch( + "fd5.cli.discover_loaders", + return_value={"raw": MagicMock()}, + ): + result = runner.invoke(cli, ["ingest", "list"]) + assert "raw" in result.output + + def test_shows_missing_dep(self, runner: CliRunner): + """Loaders not returned by discover_loaders are shown as missing.""" + with ( + patch( + "fd5.cli.discover_loaders", + return_value={}, + ), + patch( + "fd5.cli._ALL_LOADER_NAMES", + ("raw", "nifti", "csv"), + ), + ): + result = runner.invoke(cli, ["ingest", "list"]) + assert "raw" in result.output + + +# --------------------------------------------------------------------------- +# fd5 ingest raw +# --------------------------------------------------------------------------- + + +def _make_mock_ingest_binary(tmp_path: Path): + """Return a patch that replaces ingest_binary with a mock.""" + fake_h5 = tmp_path / "out" / "result.h5" + fake_h5.parent.mkdir(exist_ok=True) + fake_h5.touch() + return patch("fd5.cli._ingest_binary", return_value=fake_h5) + + +class TestIngestRaw: + def test_exits_zero(self, runner: CliRunner, binary_file: Path, tmp_path: Path): + with _make_mock_ingest_binary(tmp_path): + result = runner.invoke( + cli, + [ + "ingest", + "raw", + str(binary_file), + "--output", + str(tmp_path / "out"), + "--name", + "Test Raw", + "--description", + "Test raw binary ingest", + "--product", + "recon", + "--dtype", + "float32", + "--shape", + "4,4,4", + ], + ) + assert result.exit_code == 0, result.output + + def test_prints_confirmation( + self, runner: CliRunner, binary_file: Path, tmp_path: Path + ): + with _make_mock_ingest_binary(tmp_path): + result = runner.invoke( + cli, + [ + "ingest", + "raw", + str(binary_file), + "--output", + str(tmp_path / "out"), + "--name", + "Test", + "--description", + "desc", + "--product", + "recon", + "--dtype", + "float32", + "--shape", + "4,4,4", + ], + ) + assert "ingested" in result.output.lower() or ".h5" in result.output.lower() + + def test_calls_ingest_binary_with_correct_args( + self, runner: CliRunner, binary_file: Path, tmp_path: Path + ): + with _make_mock_ingest_binary(tmp_path) as mock_fn: + runner.invoke( + cli, + [ + "ingest", + "raw", + str(binary_file), + "--output", + str(tmp_path / "out"), + "--name", + "N", + "--description", + "D", + "--product", + "recon", + "--dtype", + "float32", + "--shape", + "4,4,4", + ], + ) + mock_fn.assert_called_once() + _, kwargs = mock_fn.call_args + assert kwargs["dtype"] == "float32" + assert kwargs["shape"] == (4, 4, 4) + assert kwargs["product"] == "recon" + assert kwargs["name"] == "N" + + def test_missing_required_options(self, runner: CliRunner, binary_file: Path): + result = runner.invoke(cli, ["ingest", "raw", str(binary_file)]) + assert result.exit_code != 0 + + def test_nonexistent_source_exits_nonzero(self, runner: CliRunner, tmp_path: Path): + result = runner.invoke( + cli, + [ + "ingest", + "raw", + str(tmp_path / "ghost.bin"), + "--output", + str(tmp_path), + "--name", + "x", + "--description", + "x", + "--product", + "recon", + "--dtype", + "float32", + "--shape", + "4,4,4", + ], + ) + assert result.exit_code != 0 + + def test_error_from_ingest_binary_exits_nonzero( + self, runner: CliRunner, binary_file: Path, tmp_path: Path + ): + out = tmp_path / "out" + out.mkdir() + with patch( + "fd5.cli._ingest_binary", + side_effect=ValueError("cannot reshape"), + ): + result = runner.invoke( + cli, + [ + "ingest", + "raw", + str(binary_file), + "--output", + str(out), + "--name", + "x", + "--description", + "x", + "--product", + "recon", + "--dtype", + "float32", + "--shape", + "999,999,999", + ], + ) + assert result.exit_code != 0 + + +# --------------------------------------------------------------------------- +# fd5 ingest csv +# --------------------------------------------------------------------------- + + +class TestIngestCsv: + def test_exits_zero(self, runner: CliRunner, csv_file: Path, tmp_path: Path): + out = tmp_path / "out" + out.mkdir() + result = runner.invoke( + cli, + [ + "ingest", + "csv", + str(csv_file), + "--output", + str(out), + "--name", + "Test Spectrum", + "--description", + "Test CSV ingest", + "--product", + "spectrum", + ], + ) + assert result.exit_code == 0, result.output + + def test_creates_h5_file(self, runner: CliRunner, csv_file: Path, tmp_path: Path): + out = tmp_path / "out" + out.mkdir() + runner.invoke( + cli, + [ + "ingest", + "csv", + str(csv_file), + "--output", + str(out), + "--name", + "Test", + "--description", + "desc", + "--product", + "spectrum", + ], + ) + h5_files = list(out.glob("*.h5")) + assert len(h5_files) >= 1 + + def test_prints_confirmation( + self, runner: CliRunner, csv_file: Path, tmp_path: Path + ): + out = tmp_path / "out" + out.mkdir() + result = runner.invoke( + cli, + [ + "ingest", + "csv", + str(csv_file), + "--output", + str(out), + "--name", + "Test", + "--description", + "desc", + "--product", + "spectrum", + ], + ) + assert "ingested" in result.output.lower() or ".h5" in result.output.lower() + + def test_custom_delimiter(self, runner: CliRunner, tmp_path: Path): + tsv = tmp_path / "data.tsv" + tsv.write_text("energy\tcounts\n1.0\t100\n2.0\t200\n") + out = tmp_path / "out" + out.mkdir() + result = runner.invoke( + cli, + [ + "ingest", + "csv", + str(tsv), + "--output", + str(out), + "--name", + "TSV", + "--description", + "tab-delimited", + "--product", + "spectrum", + "--delimiter", + "\t", + ], + ) + assert result.exit_code == 0, result.output + + def test_missing_source_exits_nonzero(self, runner: CliRunner, tmp_path: Path): + result = runner.invoke( + cli, + [ + "ingest", + "csv", + str(tmp_path / "ghost.csv"), + "--output", + str(tmp_path), + "--name", + "x", + "--description", + "x", + "--product", + "spectrum", + ], + ) + assert result.exit_code != 0 + + +# --------------------------------------------------------------------------- +# fd5 ingest nifti +# --------------------------------------------------------------------------- + + +class TestIngestNifti: + def test_exits_zero_with_mock(self, runner: CliRunner, tmp_path: Path): + """Nifti ingest works when nibabel is available (mocked).""" + mock_loader = MagicMock() + fake_h5 = tmp_path / "out" / "result.h5" + (tmp_path / "out").mkdir() + fake_h5.touch() + mock_loader.ingest.return_value = fake_h5 + + nii = tmp_path / "vol.nii" + nii.touch() + + with patch("fd5.cli._get_nifti_loader", return_value=mock_loader): + result = runner.invoke( + cli, + [ + "ingest", + "nifti", + str(nii), + "--output", + str(tmp_path / "out"), + "--name", + "CT Volume", + "--description", + "Thorax CT scan", + ], + ) + assert result.exit_code == 0, result.output + + def test_missing_nibabel_shows_error(self, runner: CliRunner, tmp_path: Path): + nii = tmp_path / "vol.nii" + nii.touch() + out = tmp_path / "out" + out.mkdir() + + with patch("fd5.cli._get_nifti_loader", side_effect=ImportError("no nibabel")): + result = runner.invoke( + cli, + [ + "ingest", + "nifti", + str(nii), + "--output", + str(out), + "--name", + "CT", + "--description", + "desc", + ], + ) + assert result.exit_code != 0 + assert "nibabel" in result.output.lower() or "install" in result.output.lower() + + def test_nonexistent_source_exits_nonzero(self, runner: CliRunner, tmp_path: Path): + result = runner.invoke( + cli, + [ + "ingest", + "nifti", + str(tmp_path / "ghost.nii"), + "--output", + str(tmp_path), + "--name", + "x", + "--description", + "x", + ], + ) + assert result.exit_code != 0 + + +# --------------------------------------------------------------------------- +# fd5 ingest dicom (always mocked — no pydicom loader yet) +# --------------------------------------------------------------------------- + + +class TestIngestDicom: + def test_exits_zero_with_mock(self, runner: CliRunner, tmp_path: Path): + mock_loader = MagicMock() + fake_h5 = tmp_path / "out" / "result.h5" + (tmp_path / "out").mkdir() + fake_h5.touch() + mock_loader.ingest.return_value = fake_h5 + + dcm_dir = tmp_path / "dcm" + dcm_dir.mkdir() + + with patch("fd5.cli._get_dicom_loader", return_value=mock_loader): + result = runner.invoke( + cli, + [ + "ingest", + "dicom", + str(dcm_dir), + "--output", + str(tmp_path / "out"), + "--name", + "PET Recon", + "--description", + "Whole-body PET", + ], + ) + assert result.exit_code == 0, result.output + + def test_missing_pydicom_shows_error(self, runner: CliRunner, tmp_path: Path): + dcm_dir = tmp_path / "dcm" + dcm_dir.mkdir() + out = tmp_path / "out" + out.mkdir() + + with patch( + "fd5.cli._get_dicom_loader", + side_effect=ImportError("no pydicom"), + ): + result = runner.invoke( + cli, + [ + "ingest", + "dicom", + str(dcm_dir), + "--output", + str(out), + "--name", + "PET", + "--description", + "desc", + ], + ) + assert result.exit_code != 0 + assert "pydicom" in result.output.lower() or "install" in result.output.lower()