From 1361b1eda9831b6e883a66663dda9bdd8b8ce2db Mon Sep 17 00:00:00 2001 From: DylanAdlard Date: Mon, 15 Dec 2025 11:05:01 +0200 Subject: [PATCH 1/4] mypy --- .github/workflows/mypy.yml | 24 ++++++++++ README.md | 21 +-------- env.yml | 18 -------- pyproject.toml | 17 ++++++++ requirements.txt | 10 ----- src/ecoff_fitter/__init__.py | 7 ++- src/ecoff_fitter/cli.py | 3 +- src/ecoff_fitter/core.py | 45 ++++++++++++++----- src/ecoff_fitter/defence.py | 29 +++++++----- src/ecoff_fitter/graphs.py | 27 +++++++----- src/ecoff_fitter/mixture.py | 47 +++++++++++++++----- src/ecoff_fitter/report.py | 85 +++++++++++++++++++++--------------- src/ecoff_fitter/utils.py | 73 ++++++++++++++++++------------- tests/test_utils.py | 4 +- 14 files changed, 246 insertions(+), 164 deletions(-) create mode 100644 .github/workflows/mypy.yml delete mode 100644 env.yml delete mode 100644 requirements.txt diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 0000000..2554ed7 --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,24 @@ +name: mypy + +on: [push, pull_request] + +jobs: + type-check: + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install package + dev deps + run: | + pip install .[dev] + + - name: Run MyPy + run: | + mypy src/ecoff_fitter --pretty diff --git a/README.md b/README.md index c4ea53c..718eca9 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Demo input files are provided in `demo_files/` to illustrate basic use. --- -## 📦 Installation +## 🛠 Installation ### Install from PyPI @@ -39,25 +39,6 @@ pip install -e . --- -## 🛠 Creating the Environment - -### Conda environment (env.yml) - -```bash -conda env create -f env.yml -conda activate ECOFFitter -``` - -### Pip environment (requirements.txt) - -```bash -python -m venv ecoff-env -source ecoff-env/bin/activate -pip install -r requirements.txt -``` - ---- - ## 📥 Input ### 1. MIC Data Input File diff --git a/env.yml b/env.yml deleted file mode 100644 index 97372b4..0000000 --- a/env.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: ECOFFitter -channels: - - conda-forge - - defaults -dependencies: - - python - - scipy - - pytest - - scikit-learn - - pandas - - joblib - - yaml - - pyyaml - - matplotlib - - pytest-cov - - pip - - pip: - - intreg diff --git a/pyproject.toml b/pyproject.toml index 617a12b..b28a487 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,3 +50,20 @@ omit = [ "*/ecoff_fitter/wts.py", "*/gui.py" ] + +[tool.mypy] +python_version = "3.10" +warn_return_any = true +warn_unused_ignores = true +warn_redundant_casts = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +ignore_missing_imports = true +allow_redefinition = true +no_implicit_optional = true +implicit_reexport = true +exclude = "src/ecoff_fitter/wts.py" + +[project.optional-dependencies] +dev = ["mypy", "pandas-stubs", "types-PyYAML", "numpy-stubs", "scipy-stubs"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e93a0b7..0000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -scipy -pytest -scikit-learn -pandas -joblib -PyYAML -matplotlib -pytest-cov -intreg -openpyxl diff --git a/src/ecoff_fitter/__init__.py b/src/ecoff_fitter/__init__.py index fbb240b..9d48a6a 100644 --- a/src/ecoff_fitter/__init__.py +++ b/src/ecoff_fitter/__init__.py @@ -1,3 +1,6 @@ +from typing import Callable + + """ ECOFF Fitter — Estimate epidemiological cutoff values (ECOFFs) using interval regression on MIC (Minimum Inhibitory Concentration) data. @@ -16,7 +19,7 @@ # --- Public API imports --- from .core import ECOFFitter -__all__ = ["ECOFFitter"] +__all__: list[str] = ["ECOFFitter"] # --- Optional: version handling --- try: @@ -25,7 +28,7 @@ __version__ = "0.0.0" # --- Optional: CLI hook for `python -m ecoff_fitter` --- -def main(): +def main() -> None: """Entry point for running ecoff_fitter as a module (CLI).""" from .cli import main as cli_main cli_main() diff --git a/src/ecoff_fitter/cli.py b/src/ecoff_fitter/cli.py index 0cb8648..e49c5f6 100644 --- a/src/ecoff_fitter/cli.py +++ b/src/ecoff_fitter/cli.py @@ -7,6 +7,7 @@ """ import argparse +from typing import Any, List, Optional from ecoff_fitter import ECOFFitter from ecoff_fitter.report import GenerateReport from ecoff_fitter.defence import validate_output_path @@ -74,7 +75,7 @@ def build_parser() -> argparse.ArgumentParser: return parser -def main(argv=None): +def main(argv: Optional[List[str]] = None) -> None: """Main entry point for the ECOFFitter CLI.""" parser = build_parser() args = parser.parse_args(argv) diff --git a/src/ecoff_fitter/core.py b/src/ecoff_fitter/core.py index 360e6cd..6863363 100644 --- a/src/ecoff_fitter/core.py +++ b/src/ecoff_fitter/core.py @@ -1,4 +1,7 @@ import numpy as np +from typing import Any, Optional, Tuple +import pandas as pd +from numpy.typing import NDArray from scipy.stats import norm from intreg.intreg import IntReg from ecoff_fitter.utils import read_input, read_params @@ -24,14 +27,28 @@ class ECOFFitter: (wild-type) component at the given percentile. """ + model_: IntReg | MixtureModel | None + x: NDArray[np.float_] + mus_: NDArray[np.float_] + sigmas_: NDArray[np.float_] + pis_: NDArray[np.float_] + loglike_: float + converged_: bool + n_iter_: int | None + ecoff_: float + z_percentile_: float + y_low_: NDArray[np.float_] + y_high_: NDArray[np.float_] + weights_: NDArray[np.float_] + def __init__( self, - input, - params: dict | str | None = None, + input: pd.DataFrame | str, + params: dict[str, Any] | str | None = None, dilution_factor: int = 2, distributions: int = 1, boundary_support: int | None = 1, - ): + ) -> None: """ Initialize the ECOFFitter. @@ -75,7 +92,7 @@ def __init__( self.distributions = distributions self.boundary_support = boundary_support - def fit(self, options={}): + def fit(self, options: dict[str, Any] | None = None) -> "ECOFFitter": """ Define MIC intervals and fit either a single censored-normal model or a finite mixture model. @@ -96,7 +113,7 @@ def fit(self, options={}): # multiple gaussians return self.fit_mixture(options) - def fit_single(self, options=None): + def fit_single(self, options: dict[str, Any] | None = None) -> "ECOFFitter": """ Fit a single-component censored normal distribution using interval regression. @@ -122,7 +139,9 @@ def fit_single(self, options=None): self.converged_ = result.success self.n_iter_ = result.nit if hasattr(result, "nit") else None - def fit_mixture(self, options=None): + return self + + def fit_mixture(self, options: dict[str, Any] | None = None) -> "ECOFFitter": """ Fit a K-component finite mixture of censored normals using the EM algorithm followed by optional refinement. @@ -156,7 +175,9 @@ def fit_mixture(self, options=None): return self - def define_intervals(self, df=None): + def define_intervals( + self, df: Optional[pd.DataFrame] = None + ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]: """ Construct MIC interval bounds and apply left-, right-, and interval- censoring rules, then transform to log dilution space. @@ -210,7 +231,9 @@ def define_intervals(self, df=None): return y_low_log, y_high_log, weights - def log_transf_intervals(self, y_low, y_high): + def log_transf_intervals( + self, y_low: NDArray[np.float_], y_high: NDArray[np.float_] + ) -> Tuple[NDArray[np.float_], NDArray[np.float_]]: """ Transform interval bounds into log base–dilution_factor space. @@ -231,7 +254,9 @@ def log_transf_intervals(self, y_low, y_high): return y_low_log, y_high_log - def generate(self, percentile: int | float = 99, options={}): + def generate( + self, percentile: int | float = 99, options: dict[str, Any] | None = None + ) -> Tuple[Any, ...]: """ Fit the model and compute the ECOFF at a specified percentile. @@ -253,7 +278,7 @@ def generate(self, percentile: int | float = 99, options={}): return results - def compute_ecoff(self, percentile: float): + def compute_ecoff(self, percentile: float) -> Tuple[Any, ...]: """ Compute the ECOFF and percentile location from the fitted model. diff --git a/src/ecoff_fitter/defence.py b/src/ecoff_fitter/defence.py index c583673..c96251f 100644 --- a/src/ecoff_fitter/defence.py +++ b/src/ecoff_fitter/defence.py @@ -1,9 +1,11 @@ import pandas as pd +from typing import Any +from pandas import DataFrame import os import re -def validate_input_source(input): +def validate_input_source(input: str | DataFrame | dict[str, Any]) -> None: """ Validate the input source for ECOFFitter. @@ -32,7 +34,9 @@ def validate_input_source(input): raise ValueError("Input must be a pandas DataFrame or a valid file path.") -def validate_params_source(params): +def validate_params_source( + params: dict[str, Any] | str | list[Any] | tuple[Any, ...] | DataFrame | Any | None, +) -> None: """ Pre-validate the params argument before attempting to read it. @@ -69,8 +73,7 @@ def validate_params_source(params): ) - -def validate_mic_data(df): +def validate_mic_data(df: DataFrame) -> None: """ Validate MIC and observations columns. @@ -97,7 +100,9 @@ def validate_mic_data(df): raise ValueError(f"Invalid MIC format found in rows: {bad_rows.index.tolist()}") -def validate_params(dilution_factor, distributions, boundary_support): +def validate_params( + dilution_factor: int, distributions: int, boundary_support: int | None +) -> None: """ Validate ECOFFitter configuration values. @@ -112,17 +117,18 @@ def validate_params(dilution_factor, distributions, boundary_support): if not isinstance(dilution_factor, int) or dilution_factor <= 1: raise ValueError("dilution_factor must be an integer > 1.") - + if not isinstance(distributions, int): - raise NotImplementedError("The number of mixture components must be an integer.") - + raise NotImplementedError( + "The number of mixture components must be an integer." + ) + if boundary_support is not None and ( not isinstance(boundary_support, int) or boundary_support < 0 ): raise ValueError("boundary_support must be a non-negative integer or None.") - def validate_output_path(path: str) -> bool: """ Checks if the given path is safe and writable, and that the file extension is .txt or .pdf. @@ -130,11 +136,11 @@ def validate_output_path(path: str) -> bool: Returns True if valid, otherwise raises ValueError. """ # Check extension - allowed_exts = ('.txt', '.pdf') + allowed_exts = (".txt", ".pdf") if not path.lower().endswith(allowed_exts): raise ValueError(f"File must end with {allowed_exts}, got '{path}'") - directory = os.path.dirname(path) or '.' + directory = os.path.dirname(path) or "." if not os.path.exists(directory): raise ValueError(f"Directory does not exist: {directory}") @@ -143,4 +149,3 @@ def validate_output_path(path: str) -> bool: raise PermissionError(f"No write permission in directory: {directory}") return True - diff --git a/src/ecoff_fitter/graphs.py b/src/ecoff_fitter/graphs.py index 13e02cb..f5bfd57 100644 --- a/src/ecoff_fitter/graphs.py +++ b/src/ecoff_fitter/graphs.py @@ -1,21 +1,24 @@ import numpy as np +from typing import Optional +from numpy.typing import NDArray +import matplotlib.axes import matplotlib.pyplot as plt from scipy.stats import norm def plot_mic_distribution( - low_log, - high_log, - weights, - dilution_factor, - mus, - sigmas, - pis=None, - log2_ecoff=None, - global_x_min=None, - global_x_max=None, - ax=None, -): + low_log: NDArray[np.float_], + high_log: NDArray[np.float_], + weights: NDArray[np.float_], + dilution_factor: float | int, + mus: NDArray[np.float_] | list[float], + sigmas: NDArray[np.float_] | list[float], + pis: Optional[NDArray[np.float_] | list[float]] = None, + log2_ecoff: Optional[float] = None, + global_x_min: Optional[float] = None, + global_x_max: Optional[float] = None, + ax: Optional[matplotlib.axes.Axes] = None, +) -> matplotlib.axes.Axes: """ Plot MIC intervals with a K-component Gaussian mixture fit. Supports left- and right-censoring with visual tail extensions. diff --git a/src/ecoff_fitter/mixture.py b/src/ecoff_fitter/mixture.py index a5cdb03..11ea20c 100644 --- a/src/ecoff_fitter/mixture.py +++ b/src/ecoff_fitter/mixture.py @@ -1,4 +1,6 @@ import numpy as np +from typing import Optional, Tuple +from numpy.typing import NDArray from intreg.intreg import IntReg from sklearn.cluster import KMeans from scipy.optimize import minimize @@ -15,7 +17,27 @@ class MixtureModel: 3. Optional refinement using mixture likelihood via L-BFGS-B. """ - def __init__(self, y_low, y_high, weights, distributions): + y_low: NDArray[np.float_] + y_high: NDArray[np.float_] + weights: NDArray[np.float_] + + mus: NDArray[np.float_] + sigmas: NDArray[np.float_] + pis: NDArray[np.float_] + + x: NDArray[np.float_] + converged: bool + n_iter: int + loglike: float + params_: dict[str, float] + + def __init__( + self, + y_low: NDArray[np.float_] | list[float], + y_high: NDArray[np.float_] | list[float], + weights: NDArray[np.float_] | list[float], + distributions: int, + ) -> None: """ Initialise a K-component mixture model using K-means clustering. @@ -39,14 +61,14 @@ def __init__(self, y_low, y_high, weights, distributions): # Find finite bounds finite_high = np.max(y_high[np.isfinite(y_high)]) - finite_low = np.min(y_low[np.isfinite(y_low)]) + finite_low = np.min(y_low[np.isfinite(y_low)]) # KMeans cannot handle inf → substitute only for midpoint computation - y_low_km = y_low.copy() + y_low_km = y_low.copy() y_high_km = y_high.copy() y_high_km[np.isinf(y_high_km)] = finite_high + 1.0 - y_low_km[np.isinf(y_low_km)] = finite_low - 1.0 + y_low_km[np.isinf(y_low_km)] = finite_low - 1.0 mid = (y_low_km + y_high_km) / 2 mid_reshaped = mid.reshape(-1, 1) @@ -57,7 +79,6 @@ def __init__(self, y_low, y_high, weights, distributions): random_state=0, ).fit(mid_reshaped, sample_weight=weights) - # Cluster centres → initial mus mus = kmeans.cluster_centers_.flatten() @@ -84,7 +105,9 @@ def __init__(self, y_low, y_high, weights, distributions): self.y_low, self.y_high, self.weights = y_low, y_high, weights self.mus, self.sigmas, self.pis = mus, sigmas, pis - def fit(self, max_iter=500, tol=1e-6, refine=True): + def fit( + self, max_iter: int = 500, tol: float = 1e-6, refine: bool = True + ) -> "MixtureModel": """ Fit the mixture model using EM and optional refinement. @@ -105,7 +128,7 @@ def fit(self, max_iter=500, tol=1e-6, refine=True): return self - def em(self, max_iter=500, tol=1e-6): + def em(self, max_iter: int = 500, tol: float = 1e-6) -> "MixtureModel": """ Expectation–Maximization (EM) algorithm for a K-component mixture of interval-censored normal distributions. @@ -197,7 +220,7 @@ def em(self, max_iter=500, tol=1e-6): return self - def refine_mixture(self): + def refine_mixture(self) -> "MixtureModel": """ Refinement step for a general K-component mixture model using L-BFGS-B. @@ -213,7 +236,9 @@ def refine_mixture(self): y_high = np.asarray(self.y_high, float) weights = np.asarray(self.weights, float) - def unpack_params(params): + def unpack_params( + params: NDArray[np.float_], + ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]: """Convert flat parameter vector into mus, sigmas, pis.""" mus = params[: self.K] sigmas = np.exp(params[self.K : 2 * self.K]) @@ -226,7 +251,7 @@ def unpack_params(params): return mus, sigmas, pis - def neg_log_likelihood(params): + def neg_log_likelihood(params: NDArray[np.float_]) -> float: mus, sigmas, pis = unpack_params(params) # P(interval | component k) @@ -241,7 +266,7 @@ def neg_log_likelihood(params): mix = p_mat @ pis mix = np.clip(mix, 1e-300, np.inf) - return -np.sum(weights * np.log(mix)) + return float(-np.sum(weights * np.log(mix))) res = minimize(neg_log_likelihood, self.x, method="L-BFGS-B") self.x = res.x diff --git a/src/ecoff_fitter/report.py b/src/ecoff_fitter/report.py index a32b3d1..64c55ad 100644 --- a/src/ecoff_fitter/report.py +++ b/src/ecoff_fitter/report.py @@ -1,5 +1,7 @@ from dataclasses import dataclass -from typing import Any +from typing import Any, Tuple, Optional, Dict, cast +from matplotlib.figure import Figure +from numpy.typing import NDArray import numpy as np from matplotlib.backends.backend_pdf import PdfPages from ecoff_fitter.graphs import plot_mic_distribution @@ -15,12 +17,12 @@ class GenerateReport: avoiding duplication of distributions, mus, sigmas, intervals, etc. """ - fitter: Any # The ECOFFitter used to generate the results - ecoff: float # ECOFF value - z: tuple # Percentile-based ECOFFs (99, 97.5, 95) + fitter: Any + ecoff: float + z: Tuple[float, float, float] # Percentile-based ECOFFs (99, 97.5, 95) @classmethod - def from_fitter(cls, fitter, result): + def from_fitter(cls, fitter: Any, result: Tuple[Any, ...]) -> "GenerateReport": """ Construct a GenerateReport from an ECOFFitter and generate() output. @@ -33,7 +35,7 @@ def from_fitter(cls, fitter, result): z1 = fitter.compute_ecoff(percentile=97.5)[0] z2 = fitter.compute_ecoff(percentile=95)[0] - ecoff = result[0] # first element always ECOFF + ecoff = result[0] # first element always ECOFF return cls( fitter=fitter, @@ -42,34 +44,44 @@ def from_fitter(cls, fitter, result): ) @property - def distributions(self): - return self.fitter.distributions + def distributions(self) -> int: + return cast(int, self.fitter.distributions) @property - def dilution_factor(self): - return self.fitter.dilution_factor + def dilution_factor(self) -> float: + return cast(float, self.fitter.dilution_factor) @property - def mus(self): - return self.fitter.mus_ + def mus(self) -> NDArray[np.float_]: + return cast(NDArray[np.float_], self.fitter.mus_) @property - def sigmas(self): - return self.fitter.sigmas_ - + def sigmas(self) -> NDArray[np.float_]: + return cast(NDArray[np.float_], self.fitter.sigmas_) + @property - def pis(self): + def pis(self) -> Optional[NDArray[np.float_]]: return getattr(self.fitter, "pis_", None) @property - def model(self): + def model(self) -> Any: return getattr(self.fitter, "model_", None) + @property - def intervals(self): - return self.fitter.define_intervals() + def intervals( + self, + ) -> tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]: + return cast( + tuple[ + NDArray[np.float_], + NDArray[np.float_], + NDArray[np.float_] + ], + self.fitter.define_intervals(), + ) - def print_stats(self, verbose=False): + def print_stats(self, verbose: bool = False) -> None: print(f"\nECOFF (original scale): {self.ecoff:.2}") if self.distributions == 1: @@ -80,15 +92,16 @@ def print_stats(self, verbose=False): else: print("\nComponent means and sigmas (original scale):") for i, (mu, sigma) in enumerate(zip(self.mus, self.sigmas), start=1): - print(f" μ{i}: {self.dilution_factor**mu:.4f}, " - f"σ{i} (folds): {self.dilution_factor**sigma:.4f}") + print( + f" μ{i}: {self.dilution_factor**mu:.4f}, " + f"σ{i} (folds): {self.dilution_factor**sigma:.4f}" + ) if verbose and self.model is not None: print("\n--- Model details ---") print(self.model) - - def write_out(self, path: str): + def write_out(self, path: str) -> None: z0, z1, z2 = self.z with open(path, "w") as f: @@ -113,8 +126,7 @@ def write_out(self, path: str): print(f"\nResults saved to: {path}") - - def save_pdf(self, outfile: str): + def save_pdf(self, outfile: str) -> None: with PdfPages(outfile) as pdf: fig = self._make_pdf() pdf.savefig(fig) @@ -122,11 +134,9 @@ def save_pdf(self, outfile: str): print(f"PDF report saved to: {outfile}") - - def _make_pdf(self, title=None): + def _make_pdf(self, title: Optional[str] = None) -> Figure: fig, (ax_plot, ax_text) = plt.subplots( - nrows=1, ncols=2, figsize=(10, 4), - gridspec_kw={"width_ratios": [2, 1]} + nrows=1, ncols=2, figsize=(10, 4), gridspec_kw={"width_ratios": [2, 1]} ) low_log, high_log, weights = self.intervals @@ -173,21 +183,26 @@ def _make_pdf(self, title=None): ) ax_text.text( - 0.05, 0.9, + 0.05, + 0.9, "\n".join(lines), fontsize=11, va="top", family="monospace", ) - fig.tight_layout(rect=[0, 0, 1, 0.95]) + fig.tight_layout(rect=(0, 0, 1, 0.95)) return fig - class CombinedReport: - def __init__(self, outfile, global_report, individual_reports): + def __init__( + self, + outfile: str, + global_report: GenerateReport, + individual_reports: Dict[str, GenerateReport], + ) -> None: """ outfile: PDF filename global_report: GenerateReport instance @@ -197,7 +212,7 @@ def __init__(self, outfile, global_report, individual_reports): self.global_report = global_report self.individual_reports = individual_reports - def save_pdf(self): + def save_pdf(self) -> None: from matplotlib.backends.backend_pdf import PdfPages with PdfPages(self.outfile) as pdf: diff --git a/src/ecoff_fitter/utils.py b/src/ecoff_fitter/utils.py index d1625b0..096371e 100644 --- a/src/ecoff_fitter/utils.py +++ b/src/ecoff_fitter/utils.py @@ -1,9 +1,15 @@ +from typing import Any, Dict, List, Tuple, Optional, cast import pandas as pd +from pandas import DataFrame +from numpy.typing import NDArray import yaml import os -def read_input(data, sheet_name=None): +def read_input( + data: DataFrame | list[Any] | tuple[Any, ...] | dict[str, Any] | str, + sheet_name: Optional[str] = None, +) -> DataFrame: """ Read MIC input data from a DataFrame, array-like, dict, or file and validate required columns. If given a single-column input, @@ -36,25 +42,26 @@ def read_input(data, sheet_name=None): elif ext in [".tsv", ".txt"]: df = pd.read_csv(data, sep=r"\s+") elif ext in [".xlsx", ".xls"]: - df = pd.read_excel(data, sheet_name=sheet_name) + val = pd.read_excel(data, sheet_name=sheet_name) + if isinstance(val, dict): + # choose a sheet, or raise error + df = next(iter(val.values())) # first sheet + else: + df = val else: raise ValueError(f"Unsupported file type: {ext}") else: raise ValueError("Input must be DataFrame, list, array, dict, or file path.") - + df.columns = [str(c).strip() for c in df.columns] # Handle single-column input automatically if df.shape[1] == 1: col = df.columns[0] df["MIC"] = df[col].astype(str).str.strip() - - df = ( - df.groupby("MIC") - .size() - .reset_index(name="observations") - ) + + df = df.groupby("MIC").size().reset_index(name="observations") expected = ["MIC", "observations"] missing = [c for c in expected if c not in df.columns] @@ -67,15 +74,19 @@ def read_input(data, sheet_name=None): df["MIC"] = df["MIC"].astype(str).str.strip() df["observations"] = ( - pd.to_numeric(df["observations"], errors="coerce") - .fillna(0) - .astype(int) + pd.to_numeric(df["observations"], errors="coerce").fillna(0).astype(int) ) df = df.dropna(subset=["MIC"]).reset_index(drop=True) return df -def read_params(params, dflt_dilution, dflt_dists, dflt_tails): + +def read_params( + params: str | dict[str, Any], + dflt_dilution: int, + dflt_dists: int, + dflt_tails: Optional[int], +) -> Tuple[int, int, Optional[int], float]: """ Read ECOFF model parameters from a file or dictionary, falling back to provided defaults. @@ -106,7 +117,8 @@ def read_params(params, dflt_dilution, dflt_dists, dflt_tails): params = yaml.safe_load(f) or {} elif ext == ".txt": - parsed = {} + + parsed: Dict[str, Any] = {} with open(params, "r") as f: for line in f: line = line.strip() @@ -139,12 +151,16 @@ def read_params(params, dflt_dilution, dflt_dists, dflt_tails): dilution_factor = params.get("dilution_factor", dflt_dilution) distributions = params.get("distributions", dflt_dists) boundary_support = params.get("boundary_support", dflt_tails) - percentile = params.get("percentile", None) + percentile = params.get("percentile", 99) return dilution_factor, distributions, boundary_support, percentile -def read_multi_obs_input(data, sheet_name=None): +def read_multi_obs_input( + data: DataFrame | list[Any] | tuple[Any, ...] | dict[str, Any] | Any | str, + sheet_name: Optional[str] = None, +) -> Dict[str, Any]: + """ Read MIC input but allow multiple observation columns. Returns a dict: @@ -179,7 +195,12 @@ def read_multi_obs_input(data, sheet_name=None): elif ext in [".tsv", ".txt"]: df = pd.read_csv(data, sep=r"\s+") elif ext in [".xlsx", ".xls"]: - df = pd.read_excel(data, sheet_name='Sheet1') + val = pd.read_excel(data, sheet_name=sheet_name) + if isinstance(val, dict): + # choose a sheet, or raise error + df = next(iter(val.values())) # first sheet + else: + df = val else: raise ValueError(f"Unsupported file type: {ext}") @@ -193,15 +214,9 @@ def read_multi_obs_input(data, sheet_name=None): if df.shape[1] == 1: col = df.columns[0] df["MIC"] = df[col].astype(str).str.strip() - df_single = ( - df.groupby("MIC") - .size() - .reset_index(name="observations") - ) - return { - "global": df_single, - "individual": {"observations": df_single.copy()} - } + df_single = df.groupby("MIC").size().reset_index(name="observations") + return cast(Dict[str, Any], {"global": df_single, "individual": {"observations": df_single.copy()}}) + # Require MIC column if "MIC" not in df.columns: @@ -227,8 +242,4 @@ def read_multi_obs_input(data, sheet_name=None): df_global = df[["MIC"]].copy() df_global["observations"] = df[obs_cols].sum(axis=1).astype(int) - return { - "global": df_global, - "individual": individual - } - + return {"global": df_global, "individual": individual} diff --git a/tests/test_utils.py b/tests/test_utils.py index 9f166e0..06a9aae 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -114,8 +114,8 @@ def test_read_params_dict(): # Defaults respected assert tails is None - #check default percentile = None from parser - assert percentile is None + #check default percentile = 99 from parser + assert percentile == 99 def test_read_params_txt_invalid_format(tmp_path): From 1da10179bbeff8d276435f24756604276cae5a5f Mon Sep 17 00:00:00 2001 From: DylanAdlard Date: Mon, 15 Dec 2025 11:06:31 +0200 Subject: [PATCH 2/4] typing extra --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b28a487..e286b57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,4 +66,4 @@ implicit_reexport = true exclude = "src/ecoff_fitter/wts.py" [project.optional-dependencies] -dev = ["mypy", "pandas-stubs", "types-PyYAML", "numpy-stubs", "scipy-stubs"] +dev = ["mypy", "pandas-stubs", "types-PyYAML", "scipy-stubs"] From 6aab083bfa4cf214a6518cf68e2b60d71243306b Mon Sep 17 00:00:00 2001 From: DylanAdlard Date: Mon, 15 Dec 2025 11:10:22 +0200 Subject: [PATCH 3/4] np floating --- src/ecoff_fitter/core.py | 20 ++++++++++---------- src/ecoff_fitter/graphs.py | 12 ++++++------ src/ecoff_fitter/mixture.py | 26 +++++++++++++------------- src/ecoff_fitter/report.py | 18 +++++++++--------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/ecoff_fitter/core.py b/src/ecoff_fitter/core.py index 6863363..0b30c3f 100644 --- a/src/ecoff_fitter/core.py +++ b/src/ecoff_fitter/core.py @@ -28,18 +28,18 @@ class ECOFFitter: """ model_: IntReg | MixtureModel | None - x: NDArray[np.float_] - mus_: NDArray[np.float_] - sigmas_: NDArray[np.float_] - pis_: NDArray[np.float_] + x: NDArray[np.floating] + mus_: NDArray[np.floating] + sigmas_: NDArray[np.floating] + pis_: NDArray[np.floating] loglike_: float converged_: bool n_iter_: int | None ecoff_: float z_percentile_: float - y_low_: NDArray[np.float_] - y_high_: NDArray[np.float_] - weights_: NDArray[np.float_] + y_low_: NDArray[np.floating] + y_high_: NDArray[np.floating] + weights_: NDArray[np.floating] def __init__( self, @@ -177,7 +177,7 @@ def fit_mixture(self, options: dict[str, Any] | None = None) -> "ECOFFitter": def define_intervals( self, df: Optional[pd.DataFrame] = None - ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]: + ) -> Tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating]]: """ Construct MIC interval bounds and apply left-, right-, and interval- censoring rules, then transform to log dilution space. @@ -232,8 +232,8 @@ def define_intervals( return y_low_log, y_high_log, weights def log_transf_intervals( - self, y_low: NDArray[np.float_], y_high: NDArray[np.float_] - ) -> Tuple[NDArray[np.float_], NDArray[np.float_]]: + self, y_low: NDArray[np.floating], y_high: NDArray[np.floating] + ) -> Tuple[NDArray[np.floating], NDArray[np.floating]]: """ Transform interval bounds into log base–dilution_factor space. diff --git a/src/ecoff_fitter/graphs.py b/src/ecoff_fitter/graphs.py index f5bfd57..d5f85fa 100644 --- a/src/ecoff_fitter/graphs.py +++ b/src/ecoff_fitter/graphs.py @@ -7,13 +7,13 @@ def plot_mic_distribution( - low_log: NDArray[np.float_], - high_log: NDArray[np.float_], - weights: NDArray[np.float_], + low_log: NDArray[np.floating], + high_log: NDArray[np.floating], + weights: NDArray[np.floating], dilution_factor: float | int, - mus: NDArray[np.float_] | list[float], - sigmas: NDArray[np.float_] | list[float], - pis: Optional[NDArray[np.float_] | list[float]] = None, + mus: NDArray[np.floating] | list[float], + sigmas: NDArray[np.floating] | list[float], + pis: Optional[NDArray[np.floating] | list[float]] = None, log2_ecoff: Optional[float] = None, global_x_min: Optional[float] = None, global_x_max: Optional[float] = None, diff --git a/src/ecoff_fitter/mixture.py b/src/ecoff_fitter/mixture.py index 11ea20c..18d82a4 100644 --- a/src/ecoff_fitter/mixture.py +++ b/src/ecoff_fitter/mixture.py @@ -17,15 +17,15 @@ class MixtureModel: 3. Optional refinement using mixture likelihood via L-BFGS-B. """ - y_low: NDArray[np.float_] - y_high: NDArray[np.float_] - weights: NDArray[np.float_] + y_low: NDArray[np.floating] + y_high: NDArray[np.floating] + weights: NDArray[np.floating] - mus: NDArray[np.float_] - sigmas: NDArray[np.float_] - pis: NDArray[np.float_] + mus: NDArray[np.floating] + sigmas: NDArray[np.floating] + pis: NDArray[np.floating] - x: NDArray[np.float_] + x: NDArray[np.floating] converged: bool n_iter: int loglike: float @@ -33,9 +33,9 @@ class MixtureModel: def __init__( self, - y_low: NDArray[np.float_] | list[float], - y_high: NDArray[np.float_] | list[float], - weights: NDArray[np.float_] | list[float], + y_low: NDArray[np.floating] | list[float], + y_high: NDArray[np.floating] | list[float], + weights: NDArray[np.floating] | list[float], distributions: int, ) -> None: """ @@ -237,8 +237,8 @@ def refine_mixture(self) -> "MixtureModel": weights = np.asarray(self.weights, float) def unpack_params( - params: NDArray[np.float_], - ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]: + params: NDArray[np.floating], + ) -> Tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating]]: """Convert flat parameter vector into mus, sigmas, pis.""" mus = params[: self.K] sigmas = np.exp(params[self.K : 2 * self.K]) @@ -251,7 +251,7 @@ def unpack_params( return mus, sigmas, pis - def neg_log_likelihood(params: NDArray[np.float_]) -> float: + def neg_log_likelihood(params: NDArray[np.floating]) -> float: mus, sigmas, pis = unpack_params(params) # P(interval | component k) diff --git a/src/ecoff_fitter/report.py b/src/ecoff_fitter/report.py index 64c55ad..6fe2df6 100644 --- a/src/ecoff_fitter/report.py +++ b/src/ecoff_fitter/report.py @@ -52,15 +52,15 @@ def dilution_factor(self) -> float: return cast(float, self.fitter.dilution_factor) @property - def mus(self) -> NDArray[np.float_]: - return cast(NDArray[np.float_], self.fitter.mus_) + def mus(self) -> NDArray[np.floating]: + return cast(NDArray[np.floating], self.fitter.mus_) @property - def sigmas(self) -> NDArray[np.float_]: - return cast(NDArray[np.float_], self.fitter.sigmas_) + def sigmas(self) -> NDArray[np.floating]: + return cast(NDArray[np.floating], self.fitter.sigmas_) @property - def pis(self) -> Optional[NDArray[np.float_]]: + def pis(self) -> Optional[NDArray[np.floating]]: return getattr(self.fitter, "pis_", None) @property @@ -71,12 +71,12 @@ def model(self) -> Any: @property def intervals( self, - ) -> tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]: + ) -> tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating]]: return cast( tuple[ - NDArray[np.float_], - NDArray[np.float_], - NDArray[np.float_] + NDArray[np.floating], + NDArray[np.floating], + NDArray[np.floating] ], self.fitter.define_intervals(), ) From 356d7ea9e54a40d8153c27224b139d336fcb8fd1 Mon Sep 17 00:00:00 2001 From: DylanAdlard Date: Mon, 15 Dec 2025 11:17:32 +0200 Subject: [PATCH 4/4] minor --- env.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 env.yml diff --git a/env.yml b/env.yml new file mode 100644 index 0000000..97372b4 --- /dev/null +++ b/env.yml @@ -0,0 +1,18 @@ +name: ECOFFitter +channels: + - conda-forge + - defaults +dependencies: + - python + - scipy + - pytest + - scikit-learn + - pandas + - joblib + - yaml + - pyyaml + - matplotlib + - pytest-cov + - pip + - pip: + - intreg