From 1361b1eda9831b6e883a66663dda9bdd8b8ce2db Mon Sep 17 00:00:00 2001
From: DylanAdlard <dylan.adlard@lmh.ox.ac.uk>
Date: Mon, 15 Dec 2025 11:05:01 +0200
Subject: [PATCH 1/4] Add mypy config, CI workflow and type annotations

---
 .github/workflows/mypy.yml   | 24 ++++++++++
 README.md                    | 21 +--------
 env.yml                      | 18 --------
 pyproject.toml               | 17 ++++++++
 requirements.txt             | 10 -----
 src/ecoff_fitter/__init__.py |  7 ++-
 src/ecoff_fitter/cli.py      |  3 +-
 src/ecoff_fitter/core.py     | 45 ++++++++++++++-----
 src/ecoff_fitter/defence.py  | 29 +++++++-----
 src/ecoff_fitter/graphs.py   | 27 +++++++-----
 src/ecoff_fitter/mixture.py  | 47 +++++++++++++++-----
 src/ecoff_fitter/report.py   | 85 +++++++++++++++++++++---------------
 src/ecoff_fitter/utils.py    | 73 ++++++++++++++++++-------------
 tests/test_utils.py          |  4 +-
 14 files changed, 246 insertions(+), 164 deletions(-)
 create mode 100644 .github/workflows/mypy.yml
 delete mode 100644 env.yml
 delete mode 100644 requirements.txt

diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
new file mode 100644
index 0000000..2554ed7
--- /dev/null
+++ b/.github/workflows/mypy.yml
@@ -0,0 +1,24 @@
+name: mypy
+
+on: [push, pull_request]
+
+jobs:
+  type-check:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install package + dev deps
+        run: |
+          pip install .[dev]
+
+      - name: Run MyPy
+        run: |
+          mypy src/ecoff_fitter --pretty
diff --git a/README.md b/README.md
index c4ea53c..718eca9 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Demo input files are provided in `demo_files/` to illustrate basic use.
 
 ---
 
-## 📦 Installation
+## 🛠 Installation
 
 ### Install from PyPI
 
@@ -39,25 +39,6 @@ pip install -e .
 
 ---
 
-## 🛠 Creating the Environment
-
-### Conda environment (env.yml)
-
-```bash
-conda env create -f env.yml
-conda activate ECOFFitter
-```
-
-### Pip environment (requirements.txt)
-
-```bash
-python -m venv ecoff-env
-source ecoff-env/bin/activate
-pip install -r requirements.txt
-```
-
----
-
 ## 📥 Input
 
 ### 1. MIC Data Input File
diff --git a/env.yml b/env.yml
deleted file mode 100644
index 97372b4..0000000
--- a/env.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: ECOFFitter
-channels:
-  - conda-forge
-  - defaults
-dependencies:
-  - python
-  - scipy
-  - pytest
-  - scikit-learn
-  - pandas
-  - joblib
-  - yaml
-  - pyyaml
-  - matplotlib
-  - pytest-cov
-  - pip
-  - pip:
-      - intreg
diff --git a/pyproject.toml b/pyproject.toml
index 617a12b..b28a487 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,3 +50,20 @@ omit = [
     "*/ecoff_fitter/wts.py",
     "*/gui.py"
 ]
+
+[tool.mypy]
+python_version = "3.10"
+warn_return_any = true
+warn_unused_ignores = true
+warn_redundant_casts = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+ignore_missing_imports = true
+allow_redefinition = true
+no_implicit_optional = true
+implicit_reexport = true
+exclude = "src/ecoff_fitter/wts.py"
+
+[project.optional-dependencies]
+dev = ["mypy", "pandas-stubs", "types-PyYAML", "numpy-stubs", "scipy-stubs"]
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index e93a0b7..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-scipy
-pytest
-scikit-learn
-pandas
-joblib
-PyYAML
-matplotlib
-pytest-cov
-intreg
-openpyxl
diff --git a/src/ecoff_fitter/__init__.py b/src/ecoff_fitter/__init__.py
index fbb240b..9d48a6a 100644
--- a/src/ecoff_fitter/__init__.py
+++ b/src/ecoff_fitter/__init__.py
@@ -1,3 +1,6 @@
+from typing import Callable
+
+
 """
 ECOFF Fitter — Estimate epidemiological cutoff values (ECOFFs)
 using interval regression on MIC (Minimum Inhibitory Concentration) data.
@@ -16,7 +19,7 @@
 # --- Public API imports ---
 from .core import ECOFFitter
 
-__all__ = ["ECOFFitter"]
+__all__: list[str] = ["ECOFFitter"]
 
 # --- Optional: version handling ---
 try:
@@ -25,7 +28,7 @@
     __version__ = "0.0.0"
 
 # --- Optional: CLI hook for `python -m ecoff_fitter` ---
-def main():
+def main() -> None:
     """Entry point for running ecoff_fitter as a module (CLI)."""
     from .cli import main as cli_main
     cli_main()
diff --git a/src/ecoff_fitter/cli.py b/src/ecoff_fitter/cli.py
index 0cb8648..e49c5f6 100644
--- a/src/ecoff_fitter/cli.py
+++ b/src/ecoff_fitter/cli.py
@@ -7,6 +7,7 @@
 """
 
 import argparse
+from typing import Any, List, Optional
 from ecoff_fitter import ECOFFitter
 from ecoff_fitter.report import GenerateReport
 from ecoff_fitter.defence import validate_output_path
@@ -74,7 +75,7 @@ def build_parser() -> argparse.ArgumentParser:
     return parser
 
 
-def main(argv=None):
+def main(argv: Optional[List[str]] = None) -> None:
     """Main entry point for the ECOFFitter CLI."""
     parser = build_parser()
     args = parser.parse_args(argv)
diff --git a/src/ecoff_fitter/core.py b/src/ecoff_fitter/core.py
index 360e6cd..6863363 100644
--- a/src/ecoff_fitter/core.py
+++ b/src/ecoff_fitter/core.py
@@ -1,4 +1,7 @@
 import numpy as np
+from typing import Any, Optional, Tuple
+import pandas as pd
+from numpy.typing import NDArray
 from scipy.stats import norm
 from intreg.intreg import IntReg
 from ecoff_fitter.utils import read_input, read_params
@@ -24,14 +27,28 @@ class ECOFFitter:
     (wild-type) component at the given percentile.
     """
 
+    model_: IntReg | MixtureModel | None
+    x: NDArray[np.float_]
+    mus_: NDArray[np.float_]
+    sigmas_: NDArray[np.float_]
+    pis_: NDArray[np.float_]
+    loglike_: float
+    converged_: bool
+    n_iter_: int | None
+    ecoff_: float
+    z_percentile_: float
+    y_low_: NDArray[np.float_]
+    y_high_: NDArray[np.float_]
+    weights_: NDArray[np.float_]
+
     def __init__(
         self,
-        input,
-        params: dict | str | None = None,
+        input: pd.DataFrame | str,
+        params: dict[str, Any] | str | None = None,
         dilution_factor: int = 2,
         distributions: int = 1,
         boundary_support: int | None = 1,
-    ):
+    ) -> None:
         """
         Initialize the ECOFFitter.
 
@@ -75,7 +92,7 @@ def __init__(
         self.distributions = distributions
         self.boundary_support = boundary_support
 
-    def fit(self, options={}):
+    def fit(self, options: dict[str, Any] | None = None) -> "ECOFFitter":
         """
         Define MIC intervals and fit either a single censored-normal model
         or a finite mixture model.
@@ -96,7 +113,7 @@ def fit(self, options={}):
             # multiple gaussians
             return self.fit_mixture(options)
 
-    def fit_single(self, options=None):
+    def fit_single(self, options: dict[str, Any] | None = None) -> "ECOFFitter":
         """
         Fit a single-component censored normal distribution using interval
         regression.
@@ -122,7 +139,9 @@ def fit_single(self, options=None):
         self.converged_ = result.success
         self.n_iter_ = result.nit if hasattr(result, "nit") else None
 
-    def fit_mixture(self, options=None):
+        return self
+
+    def fit_mixture(self, options: dict[str, Any] | None = None) -> "ECOFFitter":
         """
         Fit a K-component finite mixture of censored normals using the EM
         algorithm followed by optional refinement.
@@ -156,7 +175,9 @@ def fit_mixture(self, options=None):
 
         return self
 
-    def define_intervals(self, df=None):
+    def define_intervals(
+        self, df: Optional[pd.DataFrame] = None
+    ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]:
         """
         Construct MIC interval bounds and apply left-, right-, and interval-
         censoring rules, then transform to log dilution space.
@@ -210,7 +231,9 @@ def define_intervals(self, df=None):
 
         return y_low_log, y_high_log, weights
 
-    def log_transf_intervals(self, y_low, y_high):
+    def log_transf_intervals(
+        self, y_low: NDArray[np.float_], y_high: NDArray[np.float_]
+    ) -> Tuple[NDArray[np.float_], NDArray[np.float_]]:
         """
         Transform interval bounds into log base–dilution_factor space.
 
@@ -231,7 +254,9 @@ def log_transf_intervals(self, y_low, y_high):
 
         return y_low_log, y_high_log
 
-    def generate(self, percentile: int | float = 99, options={}):
+    def generate(
+        self, percentile: int | float = 99, options: dict[str, Any] | None = None
+    ) -> Tuple[Any, ...]:
         """
         Fit the model and compute the ECOFF at a specified percentile.
 
@@ -253,7 +278,7 @@ def generate(self, percentile: int | float = 99, options={}):
 
         return results
 
-    def compute_ecoff(self, percentile: float):
+    def compute_ecoff(self, percentile: float) -> Tuple[Any, ...]:
         """
         Compute the ECOFF and percentile location from the fitted model.
 
diff --git a/src/ecoff_fitter/defence.py b/src/ecoff_fitter/defence.py
index c583673..c96251f 100644
--- a/src/ecoff_fitter/defence.py
+++ b/src/ecoff_fitter/defence.py
@@ -1,9 +1,11 @@
 import pandas as pd
+from typing import Any
+from pandas import DataFrame
 import os
 import re
 
 
-def validate_input_source(input):
+def validate_input_source(input: str | DataFrame | dict[str, Any]) -> None:
     """
     Validate the input source for ECOFFitter.
 
@@ -32,7 +34,9 @@ def validate_input_source(input):
         raise ValueError("Input must be a pandas DataFrame or a valid file path.")
 
 
-def validate_params_source(params):
+def validate_params_source(
+    params: dict[str, Any] | str | list[Any] | tuple[Any, ...] | DataFrame | Any | None,
+) -> None:
     """
     Pre-validate the params argument before attempting to read it.
 
@@ -69,8 +73,7 @@ def validate_params_source(params):
     )
 
 
-
-def validate_mic_data(df):
+def validate_mic_data(df: DataFrame) -> None:
     """
     Validate MIC and observations columns.
 
@@ -97,7 +100,9 @@ def validate_mic_data(df):
         raise ValueError(f"Invalid MIC format found in rows: {bad_rows.index.tolist()}")
 
 
-def validate_params(dilution_factor, distributions, boundary_support):
+def validate_params(
+    dilution_factor: int, distributions: int, boundary_support: int | None
+) -> None:
     """
     Validate ECOFFitter configuration values.
 
@@ -112,17 +117,18 @@ def validate_params(dilution_factor, distributions, boundary_support):
 
     if not isinstance(dilution_factor, int) or dilution_factor <= 1:
         raise ValueError("dilution_factor must be an integer > 1.")
-    
+
     if not isinstance(distributions, int):
-        raise NotImplementedError("The number of mixture components must be an integer.")
-    
+        raise NotImplementedError(
+            "The number of mixture components must be an integer."
+        )
+
     if boundary_support is not None and (
         not isinstance(boundary_support, int) or boundary_support < 0
     ):
         raise ValueError("boundary_support must be a non-negative integer or None.")
 
 
-
 def validate_output_path(path: str) -> bool:
     """
     Checks if the given path is safe and writable, and that the file extension is .txt or .pdf.
@@ -130,11 +136,11 @@ def validate_output_path(path: str) -> bool:
     Returns True if valid, otherwise raises ValueError.
     """
     # Check extension
-    allowed_exts = ('.txt', '.pdf')
+    allowed_exts = (".txt", ".pdf")
     if not path.lower().endswith(allowed_exts):
         raise ValueError(f"File must end with {allowed_exts}, got '{path}'")
 
-    directory = os.path.dirname(path) or '.'
+    directory = os.path.dirname(path) or "."
 
     if not os.path.exists(directory):
         raise ValueError(f"Directory does not exist: {directory}")
@@ -143,4 +149,3 @@ def validate_output_path(path: str) -> bool:
         raise PermissionError(f"No write permission in directory: {directory}")
 
     return True
-
diff --git a/src/ecoff_fitter/graphs.py b/src/ecoff_fitter/graphs.py
index 13e02cb..f5bfd57 100644
--- a/src/ecoff_fitter/graphs.py
+++ b/src/ecoff_fitter/graphs.py
@@ -1,21 +1,24 @@
 import numpy as np
+from typing import Optional
+from numpy.typing import NDArray
+import matplotlib.axes
 import matplotlib.pyplot as plt
 from scipy.stats import norm
 
 
 def plot_mic_distribution(
-    low_log,
-    high_log,
-    weights,
-    dilution_factor,
-    mus,
-    sigmas,
-    pis=None,
-    log2_ecoff=None,
-    global_x_min=None,
-    global_x_max=None,
-    ax=None,
-):
+    low_log: NDArray[np.float_],
+    high_log: NDArray[np.float_],
+    weights: NDArray[np.float_],
+    dilution_factor: float | int,
+    mus: NDArray[np.float_] | list[float],
+    sigmas: NDArray[np.float_] | list[float],
+    pis: Optional[NDArray[np.float_] | list[float]] = None,
+    log2_ecoff: Optional[float] = None,
+    global_x_min: Optional[float] = None,
+    global_x_max: Optional[float] = None,
+    ax: Optional[matplotlib.axes.Axes] = None,
+) -> matplotlib.axes.Axes:
     """
     Plot MIC intervals with a K-component Gaussian mixture fit.
     Supports left- and right-censoring with visual tail extensions.
diff --git a/src/ecoff_fitter/mixture.py b/src/ecoff_fitter/mixture.py
index a5cdb03..11ea20c 100644
--- a/src/ecoff_fitter/mixture.py
+++ b/src/ecoff_fitter/mixture.py
@@ -1,4 +1,6 @@
 import numpy as np
+from typing import Optional, Tuple
+from numpy.typing import NDArray
 from intreg.intreg import IntReg
 from sklearn.cluster import KMeans
 from scipy.optimize import minimize
@@ -15,7 +17,27 @@ class MixtureModel:
         3. Optional refinement using mixture likelihood via L-BFGS-B.
     """
 
-    def __init__(self, y_low, y_high, weights, distributions):
+    y_low: NDArray[np.float_]
+    y_high: NDArray[np.float_]
+    weights: NDArray[np.float_]
+
+    mus: NDArray[np.float_]
+    sigmas: NDArray[np.float_]
+    pis: NDArray[np.float_]
+
+    x: NDArray[np.float_]
+    converged: bool
+    n_iter: int
+    loglike: float
+    params_: dict[str, float]
+
+    def __init__(
+        self,
+        y_low: NDArray[np.float_] | list[float],
+        y_high: NDArray[np.float_] | list[float],
+        weights: NDArray[np.float_] | list[float],
+        distributions: int,
+    ) -> None:
         """
         Initialise a K-component mixture model using K-means clustering.
 
@@ -39,14 +61,14 @@ def __init__(self, y_low, y_high, weights, distributions):
 
         # Find finite bounds
         finite_high = np.max(y_high[np.isfinite(y_high)])
-        finite_low  = np.min(y_low[np.isfinite(y_low)])
+        finite_low = np.min(y_low[np.isfinite(y_low)])
 
         # KMeans cannot handle inf → substitute only for midpoint computation
-        y_low_km  = y_low.copy()
+        y_low_km = y_low.copy()
         y_high_km = y_high.copy()
 
         y_high_km[np.isinf(y_high_km)] = finite_high + 1.0
-        y_low_km[np.isinf(y_low_km)]   = finite_low - 1.0
+        y_low_km[np.isinf(y_low_km)] = finite_low - 1.0
 
         mid = (y_low_km + y_high_km) / 2
         mid_reshaped = mid.reshape(-1, 1)
@@ -57,7 +79,6 @@ def __init__(self, y_low, y_high, weights, distributions):
             random_state=0,
         ).fit(mid_reshaped, sample_weight=weights)
 
-
         # Cluster centres → initial mus
         mus = kmeans.cluster_centers_.flatten()
 
@@ -84,7 +105,9 @@ def __init__(self, y_low, y_high, weights, distributions):
         self.y_low, self.y_high, self.weights = y_low, y_high, weights
         self.mus, self.sigmas, self.pis = mus, sigmas, pis
 
-    def fit(self, max_iter=500, tol=1e-6, refine=True):
+    def fit(
+        self, max_iter: int = 500, tol: float = 1e-6, refine: bool = True
+    ) -> "MixtureModel":
         """
         Fit the mixture model using EM and optional refinement.
 
@@ -105,7 +128,7 @@ def fit(self, max_iter=500, tol=1e-6, refine=True):
 
         return self
 
-    def em(self, max_iter=500, tol=1e-6):
+    def em(self, max_iter: int = 500, tol: float = 1e-6) -> "MixtureModel":
         """
         Expectation–Maximization (EM) algorithm for a K-component mixture
         of interval-censored normal distributions.
@@ -197,7 +220,7 @@ def em(self, max_iter=500, tol=1e-6):
 
         return self
 
-    def refine_mixture(self):
+    def refine_mixture(self) -> "MixtureModel":
         """
         Refinement step for a general K-component mixture model using L-BFGS-B.
 
@@ -213,7 +236,9 @@ def refine_mixture(self):
         y_high = np.asarray(self.y_high, float)
         weights = np.asarray(self.weights, float)
 
-        def unpack_params(params):
+        def unpack_params(
+            params: NDArray[np.float_],
+        ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]:
             """Convert flat parameter vector into mus, sigmas, pis."""
             mus = params[: self.K]
             sigmas = np.exp(params[self.K : 2 * self.K])
@@ -226,7 +251,7 @@ def unpack_params(params):
 
             return mus, sigmas, pis
 
-        def neg_log_likelihood(params):
+        def neg_log_likelihood(params: NDArray[np.float_]) -> float:
             mus, sigmas, pis = unpack_params(params)
 
             # P(interval | component k)
@@ -241,7 +266,7 @@ def neg_log_likelihood(params):
             mix = p_mat @ pis
             mix = np.clip(mix, 1e-300, np.inf)
 
-            return -np.sum(weights * np.log(mix))
+            return float(-np.sum(weights * np.log(mix)))
 
         res = minimize(neg_log_likelihood, self.x, method="L-BFGS-B")
         self.x = res.x
diff --git a/src/ecoff_fitter/report.py b/src/ecoff_fitter/report.py
index a32b3d1..64c55ad 100644
--- a/src/ecoff_fitter/report.py
+++ b/src/ecoff_fitter/report.py
@@ -1,5 +1,7 @@
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Tuple, Optional, Dict, cast
+from matplotlib.figure import Figure
+from numpy.typing import NDArray
 import numpy as np
 from matplotlib.backends.backend_pdf import PdfPages
 from ecoff_fitter.graphs import plot_mic_distribution
@@ -15,12 +17,12 @@ class GenerateReport:
     avoiding duplication of distributions, mus, sigmas, intervals, etc.
     """
 
-    fitter: Any            # The ECOFFitter used to generate the results
-    ecoff: float           # ECOFF value
-    z: tuple               # Percentile-based ECOFFs (99, 97.5, 95)
+    fitter: Any
+    ecoff: float
+    z: Tuple[float, float, float]  # Percentile-based ECOFFs (99, 97.5, 95)
 
     @classmethod
-    def from_fitter(cls, fitter, result):
+    def from_fitter(cls, fitter: Any, result: Tuple[Any, ...]) -> "GenerateReport":
         """
         Construct a GenerateReport from an ECOFFitter and generate() output.
 
@@ -33,7 +35,7 @@ def from_fitter(cls, fitter, result):
         z1 = fitter.compute_ecoff(percentile=97.5)[0]
         z2 = fitter.compute_ecoff(percentile=95)[0]
 
-        ecoff = result[0]    # first element always ECOFF
+        ecoff = result[0]  # first element always ECOFF
 
         return cls(
             fitter=fitter,
@@ -42,34 +44,44 @@ def from_fitter(cls, fitter, result):
         )
 
     @property
-    def distributions(self):
-        return self.fitter.distributions
+    def distributions(self) -> int:
+        return cast(int, self.fitter.distributions)
 
     @property
-    def dilution_factor(self):
-        return self.fitter.dilution_factor
+    def dilution_factor(self) -> float:
+        return cast(float, self.fitter.dilution_factor)
 
     @property
-    def mus(self):
-        return self.fitter.mus_
+    def mus(self) -> NDArray[np.float_]:
+        return cast(NDArray[np.float_], self.fitter.mus_)
 
     @property
-    def sigmas(self):
-        return self.fitter.sigmas_
-    
+    def sigmas(self) -> NDArray[np.float_]:
+        return cast(NDArray[np.float_], self.fitter.sigmas_)
+
     @property
-    def pis(self):
+    def pis(self) -> Optional[NDArray[np.float_]]:
         return getattr(self.fitter, "pis_", None)
 
     @property
-    def model(self):
+    def model(self) -> Any:
         return getattr(self.fitter, "model_", None)
 
+
     @property
-    def intervals(self):
-        return self.fitter.define_intervals()
+    def intervals(
+        self,
+    ) -> tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]:
+        return cast(
+            tuple[
+                NDArray[np.float_],
+                NDArray[np.float_],
+                NDArray[np.float_]
+            ],
+            self.fitter.define_intervals(),
+        )
 
-    def print_stats(self, verbose=False):
+    def print_stats(self, verbose: bool = False) -> None:
         print(f"\nECOFF (original scale): {self.ecoff:.2}")
 
         if self.distributions == 1:
@@ -80,15 +92,16 @@ def print_stats(self, verbose=False):
         else:
             print("\nComponent means and sigmas (original scale):")
             for i, (mu, sigma) in enumerate(zip(self.mus, self.sigmas), start=1):
-                print(f"  μ{i}: {self.dilution_factor**mu:.4f}, "
-                      f"σ{i} (folds): {self.dilution_factor**sigma:.4f}")
+                print(
+                    f"  μ{i}: {self.dilution_factor**mu:.4f}, "
+                    f"σ{i} (folds): {self.dilution_factor**sigma:.4f}"
+                )
 
         if verbose and self.model is not None:
             print("\n--- Model details ---")
             print(self.model)
 
-
-    def write_out(self, path: str):
+    def write_out(self, path: str) -> None:
         z0, z1, z2 = self.z
 
         with open(path, "w") as f:
@@ -113,8 +126,7 @@ def write_out(self, path: str):
 
         print(f"\nResults saved to: {path}")
 
-
-    def save_pdf(self, outfile: str):
+    def save_pdf(self, outfile: str) -> None:
         with PdfPages(outfile) as pdf:
             fig = self._make_pdf()
             pdf.savefig(fig)
@@ -122,11 +134,9 @@ def save_pdf(self, outfile: str):
 
         print(f"PDF report saved to: {outfile}")
 
-
-    def _make_pdf(self, title=None):
+    def _make_pdf(self, title: Optional[str] = None) -> Figure:
         fig, (ax_plot, ax_text) = plt.subplots(
-            nrows=1, ncols=2, figsize=(10, 4),
-            gridspec_kw={"width_ratios": [2, 1]}
+            nrows=1, ncols=2, figsize=(10, 4), gridspec_kw={"width_ratios": [2, 1]}
         )
 
         low_log, high_log, weights = self.intervals
@@ -173,21 +183,26 @@ def _make_pdf(self, title=None):
                 )
 
         ax_text.text(
-            0.05, 0.9,
+            0.05,
+            0.9,
             "\n".join(lines),
             fontsize=11,
             va="top",
             family="monospace",
         )
 
-        fig.tight_layout(rect=[0, 0, 1, 0.95])
+        fig.tight_layout(rect=(0, 0, 1, 0.95))
 
         return fig
 
 
-
 class CombinedReport:
-    def __init__(self, outfile, global_report, individual_reports):
+    def __init__(
+        self,
+        outfile: str,
+        global_report: GenerateReport,
+        individual_reports: Dict[str, GenerateReport],
+    ) -> None:
         """
         outfile: PDF filename
         global_report: GenerateReport instance
@@ -197,7 +212,7 @@ def __init__(self, outfile, global_report, individual_reports):
         self.global_report = global_report
         self.individual_reports = individual_reports
 
-    def save_pdf(self):
+    def save_pdf(self) -> None:
         from matplotlib.backends.backend_pdf import PdfPages
 
         with PdfPages(self.outfile) as pdf:
diff --git a/src/ecoff_fitter/utils.py b/src/ecoff_fitter/utils.py
index d1625b0..096371e 100644
--- a/src/ecoff_fitter/utils.py
+++ b/src/ecoff_fitter/utils.py
@@ -1,9 +1,15 @@
+from typing import Any, Dict, List, Tuple, Optional, cast
 import pandas as pd
+from pandas import DataFrame
+from numpy.typing import NDArray
 import yaml
 import os
 
 
-def read_input(data, sheet_name=None):
+def read_input(
+    data: DataFrame | list[Any] | tuple[Any, ...] | dict[str, Any] | str,
+    sheet_name: Optional[str] = None,
+) -> DataFrame:
     """
     Read MIC input data from a DataFrame, array-like, dict, or file
     and validate required columns. If given a single-column input,
@@ -36,25 +42,26 @@ def read_input(data, sheet_name=None):
         elif ext in [".tsv", ".txt"]:
             df = pd.read_csv(data, sep=r"\s+")
         elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(data, sheet_name=sheet_name)
+            val = pd.read_excel(data, sheet_name=sheet_name)
+            if isinstance(val, dict):
+                # sheet_name=None makes read_excel return a dict of all sheets; use the first
+                df = next(iter(val.values()))   # first sheet
+            else:
+                df = val
         else:
             raise ValueError(f"Unsupported file type: {ext}")
 
     else:
         raise ValueError("Input must be DataFrame, list, array, dict, or file path.")
-
+    
     df.columns = [str(c).strip() for c in df.columns]
 
     # Handle single-column input automatically
     if df.shape[1] == 1:
         col = df.columns[0]
         df["MIC"] = df[col].astype(str).str.strip()
-        
-        df = (
-            df.groupby("MIC")
-            .size()
-            .reset_index(name="observations")
-        )
+
+        df = df.groupby("MIC").size().reset_index(name="observations")
 
     expected = ["MIC", "observations"]
     missing = [c for c in expected if c not in df.columns]
@@ -67,15 +74,19 @@ def read_input(data, sheet_name=None):
 
     df["MIC"] = df["MIC"].astype(str).str.strip()
     df["observations"] = (
-        pd.to_numeric(df["observations"], errors="coerce")
-        .fillna(0)
-        .astype(int)
+        pd.to_numeric(df["observations"], errors="coerce").fillna(0).astype(int)
     )
 
     df = df.dropna(subset=["MIC"]).reset_index(drop=True)
     return df
 
-def read_params(params, dflt_dilution, dflt_dists, dflt_tails):
+
+def read_params(
+    params: str | dict[str, Any],
+    dflt_dilution: int,
+    dflt_dists: int,
+    dflt_tails: Optional[int],
+) -> Tuple[int, int, Optional[int], float]:
     """
     Read ECOFF model parameters from a file or dictionary, falling back to provided defaults.
 
@@ -106,7 +117,8 @@ def read_params(params, dflt_dilution, dflt_dists, dflt_tails):
                 params = yaml.safe_load(f) or {}
 
         elif ext == ".txt":
-            parsed = {}
+
+            parsed: Dict[str, Any] = {}
             with open(params, "r") as f:
                 for line in f:
                     line = line.strip()
@@ -139,12 +151,16 @@ def read_params(params, dflt_dilution, dflt_dists, dflt_tails):
     dilution_factor = params.get("dilution_factor", dflt_dilution)
     distributions = params.get("distributions", dflt_dists)
     boundary_support = params.get("boundary_support", dflt_tails)
-    percentile = params.get("percentile", None)
+    percentile = params.get("percentile", 99)
 
     return dilution_factor, distributions, boundary_support, percentile
 
 
-def read_multi_obs_input(data, sheet_name=None):
+def read_multi_obs_input(
+    data: DataFrame | list[Any] | tuple[Any, ...] | dict[str, Any] | Any | str,
+    sheet_name: Optional[str] = None,
+) -> Dict[str, Any]:
+
     """
     Read MIC input but allow multiple observation columns.
     Returns a dict:
@@ -179,7 +195,12 @@ def read_multi_obs_input(data, sheet_name=None):
         elif ext in [".tsv", ".txt"]:
             df = pd.read_csv(data, sep=r"\s+")
         elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(data, sheet_name='Sheet1')
+            val = pd.read_excel(data, sheet_name=sheet_name)
+            if isinstance(val, dict):
+                # sheet_name=None makes read_excel return a dict of all sheets; use the first
+                df = next(iter(val.values()))   # first sheet
+            else:
+                df = val
         else:
             raise ValueError(f"Unsupported file type: {ext}")
 
@@ -193,15 +214,9 @@ def read_multi_obs_input(data, sheet_name=None):
     if df.shape[1] == 1:
         col = df.columns[0]
         df["MIC"] = df[col].astype(str).str.strip()
-        df_single = (
-            df.groupby("MIC")
-              .size()
-              .reset_index(name="observations")
-        )
-        return {
-            "global": df_single,
-            "individual": {"observations": df_single.copy()}
-        }
+        df_single = df.groupby("MIC").size().reset_index(name="observations")
+        return cast(Dict[str, Any], {"global": df_single, "individual": {"observations": df_single.copy()}})
+
 
     # Require MIC column
     if "MIC" not in df.columns:
@@ -227,8 +242,4 @@ def read_multi_obs_input(data, sheet_name=None):
     df_global = df[["MIC"]].copy()
     df_global["observations"] = df[obs_cols].sum(axis=1).astype(int)
 
-    return {
-        "global": df_global,
-        "individual": individual
-    }
-
+    return {"global": df_global, "individual": individual}
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9f166e0..06a9aae 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -114,8 +114,8 @@ def test_read_params_dict():
     # Defaults respected
     assert tails is None
 
-    #check default percentile = None from parser
-    assert percentile is None
+    # percentile defaults to 99 when it is not supplied in params
+    assert percentile == 99
 
 
 def test_read_params_txt_invalid_format(tmp_path):

From 1da10179bbeff8d276435f24756604276cae5a5f Mon Sep 17 00:00:00 2001
From: DylanAdlard <dylan.adlard@lmh.ox.ac.uk>
Date: Mon, 15 Dec 2025 11:06:31 +0200
Subject: [PATCH 2/4] Drop numpy-stubs from the dev extras

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b28a487..e286b57 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,4 +66,4 @@ implicit_reexport = true
 exclude = "src/ecoff_fitter/wts.py"
 
 [project.optional-dependencies]
-dev = ["mypy", "pandas-stubs", "types-PyYAML", "numpy-stubs", "scipy-stubs"]
+dev = ["mypy", "pandas-stubs", "types-PyYAML",  "scipy-stubs"]

From 6aab083bfa4cf214a6518cf68e2b60d71243306b Mon Sep 17 00:00:00 2001
From: DylanAdlard <dylan.adlard@lmh.ox.ac.uk>
Date: Mon, 15 Dec 2025 11:10:22 +0200
Subject: [PATCH 3/4] Replace np.float_ with np.floating in annotations

---
 src/ecoff_fitter/core.py    | 20 ++++++++++----------
 src/ecoff_fitter/graphs.py  | 12 ++++++------
 src/ecoff_fitter/mixture.py | 26 +++++++++++++-------------
 src/ecoff_fitter/report.py  | 18 +++++++++---------
 4 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/ecoff_fitter/core.py b/src/ecoff_fitter/core.py
index 6863363..0b30c3f 100644
--- a/src/ecoff_fitter/core.py
+++ b/src/ecoff_fitter/core.py
@@ -28,18 +28,18 @@ class ECOFFitter:
     """
 
     model_: IntReg | MixtureModel | None
-    x: NDArray[np.float_]
-    mus_: NDArray[np.float_]
-    sigmas_: NDArray[np.float_]
-    pis_: NDArray[np.float_]
+    x: NDArray[np.floating]
+    mus_: NDArray[np.floating]
+    sigmas_: NDArray[np.floating]
+    pis_: NDArray[np.floating]
     loglike_: float
     converged_: bool
     n_iter_: int | None
     ecoff_: float
     z_percentile_: float
-    y_low_: NDArray[np.float_]
-    y_high_: NDArray[np.float_]
-    weights_: NDArray[np.float_]
+    y_low_: NDArray[np.floating]
+    y_high_: NDArray[np.floating]
+    weights_: NDArray[np.floating]
 
     def __init__(
         self,
@@ -177,7 +177,7 @@ def fit_mixture(self, options: dict[str, Any] | None = None) -> "ECOFFitter":
 
     def define_intervals(
         self, df: Optional[pd.DataFrame] = None
-    ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]:
+    ) -> Tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating]]:
         """
         Construct MIC interval bounds and apply left-, right-, and interval-
         censoring rules, then transform to log dilution space.
@@ -232,8 +232,8 @@ def define_intervals(
         return y_low_log, y_high_log, weights
 
     def log_transf_intervals(
-        self, y_low: NDArray[np.float_], y_high: NDArray[np.float_]
-    ) -> Tuple[NDArray[np.float_], NDArray[np.float_]]:
+        self, y_low: NDArray[np.floating], y_high: NDArray[np.floating]
+    ) -> Tuple[NDArray[np.floating], NDArray[np.floating]]:
         """
         Transform interval bounds into log base–dilution_factor space.
 
diff --git a/src/ecoff_fitter/graphs.py b/src/ecoff_fitter/graphs.py
index f5bfd57..d5f85fa 100644
--- a/src/ecoff_fitter/graphs.py
+++ b/src/ecoff_fitter/graphs.py
@@ -7,13 +7,13 @@
 
 
 def plot_mic_distribution(
-    low_log: NDArray[np.float_],
-    high_log: NDArray[np.float_],
-    weights: NDArray[np.float_],
+    low_log: NDArray[np.floating],
+    high_log: NDArray[np.floating],
+    weights: NDArray[np.floating],
     dilution_factor: float | int,
-    mus: NDArray[np.float_] | list[float],
-    sigmas: NDArray[np.float_] | list[float],
-    pis: Optional[NDArray[np.float_] | list[float]] = None,
+    mus: NDArray[np.floating] | list[float],
+    sigmas: NDArray[np.floating] | list[float],
+    pis: Optional[NDArray[np.floating] | list[float]] = None,
     log2_ecoff: Optional[float] = None,
     global_x_min: Optional[float] = None,
     global_x_max: Optional[float] = None,
diff --git a/src/ecoff_fitter/mixture.py b/src/ecoff_fitter/mixture.py
index 11ea20c..18d82a4 100644
--- a/src/ecoff_fitter/mixture.py
+++ b/src/ecoff_fitter/mixture.py
@@ -17,15 +17,15 @@ class MixtureModel:
         3. Optional refinement using mixture likelihood via L-BFGS-B.
     """
 
-    y_low: NDArray[np.float_]
-    y_high: NDArray[np.float_]
-    weights: NDArray[np.float_]
+    y_low: NDArray[np.floating]
+    y_high: NDArray[np.floating]
+    weights: NDArray[np.floating]
 
-    mus: NDArray[np.float_]
-    sigmas: NDArray[np.float_]
-    pis: NDArray[np.float_]
+    mus: NDArray[np.floating]
+    sigmas: NDArray[np.floating]
+    pis: NDArray[np.floating]
 
-    x: NDArray[np.float_]
+    x: NDArray[np.floating]
     converged: bool
     n_iter: int
     loglike: float
@@ -33,9 +33,9 @@ class MixtureModel:
 
     def __init__(
         self,
-        y_low: NDArray[np.float_] | list[float],
-        y_high: NDArray[np.float_] | list[float],
-        weights: NDArray[np.float_] | list[float],
+        y_low: NDArray[np.floating] | list[float],
+        y_high: NDArray[np.floating] | list[float],
+        weights: NDArray[np.floating] | list[float],
         distributions: int,
     ) -> None:
         """
@@ -237,8 +237,8 @@ def refine_mixture(self) -> "MixtureModel":
         weights = np.asarray(self.weights, float)
 
         def unpack_params(
-            params: NDArray[np.float_],
-        ) -> Tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]:
+            params: NDArray[np.floating],
+        ) -> Tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating]]:
             """Convert flat parameter vector into mus, sigmas, pis."""
             mus = params[: self.K]
             sigmas = np.exp(params[self.K : 2 * self.K])
@@ -251,7 +251,7 @@ def unpack_params(
 
             return mus, sigmas, pis
 
-        def neg_log_likelihood(params: NDArray[np.float_]) -> float:
+        def neg_log_likelihood(params: NDArray[np.floating]) -> float:
             mus, sigmas, pis = unpack_params(params)
 
             # P(interval | component k)
diff --git a/src/ecoff_fitter/report.py b/src/ecoff_fitter/report.py
index 64c55ad..6fe2df6 100644
--- a/src/ecoff_fitter/report.py
+++ b/src/ecoff_fitter/report.py
@@ -52,15 +52,15 @@ def dilution_factor(self) -> float:
         return cast(float, self.fitter.dilution_factor)
 
     @property
-    def mus(self) -> NDArray[np.float_]:
-        return cast(NDArray[np.float_], self.fitter.mus_)
+    def mus(self) -> NDArray[np.floating]:
+        return cast(NDArray[np.floating], self.fitter.mus_)
 
     @property
-    def sigmas(self) -> NDArray[np.float_]:
-        return cast(NDArray[np.float_], self.fitter.sigmas_)
+    def sigmas(self) -> NDArray[np.floating]:
+        return cast(NDArray[np.floating], self.fitter.sigmas_)
 
     @property
-    def pis(self) -> Optional[NDArray[np.float_]]:
+    def pis(self) -> Optional[NDArray[np.floating]]:
         return getattr(self.fitter, "pis_", None)
 
     @property
@@ -71,12 +71,12 @@ def model(self) -> Any:
     @property
     def intervals(
         self,
-    ) -> tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]:
+    ) -> tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating]]:
         return cast(
             tuple[
-                NDArray[np.float_],
-                NDArray[np.float_],
-                NDArray[np.float_]
+                NDArray[np.floating],
+                NDArray[np.floating],
+                NDArray[np.floating]
             ],
             self.fitter.define_intervals(),
         )

From 356d7ea9e54a40d8153c27224b139d336fcb8fd1 Mon Sep 17 00:00:00 2001
From: DylanAdlard <dylan.adlard@lmh.ox.ac.uk>
Date: Mon, 15 Dec 2025 11:17:32 +0200
Subject: [PATCH 4/4] Re-add env.yml conda environment file

---
 env.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 env.yml

diff --git a/env.yml b/env.yml
new file mode 100644
index 0000000..97372b4
--- /dev/null
+++ b/env.yml
@@ -0,0 +1,18 @@
+name: ECOFFitter
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - python
+  - scipy
+  - pytest
+  - scikit-learn
+  - pandas
+  - joblib
+  - yaml
+  - pyyaml
+  - matplotlib
+  - pytest-cov
+  - pip
+  - pip:
+      - intreg