From 9ef2c33b55ac94da7aeb832a88e19b6fa8caba0d Mon Sep 17 00:00:00 2001
From: Daniel Angst <daniel.angst@env.ethz.ch>
Date: Thu, 2 Apr 2026 10:40:44 +0200
Subject: [PATCH 1/7] improve reporting if no tf devices found

---
 src/orcai/auxiliary.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/orcai/auxiliary.py b/src/orcai/auxiliary.py
index e14edbe..c9f94c2 100644
--- a/src/orcai/auxiliary.py
+++ b/src/orcai/auxiliary.py
@@ -326,16 +326,19 @@ def print_tf_device_info(
         )  # suppress tensorflow logging (ERROR and worse only)
 
         physical_devices = tf.config.list_physical_devices("GPU")
-        devices_info = [
-            tf.config.experimental.get_device_details(i) for i in physical_devices
-        ]
-
-        devices_string = ", ".join(
-            [
-                f"{dev.name.replace('physical_device:', '')}: {info['device_name']}"
-                for dev, info in zip(physical_devices, devices_info)
+
+        if len(physical_devices) == 0:
+            devices_string = "No GPU devices found. Using CPU."
+        else:
+            devices_info = [
+                tf.config.experimental.get_device_details(i) for i in physical_devices
             ]
-        )
+            devices_string = ", ".join(
+                [
+                    f"{dev.name.replace('physical_device:', '')}: {info['device_name']}"
+                    for dev, info in zip(physical_devices, devices_info)
+                ]
+            )
 
         self.info(
             f"Available TensorFlow devices: {devices_string}",

From 1342f7b7b1607e0552c90eecb0f551b571e4b772 Mon Sep 17 00:00:00 2001
From: Daniel Angst <daniel.angst@env.ethz.ch>
Date: Thu, 2 Apr 2026 10:41:11 +0200
Subject: [PATCH 2/7] report tf devices in predict if DEBUG

---
 src/orcai/predict.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/orcai/predict.py b/src/orcai/predict.py
index 55f9260..284e8e8 100755
--- a/src/orcai/predict.py
+++ b/src/orcai/predict.py
@@ -309,6 +309,7 @@ def compute_aggregated_predictions(
 
     # Step 2: Model predictions for all snippets
     msgr.info("Prediction of snippets")
+    msgr.print_tf_device_info(severity=3)
     snippets = snippets[..., np.newaxis]  # Shape: (num_snippets, 736, 171, 1)
     predictions = model.predict(
         snippets,

From 365ef3276d0b95ce465113924447aeefa2a0cdb0 Mon Sep 17 00:00:00 2001
From: Daniel Angst <daniel.angst@env.ethz.ch>
Date: Tue, 7 Apr 2026 09:29:41 +0200
Subject: [PATCH 3/7] linting

---
 tests/test_json_encoder.py |  9 +++-
 tests/test_predict.py      | 39 +++++++++-------
 tests/test_snippets.py     | 52 +++++++++++++++++-----
 tests/test_spectrogram.py  | 91 ++++++++++++++++++++++++++++----------
 tests/test_test_models.py  | 28 +++++++++---
 tests/test_train.py        |  6 +--
 6 files changed, 163 insertions(+), 62 deletions(-)

diff --git a/tests/test_json_encoder.py b/tests/test_json_encoder.py
index 9ef31a5..f268c01 100644
--- a/tests/test_json_encoder.py
+++ b/tests/test_json_encoder.py
@@ -85,6 +85,13 @@ def test_unsupported_type_raises(self):
 
     def test_standard_types_unchanged(self):
         """Standard JSON-serializable types pass through unchanged."""
-        data = {"int": 1, "float": 2.5, "str": "hello", "list": [1, 2], "bool": True, "none": None}
+        data = {
+            "int": 1,
+            "float": 2.5,
+            "str": "hello",
+            "list": [1, 2],
+            "bool": True,
+            "none": None,
+        }
         result = encode(data)
         assert result == data
diff --git a/tests/test_predict.py b/tests/test_predict.py
index 28af59e..8f3c424 100644
--- a/tests/test_predict.py
+++ b/tests/test_predict.py
@@ -115,8 +115,8 @@ def test_multiple_calls(self):
         probs = np.array([0.1, 0.5, 0.9, 0.3])
         result = _calulate_mean_probabilities(probs, [0, 2], [2, 4])
         assert len(result) == 2
-        assert result[0] == pytest.approx(0.3)   # mean(probs[0:2]) = mean(0.1, 0.5)
-        assert result[1] == pytest.approx(0.6)   # mean(0.9, 0.3)
+        assert result[0] == pytest.approx(0.3)  # mean(probs[0:2]) = mean(0.1, 0.5)
+        assert result[1] == pytest.approx(0.6)  # mean(0.9, 0.3)
 
 
 # ---------------------------------------------------------------------------
@@ -178,14 +178,10 @@ def test_custom_threshold(self):
         preds = np.zeros((10, 1))
         preds[3:6, 0] = 0.6
         # threshold=0.7 → nothing detected
-        _, _, labels_high, _ = compute_binary_predictions(
-            preds, ["BR"], threshold=0.7
-        )
+        _, _, labels_high, _ = compute_binary_predictions(preds, ["BR"], threshold=0.7)
         assert len(labels_high) == 0
         # threshold=0.5 → detected
-        _, _, labels_low, _ = compute_binary_predictions(
-            preds, ["BR"], threshold=0.5
-        )
+        _, _, labels_low, _ = compute_binary_predictions(preds, ["BR"], threshold=0.5)
         assert "BR" in labels_low
 
 
@@ -289,7 +285,9 @@ def test_different_channel(self, tmp_path):
 class TestFilterPredictions:
     """Tests for filter_predictions."""
 
-    def test_keeps_all_within_limits(self, predicted_labels_df, call_duration_limits_dict):
+    def test_keeps_all_within_limits(
+        self, predicted_labels_df, call_duration_limits_dict
+    ):
         """All calls within limits are kept."""
         # predicted_labels_df has durations 5, 5, 5, 5 (stop-start), delta_t=1
         # BR limits [2,8], BUZZ limits [3,20], WHISTLE limits [1,10] → all kept
@@ -310,7 +308,9 @@ def test_removes_too_short(self, call_duration_limits_dict):
                 "mean_p": [0.9],
             }
         )
-        result = filter_predictions(df, delta_t=1.0, call_duration_limits=call_duration_limits_dict)
+        result = filter_predictions(
+            df, delta_t=1.0, call_duration_limits=call_duration_limits_dict
+        )
         assert len(result) == 0
 
     def test_removes_too_long(self, call_duration_limits_dict):
@@ -323,20 +323,28 @@ def test_removes_too_long(self, call_duration_limits_dict):
                 "mean_p": [0.9],
             }
         )
-        result = filter_predictions(df, delta_t=1.0, call_duration_limits=call_duration_limits_dict)
+        result = filter_predictions(
+            df, delta_t=1.0, call_duration_limits=call_duration_limits_dict
+        )
         assert len(result) == 0
 
     def test_empty_input_returns_empty(self, call_duration_limits_dict):
         """Empty input DataFrame is returned unchanged."""
         df = pd.DataFrame(columns=["start", "stop", "label", "mean_p"])
-        result = filter_predictions(df, delta_t=1.0, call_duration_limits=call_duration_limits_dict)
+        result = filter_predictions(
+            df, delta_t=1.0, call_duration_limits=call_duration_limits_dict
+        )
         assert result.empty
 
-    def test_output_columns_preserved(self, predicted_labels_df, call_duration_limits_dict):
+    def test_output_columns_preserved(
+        self, predicted_labels_df, call_duration_limits_dict
+    ):
         """Output has the same columns as input (filter_predictions modifies df in-place)."""
         original_cols = list(predicted_labels_df.columns)
         result = filter_predictions(
-            predicted_labels_df, delta_t=1.0, call_duration_limits=call_duration_limits_dict
+            predicted_labels_df,
+            delta_t=1.0,
+            call_duration_limits=call_duration_limits_dict,
         )
         assert list(result.columns) == original_cols
 
@@ -362,8 +370,7 @@ class TestFilterPredictionsFile:
     def _write_predictions_file(self, path: Path, rows: list[tuple]) -> None:
         """Write a tab-separated predictions file."""
         lines = "\n".join(
-            f"{start}\t{stop}\t{label}\t{p}\tsource"
-            for start, stop, label, p in rows
+            f"{start}\t{stop}\t{label}\t{p}\tsource" for start, stop, label, p in rows
         )
         path.write_text(lines)
 
diff --git a/tests/test_snippets.py b/tests/test_snippets.py
index 300a436..3b40bb7 100644
--- a/tests/test_snippets.py
+++ b/tests/test_snippets.py
@@ -38,7 +38,9 @@ def test_total_equals_sum_of_types(self, snippet_table_df, label_calls):
         """Total column equals sum of train + val + test."""
         stats = _compute_snippet_stats(snippet_table_df, for_calls=label_calls)
         computed_total = stats[["train", "val", "test"]].sum(axis=1)
-        pd.testing.assert_series_equal(stats["total"], computed_total, check_names=False)
+        pd.testing.assert_series_equal(
+            stats["total"], computed_total, check_names=False
+        )
 
     def test_rows_are_call_names(self, snippet_table_df, label_calls):
         """One row per call in for_calls."""
@@ -78,7 +80,9 @@ def test_output_columns_preserved(self, snippet_table_df, orcai_parameter_snippe
         result = _filter_snippet_table(snippet_table_df, orcai_parameter_snippets)
         assert set(result.columns) == set(snippet_table_df.columns)
 
-    def test_fraction_removal_zero_keeps_all(self, snippet_table_df, orcai_parameter_snippets):
+    def test_fraction_removal_zero_keeps_all(
+        self, snippet_table_df, orcai_parameter_snippets
+    ):
         """fraction_removal=0 keeps all no-label snippets."""
         params = {**orcai_parameter_snippets}
         params["snippets"] = {**params["snippets"], "fraction_removal": 0.0}
@@ -91,13 +95,17 @@ def test_index_reset(self, snippet_table_df, orcai_parameter_snippets):
         assert list(result.index) == list(range(len(result)))
 
     @pytest.mark.parametrize("seed", [0, 42, 123])
-    def test_deterministic_with_same_seed(self, snippet_table_df, orcai_parameter_snippets, seed):
+    def test_deterministic_with_same_seed(
+        self, snippet_table_df, orcai_parameter_snippets, seed
+    ):
         """Same rng seed produces identical results."""
         rng1 = np.random.default_rng(seed)
         rng2 = np.random.default_rng(seed)
         r1 = _filter_snippet_table(snippet_table_df, orcai_parameter_snippets, rng=rng1)
         r2 = _filter_snippet_table(snippet_table_df, orcai_parameter_snippets, rng=rng2)
-        pd.testing.assert_frame_equal(r1.reset_index(drop=True), r2.reset_index(drop=True))
+        pd.testing.assert_frame_equal(
+            r1.reset_index(drop=True), r2.reset_index(drop=True)
+        )
 
 
 # ---------------------------------------------------------------------------
@@ -142,11 +150,15 @@ def _build_recording_dir(
 class TestMakeSnippetTable:
     """Tests for _make_snippet_table."""
 
-    def test_success_returns_dataframe(self, tmp_path, label_calls, orcai_parameter_snippets):
+    def test_success_returns_dataframe(
+        self, tmp_path, label_calls, orcai_parameter_snippets
+    ):
         """Returns a DataFrame when directory structure is complete."""
         rec_dir = tmp_path / "test_rec"
         _build_recording_dir(rec_dir, label_calls)
-        snippet_table, _, _, _, status = _make_snippet_table(rec_dir, orcai_parameter_snippets)
+        snippet_table, _, _, _, status = _make_snippet_table(
+            rec_dir, orcai_parameter_snippets
+        )
         assert status == "success"
         assert isinstance(snippet_table, pd.DataFrame)
 
@@ -155,10 +167,18 @@ def test_output_columns(self, tmp_path, label_calls, orcai_parameter_snippets):
         rec_dir = tmp_path / "test_rec"
         _build_recording_dir(rec_dir, label_calls)
         snippet_table, *_ = _make_snippet_table(rec_dir, orcai_parameter_snippets)
-        for col in ["recording", "recording_data_dir", "data_type", "row_start", "row_stop"]:
+        for col in [
+            "recording",
+            "recording_data_dir",
+            "data_type",
+            "row_start",
+            "row_stop",
+        ]:
             assert col in snippet_table.columns
 
-    def test_missing_spectrogram_raises(self, tmp_path, label_calls, orcai_parameter_snippets):
+    def test_missing_spectrogram_raises(
+        self, tmp_path, label_calls, orcai_parameter_snippets
+    ):
         """FileNotFoundError raised when times.json is missing."""
         rec_dir = tmp_path / "no_spec"
         rec_dir.mkdir()
@@ -167,7 +187,9 @@ def test_missing_spectrogram_raises(self, tmp_path, label_calls, orcai_parameter
         with pytest.raises(FileNotFoundError):
             _make_snippet_table(rec_dir, orcai_parameter_snippets)
 
-    def test_missing_label_file_returns_none(self, tmp_path, label_calls, orcai_parameter_snippets):
+    def test_missing_label_file_returns_none(
+        self, tmp_path, label_calls, orcai_parameter_snippets
+    ):
         """Returns None snippet table when labels.zarr is missing."""
         rec_dir = tmp_path / "no_labels"
         rec_dir.mkdir()
@@ -176,15 +198,21 @@ def test_missing_label_file_returns_none(self, tmp_path, label_calls, orcai_para
         (spec_dir / "times.json").write_text(
             json.dumps({"min": 0.0, "max": 500.0, "length": 500})
         )
-        snippet_table, _, _, _, status = _make_snippet_table(rec_dir, orcai_parameter_snippets)
+        snippet_table, _, _, _, status = _make_snippet_table(
+            rec_dir, orcai_parameter_snippets
+        )
         assert snippet_table is None
         assert status == "missing label files"
 
-    def test_recording_too_short_returns_none(self, tmp_path, label_calls, orcai_parameter_snippets):
+    def test_recording_too_short_returns_none(
+        self, tmp_path, label_calls, orcai_parameter_snippets
+    ):
         """Returns None when recording is shorter than segment_duration."""
         rec_dir = tmp_path / "short_rec"
         # Recording of 5s, segment_duration=10 → n_segments=0
         _build_recording_dir(rec_dir, label_calls, n_time=50, recording_duration=5.0)
-        snippet_table, _, _, _, status = _make_snippet_table(rec_dir, orcai_parameter_snippets)
+        snippet_table, _, _, _, status = _make_snippet_table(
+            rec_dir, orcai_parameter_snippets
+        )
         assert snippet_table is None
         assert status == "shorter than segment_duration"
diff --git a/tests/test_spectrogram.py b/tests/test_spectrogram.py
index 3bb9ddc..931b2b3 100644
--- a/tests/test_spectrogram.py
+++ b/tests/test_spectrogram.py
@@ -6,13 +6,16 @@
 """
 
 import json
-from pathlib import Path
 
 import numpy as np
 import pytest
 import zarr
 
-from orcai.spectrogram import calculate_spectrogram, preprocess_spectrogram, save_spectrogram
+from orcai.spectrogram import (
+    calculate_spectrogram,
+    preprocess_spectrogram,
+    save_spectrogram,
+)
 
 
 # ---------------------------------------------------------------------------
@@ -31,7 +34,9 @@ def _make_audio(self, n_samples: int = 8000) -> np.ndarray:
     def test_output_shapes(self, spectrogram_parameter):
         """Returns (spectrogram, frequencies, times) with consistent shapes."""
         audio = self._make_audio()
-        spec, freqs, times = calculate_spectrogram(audio, channel=1, spectrogram_parameter=spectrogram_parameter)
+        spec, freqs, times = calculate_spectrogram(
+            audio, channel=1, spectrogram_parameter=spectrogram_parameter
+        )
         n_fft_bins = spectrogram_parameter["nfft"] // 2 + 1
         assert spec.shape[0] == n_fft_bins
         assert freqs.shape[0] == n_fft_bins
@@ -40,7 +45,9 @@ def test_output_shapes(self, spectrogram_parameter):
     def test_spectrogram_dtype_float(self, spectrogram_parameter):
         """Spectrogram values are floating point."""
         audio = self._make_audio()
-        spec, _, _ = calculate_spectrogram(audio, channel=1, spectrogram_parameter=spectrogram_parameter)
+        spec, _, _ = calculate_spectrogram(
+            audio, channel=1, spectrogram_parameter=spectrogram_parameter
+        )
         assert np.issubdtype(spec.dtype, np.floating)
 
     def test_multichannel_selects_correct_channel(self, spectrogram_parameter):
@@ -59,7 +66,9 @@ def test_multichannel_selects_correct_channel(self, spectrogram_parameter):
     def test_frequencies_cover_nyquist(self, spectrogram_parameter):
         """Frequency axis spans 0 to Nyquist (sr/2)."""
         audio = self._make_audio()
-        _, freqs, _ = calculate_spectrogram(audio, channel=1, spectrogram_parameter=spectrogram_parameter)
+        _, freqs, _ = calculate_spectrogram(
+            audio, channel=1, spectrogram_parameter=spectrogram_parameter
+        )
         assert freqs[0] == pytest.approx(0.0)
         assert freqs[-1] == pytest.approx(spectrogram_parameter["sampling_rate"] / 2)
 
@@ -72,19 +81,27 @@ def test_frequencies_cover_nyquist(self, spectrogram_parameter):
 class TestPreprocessSpectrogram:
     """Tests for preprocess_spectrogram."""
 
-    def test_output_transposed(self, synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter):
+    def test_output_transposed(
+        self, synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter
+    ):
         """Output spectrogram is transposed (time x freq) relative to input (freq x time)."""
         n_freq_in, n_time = synthetic_spectrogram_raw.shape
         spec_out, _ = preprocess_spectrogram(
             synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter
         )
         n_freq_expected = (
-            np.argwhere(spectrogram_frequencies >= spectrogram_parameter["freq_range"][1])[0][0]
-            - np.argwhere(spectrogram_frequencies <= spectrogram_parameter["freq_range"][0])[0][0]
+            np.argwhere(
+                spectrogram_frequencies >= spectrogram_parameter["freq_range"][1]
+            )[0][0]
+            - np.argwhere(
+                spectrogram_frequencies <= spectrogram_parameter["freq_range"][0]
+            )[0][0]
         )
         assert spec_out.shape == (n_time, n_freq_expected)
 
-    def test_output_normalized_0_1(self, synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter):
+    def test_output_normalized_0_1(
+        self, synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter
+    ):
         """Output values are clipped and normalized to [0, 1]."""
         spec_out, _ = preprocess_spectrogram(
             synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter
@@ -92,7 +109,9 @@ def test_output_normalized_0_1(self, synthetic_spectrogram_raw, spectrogram_freq
         assert spec_out.min() >= 0.0
         assert spec_out.max() <= 1.0
 
-    def test_frequency_range_applied(self, synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter):
+    def test_frequency_range_applied(
+        self, synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter
+    ):
         """Frequency range is applied: output has fewer bins and max is bounded by freq_range[1]."""
         _, freqs_out = preprocess_spectrogram(
             synthetic_spectrogram_raw, spectrogram_frequencies, spectrogram_parameter
@@ -102,14 +121,20 @@ def test_frequency_range_applied(self, synthetic_spectrogram_raw, spectrogram_fr
         # Output should be a strict subset of input frequencies
         assert len(freqs_out) < len(spectrogram_frequencies)
 
-    def test_quantile_clipping_reduces_range(self, spectrogram_frequencies, spectrogram_parameter):
+    def test_quantile_clipping_reduces_range(
+        self, spectrogram_frequencies, spectrogram_parameter
+    ):
         """Quantile clipping reduces the dynamic range of extreme values."""
         # Create spectrogram with one very large outlier
         rng = np.random.default_rng(42)
-        spec = rng.standard_normal((spectrogram_frequencies.shape[0], 50)).astype(np.float32)
+        spec = rng.standard_normal((spectrogram_frequencies.shape[0], 50)).astype(
+            np.float32
+        )
         spec[0, 0] = 1e6  # extreme outlier
 
-        spec_out, _ = preprocess_spectrogram(spec, spectrogram_frequencies, spectrogram_parameter)
+        spec_out, _ = preprocess_spectrogram(
+            spec, spectrogram_frequencies, spectrogram_parameter
+        )
         # After clipping + normalization the max should still be 1.0 (not inflated)
         assert spec_out.max() == pytest.approx(1.0, abs=1e-5)
 
@@ -122,37 +147,57 @@ def test_quantile_clipping_reduces_range(self, spectrogram_frequencies, spectrog
 class TestSaveSpectrogram:
     """Tests for save_spectrogram."""
 
-    def test_creates_zarr_file(self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies):
+    def test_creates_zarr_file(
+        self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies
+    ):
         """zarr spectrogram file is created in output_dir."""
         times = np.linspace(0, 1, synthetic_spectrogram_raw.shape[1])
-        save_spectrogram(synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path)
+        save_spectrogram(
+            synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path
+        )
         assert (tmp_path / "spectrogram.zarr").exists()
 
-    def test_creates_frequencies_json(self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies):
+    def test_creates_frequencies_json(
+        self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies
+    ):
         """frequencies.json is written to output_dir."""
         times = np.linspace(0, 1, synthetic_spectrogram_raw.shape[1])
-        save_spectrogram(synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path)
+        save_spectrogram(
+            synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path
+        )
         freq_file = tmp_path / "frequencies.json"
         assert freq_file.exists()
 
-    def test_creates_times_json(self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies):
+    def test_creates_times_json(
+        self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies
+    ):
         """times.json is written to output_dir."""
         times = np.linspace(0, 1, synthetic_spectrogram_raw.shape[1])
-        save_spectrogram(synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path)
+        save_spectrogram(
+            synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path
+        )
         assert (tmp_path / "times.json").exists()
 
-    def test_zarr_shape_matches_input(self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies):
+    def test_zarr_shape_matches_input(
+        self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies
+    ):
         """Saved zarr array has the same shape as the input spectrogram."""
         times = np.linspace(0, 1, synthetic_spectrogram_raw.shape[1])
-        save_spectrogram(synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path)
+        save_spectrogram(
+            synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path
+        )
         arr = zarr.open_array(tmp_path / "spectrogram.zarr", mode="r")
         assert arr.shape == synthetic_spectrogram_raw.shape
 
-    def test_times_json_content(self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies):
+    def test_times_json_content(
+        self, tmp_path, synthetic_spectrogram_raw, spectrogram_frequencies
+    ):
         """times.json has min, max, length keys matching the times array."""
         n = synthetic_spectrogram_raw.shape[1]
         times = np.linspace(0.0, 2.0, n)
-        save_spectrogram(synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path)
+        save_spectrogram(
+            synthetic_spectrogram_raw, spectrogram_frequencies, times, tmp_path
+        )
         data = json.loads((tmp_path / "times.json").read_text())
         assert data["min"] == pytest.approx(0.0, abs=1e-5)
         assert data["max"] == pytest.approx(2.0, abs=1e-5)
diff --git a/tests/test_test_models.py b/tests/test_test_models.py
index 4d7d963..9d8e595 100644
--- a/tests/test_test_models.py
+++ b/tests/test_test_models.py
@@ -137,7 +137,12 @@ def test_tp_fp_fn_tn_sum_to_one(self):
         y_true, y_pred = self._perfect_batch()
         result = compute_confusion_table(y_true, y_pred, ["A", "B"])
         for lbl in result.index:
-            total = result.loc[lbl, "TP"] + result.loc[lbl, "FP"] + result.loc[lbl, "FN"] + result.loc[lbl, "TN"]
+            total = (
+                result.loc[lbl, "TP"]
+                + result.loc[lbl, "FP"]
+                + result.loc[lbl, "FN"]
+                + result.loc[lbl, "TN"]
+            )
             assert total == pytest.approx(1.0, abs=1e-6)
 
 
@@ -154,22 +159,31 @@ def test_output_shape(self):
         n = 3
         m1 = np.eye(n, dtype=int)
         m2 = np.eye(n, dtype=int)
-        result = _compute_misclassification_table(m1, m2, "true", "pred", ["A", "B", "C"])
-        assert result.shape == (n + 1, n + 2)  # +1 NOLABEL row, +1 NOLABEL col, +1 fraction_time
+        result = _compute_misclassification_table(
+            m1, m2, "true", "pred", ["A", "B", "C"]
+        )
+        assert result.shape == (
+            n + 1,
+            n + 2,
+        )  # +1 NOLABEL row, +1 NOLABEL col, +1 fraction_time
 
     def test_diagonal_dominant_for_perfect_predictions(self):
         """Perfect label alignment → high diagonal values."""
         m = np.zeros((6, 2), dtype=int)
         m[:3, 0] = 1  # first 3 rows: label A
         m[3:, 1] = 1  # last 3 rows: label B
-        result = _compute_misclassification_table(m, m.copy(), "true", "pred", ["A", "B"])
+        result = _compute_misclassification_table(
+            m, m.copy(), "true", "pred", ["A", "B"]
+        )
         assert result.loc["true_A", "pred_A"] == pytest.approx(1.0)
         assert result.loc["true_B", "pred_B"] == pytest.approx(1.0)
 
     def test_column_and_index_names(self):
         """Columns and index follow <suffix>_<label> convention."""
         m = np.zeros((4, 2), dtype=int)
-        result = _compute_misclassification_table(m, m.copy(), "true", "pred", ["X", "Y"])
+        result = _compute_misclassification_table(
+            m, m.copy(), "true", "pred", ["X", "Y"]
+        )
         assert "pred_X" in result.columns
         assert "true_X" in result.index
 
@@ -195,6 +209,8 @@ def test_both_values_are_dataframes(self):
         import pandas as pd
 
         m = np.zeros((4, 2), dtype=int)
-        result = compute_misclassification_tables(m, m.copy(), "true", "pred", ["A", "B"])
+        result = compute_misclassification_tables(
+            m, m.copy(), "true", "pred", ["A", "B"]
+        )
         for df in result.values():
             assert isinstance(df, pd.DataFrame)
diff --git a/tests/test_train.py b/tests/test_train.py
index 56b1067..08c9975 100644
--- a/tests/test_train.py
+++ b/tests/test_train.py
@@ -6,8 +6,6 @@
 """
 
 import keras
-import numpy as np
-import pytest
 
 from orcai.train import _count_params
 
@@ -36,8 +34,8 @@ def test_multi_layer_model(self):
         """Counts params across multiple layers."""
         model = keras.Sequential(
             [
-                keras.layers.Dense(8, input_shape=(4,)),   # 4*8+8 = 40
-                keras.layers.Dense(2),                     # 8*2+2 = 18
+                keras.layers.Dense(8, input_shape=(4,)),  # 4*8+8 = 40
+                keras.layers.Dense(2),  # 8*2+2 = 18
             ]
         )
         count = _count_params(model.trainable_weights)

From 1470b1a4cd82dffcfc16823f27bfeef398fab230 Mon Sep 17 00:00:00 2001
From: Daniel Angst <daniel.angst@env.ethz.ch>
Date: Tue, 7 Apr 2026 09:30:45 +0200
Subject: [PATCH 4/7] add CI

---
 .github/workflows/build.yml | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/workflows/build.yml

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..1942360
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,34 @@
+name: build orcai
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  uv-example:
+    name: python
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: "Set up Python"
+        uses: actions/setup-python@v6
+        with:
+          python-version-file: "pyproject.toml"
+          
+      - name: Install uv
+        uses: astral-sh/setup-uv@7
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv sync --locked --all-extras --dev
+      
+      - name: lint with ruff
+        run: uv run ruff check
+
+      - name: run tests
+        run: uv run pytest

From 404861fb6f2096983f12dc459899ea29182fb78b Mon Sep 17 00:00:00 2001
From: Daniel Angst <daniel.angst@env.ethz.ch>
Date: Tue, 7 Apr 2026 09:34:56 +0200
Subject: [PATCH 5/7] v3.0.1

---
 CHANGELOG.md   | 8 ++++++++
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f6b2f57..6c0fc02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [3.0.1]
+
+### Added
+
+- improve reporting of TensorFlow devices when no GPU is found; report devices during prediction at DEBUG level
+- CI build workflow
+
 ## [3.0.0]
 
 ### Added
@@ -743,4 +750,5 @@
 [2.2.0]:https://github.com/ethz-tb/orcAI/releases/tag/v2.2.0
 [2.3.0]:https://github.com/ethz-tb/orcAI/releases/tag/v2.3.0
 [3.0.0]:https://github.com/ethz-tb/orcAI/releases/tag/v3.0.0
+[3.0.1]:https://github.com/ethz-tb/orcAI/releases/tag/v3.0.1
 
diff --git a/pyproject.toml b/pyproject.toml
index edf2de7..b37321b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "orcai"
-version = "3.0.0"
+version = "3.0.1"
 authors = [
     { name = "Chérine Baumgartner", email = "cherine.baumgartner@env.ethz.ch" },
     { name = "Sebastian Bonhoeffer", email = "sebastian.bonhoeffer@env.ethz.ch" },
diff --git a/uv.lock b/uv.lock
index 591a12f..9ca56bd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -752,7 +752,7 @@ wheels = [
 
 [[package]]
 name = "orcai"
-version = "3.0.0"
+version = "3.0.1"
 source = { editable = "." }
 dependencies = [
     { name = "humanize" },

From d48f513c0ada29dcdd51df7997e80635455aac51 Mon Sep 17 00:00:00 2001
From: Daniel Angst <daniel.angst@env.ethz.ch>
Date: Tue, 7 Apr 2026 09:37:28 +0200
Subject: [PATCH 6/7] fix setup-uv version

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 1942360..abddde9 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -20,7 +20,7 @@ jobs:
           python-version-file: "pyproject.toml"
           
       - name: Install uv
-        uses: astral-sh/setup-uv@7
+        uses: astral-sh/setup-uv@v8.0.0
         with:
           enable-cache: true
 

From fc6f1a77f180b3263d12c8a12a547e75f8242032 Mon Sep 17 00:00:00 2001
From: Daniel Angst <daniel.angst@env.ethz.ch>
Date: Tue, 7 Apr 2026 09:44:24 +0200
Subject: [PATCH 7/7] pin uv version

---
 .github/workflows/build.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index abddde9..d4b72ae 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,6 +23,7 @@ jobs:
         uses: astral-sh/setup-uv@v8.0.0
         with:
           enable-cache: true
+          version: "0.11.3"
 
       - name: Install the project
         run: uv sync --locked --all-extras --dev