From 91d87daef61b1d3bf84cacb10a002f75291b83b9 Mon Sep 17 00:00:00 2001 From: Muhammad Saqlain <2mesaqlain@gmail.com> Date: Fri, 26 Dec 2025 01:42:09 +0500 Subject: [PATCH 1/2] Test: Assert specific evaluation scores for sample data --- tests/test_main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_main.py b/tests/test_main.py index 4aea9480a..e11db2307 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -524,6 +524,8 @@ def test_evaluate(m, tmpdir): df = pd.read_csv(csv_file) assert results["results"].shape[0] == df.shape[0] + assert results["box_precision"] == pytest.approx(0.8, abs=0.01) + assert results["box_recall"] == pytest.approx(0.7213, abs=0.01) def test_train_callbacks(m): From a9b3b5e7b75db673b8b70f29eb12ad478d0d5219 Mon Sep 17 00:00:00 2001 From: Muhammad Saqlain <2mesaqlain@gmail.com> Date: Thu, 8 Jan 2026 06:09:35 +0500 Subject: [PATCH 2/2] Refactor: Move strict eval checks to benchmark test and relax unit test --- tests/test_benchmark.py | 20 ++++++++++++++++++++ tests/test_main.py | 13 +++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 tests/test_benchmark.py diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 000000000..0237670ca --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,20 @@ +import pytest +from deepforest import main, get_data + +def test_benchmark_release(): + """ + Benchmark test to ensure the specific release version of the model + produces consistent results. 
+ """ + # Load the model using a SPECIFIC revision (Commit SHA) + release_sha = "cc21436bc5d572dde8ff5f93c1e71a32f563cace" + + m = main.deepforest() + m.load_model("weecology/deepforest-tree", revision=release_sha) + + csv_file = get_data("OSBS_029.csv") + results = m.evaluate(csv_file, iou_threshold=0.4) + + # Strict assertions (the benchmark): scores must match the pinned revision + assert results["box_precision"] == pytest.approx(0.8, abs=0.01) + assert results["box_recall"] == pytest.approx(0.7213, abs=0.01) diff --git a/tests/test_main.py b/tests/test_main.py index e11db2307..4223a53b6 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -511,12 +511,16 @@ def test_predict_tile_from_array(m, path): assert not prediction.empty -def test_evaluate(m, tmpdir): +def test_evaluate(m): csv_file = get_data("OSBS_029.csv") results = m.evaluate(csv_file, iou_threshold=0.4) - assert np.round(results["box_precision"], 2) > 0.5 - assert np.round(results["box_recall"], 2) > 0.5 + # Relaxed assertions (sanity check only) + # Allows model improvements without breaking tests + assert results["box_precision"] > 0.7 + assert results["box_recall"] > 0.5 + + # Structure and label checks assert len(results["results"].predicted_label.dropna().unique()) == 1 assert results["results"].predicted_label.dropna().unique()[0] == "Tree" assert results["predictions"].shape[0] > 0 @@ -524,9 +528,6 @@ def test_evaluate(m, tmpdir): df = pd.read_csv(csv_file) assert results["results"].shape[0] == df.shape[0] - assert results["box_precision"] == pytest.approx(0.8, abs=0.01) - assert results["box_recall"] == pytest.approx(0.7213, abs=0.01) - def test_train_callbacks(m): csv_file = get_data("example.csv")