From 923a470ae8d0d2709b83c1ade94888ed3a153806 Mon Sep 17 00:00:00 2001 From: James Robinson-Bohnslav Date: Tue, 31 Mar 2026 07:57:34 -0400 Subject: [PATCH 1/5] Fix #116: add_label_to_project() no longer eats first column from external CSVs The old code used pd.read_csv(path, index_col=0) which assumes every CSV has a DEG-style unnamed numeric index column. External CSVs without one had their first data column silently consumed as the index. Now reads without index_col and explicitly detects/drops unnamed index columns. Added tests for DEG-style, external-no-index, and external-with-background CSV formats. --- deepethogram/projects.py | 7 +++- tests/test_projects.py | 75 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/deepethogram/projects.py b/deepethogram/projects.py index 9e54e04..43f864c 100644 --- a/deepethogram/projects.py +++ b/deepethogram/projects.py @@ -171,7 +171,12 @@ def add_label_to_project(path_to_labels: Union[str, os.PathLike], path_to_video) if os.path.isfile(label_dst): warnings.warn("Label already exists in destination {}, overwriting...".format(label_dst)) - df = pd.read_csv(path_to_labels, index_col=0) + df = pd.read_csv(path_to_labels) + # Drop unnamed index column if present (DEG-generated CSVs have one) + first_col = df.columns[0] + if first_col == "" or str(first_col).startswith("Unnamed"): + df = df.drop(columns=[first_col]) + if "none" in list(df.columns): df = df.rename(columns={"none": "background"}) if "background" not in list(df.columns): diff --git a/tests/test_projects.py b/tests/test_projects.py index 4da8c2c..f2c45cb 100644 --- a/tests/test_projects.py +++ b/tests/test_projects.py @@ -115,5 +115,80 @@ def test_add_external_label(): projects.add_label_to_project(labelfile, videofile) +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_add_label_deg_style_csv(tmp_path): + """Test add_label_to_project with DEG-generated CSV (has unnamed index column).""" + 
make_project_from_archive() + mousedir = os.path.join(project_path, "DATA", "mouse06") + videofile = os.path.join(mousedir, "mouse06.h5") + + # Create a DEG-style CSV with unnamed numeric index + csv_path = tmp_path / "labels_with_index.csv" + csv_path.write_text( + ",background,behavior1,behavior2\n" + "0,1,0,0\n" + "1,0,1,0\n" + "2,0,0,1\n" + ) + + result = projects.add_label_to_project(str(csv_path), videofile) + df = pd.read_csv(result, index_col=0) + assert "background" in df.columns + assert "behavior1" in df.columns + assert "behavior2" in df.columns + assert df.shape[1] == 3 # background + 2 behaviors + + +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_add_label_external_csv_no_index(tmp_path): + """Test add_label_to_project with external CSV (no index column, no background). + + Regression test for GitHub issue #116: the old code used index_col=0 which + silently ate the first data column when no index column was present. + """ + make_project_from_archive() + mousedir = os.path.join(project_path, "DATA", "mouse06") + videofile = os.path.join(mousedir, "mouse06.h5") + + # Create a user-provided CSV without index or background column + csv_path = tmp_path / "labels_no_index.csv" + csv_path.write_text( + "behavior1,behavior2\n" + "0,0\n" + "1,0\n" + "0,1\n" + ) + + result = projects.add_label_to_project(str(csv_path), videofile) + df = pd.read_csv(result, index_col=0) + assert "background" in df.columns, "background column should be auto-inserted" + assert "behavior1" in df.columns, "behavior1 should NOT be eaten by index_col" + assert "behavior2" in df.columns + assert df.shape[1] == 3 # background + behavior1 + behavior2 + + +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_add_label_external_csv_with_background_no_index(tmp_path): + """Test external CSV that has background but no index column.""" + make_project_from_archive() + mousedir = os.path.join(project_path, "DATA", "mouse06") + videofile = os.path.join(mousedir, 
"mouse06.h5") + + csv_path = tmp_path / "labels_bg_no_index.csv" + csv_path.write_text( + "background,behavior1,behavior2\n" + "1,0,0\n" + "0,1,0\n" + "0,0,1\n" + ) + + result = projects.add_label_to_project(str(csv_path), videofile) + df = pd.read_csv(result, index_col=0) + assert "background" in df.columns + assert "behavior1" in df.columns + assert "behavior2" in df.columns + assert df.shape[1] == 3 + + if __name__ == "__main__": test_add_external_label() From a1d95860d82af02b333ba64f4f734be3c8353ab2 Mon Sep 17 00:00:00 2001 From: James Robinson-Bohnslav Date: Tue, 31 Mar 2026 07:57:40 -0400 Subject: [PATCH 2/5] Fix #129: move plt.switch_backend('agg') from module level into training functions The agg backend was set at import time in flow_generator/train.py, feature_extractor/train.py, and sequence/train.py. This killed interactive matplotlib for anyone importing these modules (e.g. in notebooks or scripts that call plt.show()). Moving the call inside the training functions preserves headless behavior during training while allowing interactive use when importing. --- deepethogram/feature_extractor/train.py | 3 +-- deepethogram/flow_generator/train.py | 3 +-- deepethogram/sequence/train.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/deepethogram/feature_extractor/train.py b/deepethogram/feature_extractor/train.py index b5e0d6f..7cee9b1 100644 --- a/deepethogram/feature_extractor/train.py +++ b/deepethogram/feature_extractor/train.py @@ -42,12 +42,11 @@ "and test dataloaders.", ) -plt.switch_backend("agg") - log = logging.getLogger(__name__) def feature_extractor_train(cfg: DictConfig) -> nn.Module: + plt.switch_backend("agg") """Trains feature extractor models from a configuration. 
Parameters diff --git a/deepethogram/flow_generator/train.py b/deepethogram/flow_generator/train.py index 609214e..e5e6c1a 100644 --- a/deepethogram/flow_generator/train.py +++ b/deepethogram/flow_generator/train.py @@ -32,12 +32,11 @@ flow_generators = utils.get_models_from_module(models, get_function=False) -plt.switch_backend("agg") - log = logging.getLogger(__name__) def flow_generator_train(cfg: DictConfig) -> nn.Module: + plt.switch_backend("agg") """Trains flow generator models from a configuration. Parameters diff --git a/deepethogram/sequence/train.py b/deepethogram/sequence/train.py index e172993..2a59461 100644 --- a/deepethogram/sequence/train.py +++ b/deepethogram/sequence/train.py @@ -19,10 +19,9 @@ log = logging.getLogger(__name__) -plt.switch_backend("agg") - def sequence_train(cfg: DictConfig) -> nn.Module: + plt.switch_backend("agg") """Trains sequence models from a configuration. Parameters From 4e6fa34543d0e0641913edcf31167d61d4ef0cff Mon Sep 17 00:00:00 2001 From: James Robinson-Bohnslav Date: Tue, 31 Mar 2026 10:47:29 -0400 Subject: [PATCH 3/5] Fix docstrings: move plt.switch_backend("agg") below the docstring in the training functions --- deepethogram/feature_extractor/train.py | 2 +- deepethogram/flow_generator/train.py | 2 +- deepethogram/sequence/train.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deepethogram/feature_extractor/train.py b/deepethogram/feature_extractor/train.py index 7cee9b1..14c4353 100644 --- a/deepethogram/feature_extractor/train.py +++ b/deepethogram/feature_extractor/train.py @@ -46,7 +46,6 @@ def feature_extractor_train(cfg: DictConfig) -> nn.Module: - plt.switch_backend("agg") """Trains feature extractor models from a configuration. 
Parameters @@ -59,6 +58,7 @@ def feature_extractor_train(cfg: DictConfig) -> nn.Module: nn.Module Trained feature extractor """ + plt.switch_backend("agg") cfg = projects.setup_run(cfg) log.info("args: {}".format(" ".join(sys.argv))) diff --git a/deepethogram/flow_generator/train.py b/deepethogram/flow_generator/train.py index e5e6c1a..4a46335 100644 --- a/deepethogram/flow_generator/train.py +++ b/deepethogram/flow_generator/train.py @@ -36,7 +36,6 @@ def flow_generator_train(cfg: DictConfig) -> nn.Module: - plt.switch_backend("agg") """Trains flow generator models from a configuration. Parameters @@ -49,6 +48,7 @@ def flow_generator_train(cfg: DictConfig) -> nn.Module: nn.Module Trained flow generator """ + plt.switch_backend("agg") cfg = projects.setup_run(cfg) log.info("args: {}".format(" ".join(sys.argv))) # only two custom overwrites of the configuration file diff --git a/deepethogram/sequence/train.py b/deepethogram/sequence/train.py index 2a59461..5fbbb02 100644 --- a/deepethogram/sequence/train.py +++ b/deepethogram/sequence/train.py @@ -21,7 +21,6 @@ def sequence_train(cfg: DictConfig) -> nn.Module: - plt.switch_backend("agg") """Trains sequence models from a configuration. 
Parameters @@ -34,6 +33,7 @@ def sequence_train(cfg: DictConfig) -> nn.Module: nn.Module Trained sequence model """ + plt.switch_backend("agg") cfg = projects.setup_run(cfg) log.info("args: {}".format(" ".join(sys.argv))) From 5fd2fe68c5c620d13aad3b779eeb161853338937 Mon Sep 17 00:00:00 2001 From: Jim Bohnslav Date: Tue, 31 Mar 2026 10:52:29 -0400 Subject: [PATCH 4/5] Bump version to 0.4.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dc9e477..b18b27a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "deepethogram" -version = "0.3.0" +version = "0.4.0" description = "Temporal action detection for biology" readme = "README.md" authors = [ From 7d8834f56f71cf94dd364efe7a813fe03bb01d24 Mon Sep 17 00:00:00 2001 From: Jim Bohnslav Date: Tue, 31 Mar 2026 11:59:19 -0400 Subject: [PATCH 5/5] uv sync --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 37f8acc..2c9261d 100644 --- a/uv.lock +++ b/uv.lock @@ -331,7 +331,7 @@ wheels = [ [[package]] name = "deepethogram" -version = "0.3.0" +version = "0.4.0" source = { editable = "." } dependencies = [ { name = "chardet" },