From 71bd9a277ecf82e1d270952419d2e9de06443993 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Wed, 18 Mar 2026 14:55:06 +0100 Subject: [PATCH 01/14] Refactor sparql query_df to use query_raw --- courier/services/ontodocker/sparql.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/courier/services/ontodocker/sparql.py b/courier/services/ontodocker/sparql.py index 42dd54f..3a14aad 100644 --- a/courier/services/ontodocker/sparql.py +++ b/courier/services/ontodocker/sparql.py @@ -3,10 +3,10 @@ from __future__ import annotations from dataclasses import dataclass +import json from typing import TYPE_CHECKING import pandas as pd -from SPARQLWrapper import SPARQLWrapper from courier.exceptions import ValidationError from courier.services.ontodocker._compat import make_dataframe @@ -109,21 +109,26 @@ def query_df(self, dataset: str, query: str, columns: list[str]) -> pd.DataFrame ------ ValidationError If `dataset`, `query`, or `columns` are invalid. - Exception - Any exceptions raised by SPARQLWrapper or result conversion. + ValueError + If the endpoint response cannot be decoded as JSON. + HttpError + If the underlying HTTP request fails. """ + if not dataset or not dataset.strip(): + raise ValidationError("dataset must be non-empty") if not query or not query.strip(): raise ValidationError("query must be non-empty") if not columns: raise ValidationError("columns must be a non-empty list of strings") - endpoint = self.endpoint(dataset) - - sparql = SPARQLWrapper(endpoint) - sparql.setReturnFormat("json") - if self.client.token: - sparql.addCustomHttpHeader("Authorization", f"Bearer {self.client.token}") - sparql.setQuery(query) + text = self.query_raw( + dataset.strip(), + query, + accept="application/sparql-results+json", + ) + try: + result = json.loads(text) + except json.JSONDecodeError as e: + raise ValueError(f"Failed to decode SPARQL results JSON: {e}") from e - result = sparql.queryAndConvert() return make_dataframe(result, columns) From ed4e5e1b34917f6efcc5be6b67866b7c061cff2f Mon Sep 17 00:00:00 2001 From: marian bruns Date: Wed, 18 Mar 2026 14:56:32 +0100 Subject: [PATCH 02/14] Update tests for sparql query_df without SPARQLWrapper --- tests/unit/test_ontodocker_client.py | 60 ++++++++++++---------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index 4dc2c44..0f615a0 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -354,47 +354,37 @@ def test_query_df_validates_query_and_columns(self): with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=[]) - def test_query_df_uses_sparqlwrapper_and_adds_auth_header_when_token_set(self): + def test_query_df_uses_get_with_query_param_and_accept_header(self): s = _FakeSession() + s.response = _FakeResponse( + text=( + "{\"results\": {\"bindings\": " + "[{\"a\": {\"value\": \"1\"}, \"b\": {\"value\": \"2\"}}]}}" + ), + request=_FakeRequest("GET"), + ) c = OntodockerClient("https://example.org", token="abc", session=s) - wrappers: list[object] = [] - - class _FakeSparqlWrapper: - def __init__(self, endpoint: str): - self.endpoint = endpoint - self.headers: dict[str, str] = {} - self.query = None - self.return_format = None - wrappers.append(self) - - def setReturnFormat(self, fmt: str): - self.return_format = fmt - - def addCustomHttpHeader(self, key: str, value: str): - self.headers[key] = value - - def setQuery(self, query: str): - self.query = query - - def queryAndConvert(self): - return { - "results": { - "bindings": [ - {"a": {"value": "1"}, "b": {"value": "2"}}, - ] - } - } - - with mock.patch( - "courier.services.ontodocker.sparql.SPARQLWrapper", _FakeSparqlWrapper - ): - df = c.sparql.query_df("ds", "SELECT ?a ?b WHERE {}", columns=["a", "b"]) + + df = c.sparql.query_df("ds", "SELECT ?a ?b WHERE {}", columns=["a", "b"]) self.assertIsInstance(df, pd.DataFrame) self.assertEqual(list(df.columns), ["a", "b"]) self.assertEqual(df.iloc[0].tolist(), ["1", "2"]) - self.assertEqual(len(wrappers), 1) - self.assertEqual(wrappers[0].headers["Authorization"], "Bearer abc") + self.assertEqual(s.calls[0]["method"], "GET") + self.assertEqual( + s.calls[0]["url"], + "https://example.org/api/v1/jena/ds/sparql", + ) + self.assertEqual( + s.calls[0]["params"], + {"query": "SELECT ?a ?b WHERE {}"}, + ) + self.assertEqual( + s.calls[0]["headers"], + {"Accept": "application/sparql-results+json"}, + ) + # token is handled by HttpClient via session headers, not per-request + self.assertEqual(s.headers.get("Authorization"), "Bearer abc") if __name__ == "__main__": From 6fc8280c1517cab4cf6f947c103491d08dd4a7f2 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Wed, 18 Mar 2026 15:03:51 +0100 Subject: [PATCH 03/14] Make SPARQL result DataFrame follow requested columns --- courier/services/ontodocker/_compat.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/courier/services/ontodocker/_compat.py b/courier/services/ontodocker/_compat.py index ee10bd0..4d7b9c0 100644 --- a/courier/services/ontodocker/_compat.py +++ b/courier/services/ontodocker/_compat.py @@ -107,8 +107,7 @@ def extract_dataset_names(sparql_endpoints: list[str]) -> list[str]: def make_dataframe(result: dict, columns: list[str]) -> pd.DataFrame: - """ - Convert a SPARQL JSON result into a pandas DataFrame. + """Convert a SPARQL JSON result into a pandas DataFrame. Parameters ---------- @@ -130,10 +129,10 @@ def make_dataframe(result: dict, columns: list[str]) -> pd.DataFrame: rows: list[list[str | None]] = [] for binding in result["results"]["bindings"]: row: list[str | None] = [] +<<<<<<< HEAD for column in columns: value = binding.get(column) row.append(value.get("value") if isinstance(value, dict) else None) rows.append(row) - df = pd.DataFrame(rows, columns=columns) - return df + return pd.DataFrame(rows, columns=columns) From 953c68070aeac3a3f5cb7716eccba7c285ad9cac Mon Sep 17 00:00:00 2001 From: marian bruns Date: Wed, 18 Mar 2026 15:06:01 +0100 Subject: [PATCH 04/14] Update tests for make_dataframe missing values --- tests/unit/services/ontodocker/test_compat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/services/ontodocker/test_compat.py b/tests/unit/services/ontodocker/test_compat.py index b8a0141..d91cc02 100644 --- a/tests/unit/services/ontodocker/test_compat.py +++ b/tests/unit/services/ontodocker/test_compat.py @@ -147,11 +147,11 @@ def test_binding_key_order_does_not_affect_column_alignment(self): expected = pd.DataFrame([["1", "2"]], columns=["a", "b"]) pdt.assert_frame_equal(df, expected) - def test_missing_variable_in_binding_becomes_null(self): + def test_missing_variable_in_binding_yields_none(self): result = self._make_result( ["a", "b"], [{"a": {"value": "1"}}], # "b" is missing ) df = _compat.make_dataframe(result, ["a", "b"]) - self.assertEqual(df.iloc[0]["a"], "1") - self.assertTrue(pd.isna(df.iloc[0]["b"])) + expected = pd.DataFrame([["1", None]], columns=["a", "b"]) + pdt.assert_frame_equal(df, expected) From e5b4d9593b5a2d64d630eb0cf4fa00a5dafd5333 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Wed, 18 Mar 2026 15:22:15 +0100 Subject: [PATCH 05/14] black --- tests/unit/test_ontodocker_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index 0f615a0..8e5c03a 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -358,8 +358,8 @@ def test_query_df_uses_get_with_query_param_and_accept_header(self): s = _FakeSession() s.response = _FakeResponse( text=( - "{\"results\": {\"bindings\": " - "[{\"a\": {\"value\": \"1\"}, \"b\": {\"value\": \"2\"}}]}}" + '{"results": {"bindings": ' + '[{"a": {"value": "1"}, "b": {"value": "2"}}]}}' ), request=_FakeRequest("GET"), ) From baadaad276d36e08f5463d74212d09db576fbe8d Mon Sep 17 00:00:00 2001 From: marian bruns Date: Wed, 18 Mar 2026 15:24:42 +0100 Subject: [PATCH 06/14] ruff --- courier/services/ontodocker/sparql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/courier/services/ontodocker/sparql.py b/courier/services/ontodocker/sparql.py index 3a14aad..a77a12a 100644 --- a/courier/services/ontodocker/sparql.py +++ b/courier/services/ontodocker/sparql.py @@ -2,8 +2,8 @@ from __future__ import annotations -from dataclasses import dataclass import json +from dataclasses import dataclass from typing import TYPE_CHECKING import pandas as pd From 6428a9a3feb48c10281f8b33fa137fa813f431e1 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 19 Mar 2026 15:02:38 +0100 Subject: [PATCH 07/14] include in test query_df: empty dataset validation, invalid JSON rasing ValueError, dataset trimming and hanling of missing bindings --- tests/unit/test_ontodocker_client.py | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index 8e5c03a..a70dedf 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -348,12 +348,49 @@ def test_query_df_validates_query_and_columns(self): s = _FakeSession() c = OntodockerClient("https://example.org", session=s) + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("", "SELECT ?a WHERE {}", columns=["a"]) + with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "", columns=["a"]) with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=[]) + def test_query_df_raises_value_error_on_invalid_json(self): + s = _FakeSession() + s.response = _FakeResponse(text="not json", request=_FakeRequest("GET")) + c = OntodockerClient("https://example.org", session=s) + + with self.assertRaisesRegex(ValueError, "Failed to decode SPARQL results JSON"): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a"]) + + def test_query_df_strips_dataset_and_maps_missing_bindings_to_none(self): + s = _FakeSession() + s.response = _FakeResponse( + text=( + '{"results": {"bindings": [' + '{"a": {"value": "1"}}, ' + '{"b": {"value": "2"}}' + "]}}" + ), + request=_FakeRequest("GET"), + ) + c = OntodockerClient("https://example.org", session=s) + + df = c.sparql.query_df(" ds ", "SELECT ?a ?b WHERE {}", columns=["a", "b"]) + + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(list(df.columns), ["a", "b"]) + self.assertEqual(df.iloc[0]["a"], "1") + self.assertTrue(pd.isna(df.iloc[0]["b"])) + self.assertTrue(pd.isna(df.iloc[1]["a"])) + self.assertEqual(df.iloc[1]["b"], "2") + self.assertEqual( + s.calls[0]["url"], + "https://example.org/api/v1/jena/ds/sparql", + ) + def test_query_df_uses_get_with_query_param_and_accept_header(self): s = _FakeSession() s.response = _FakeResponse( From 409afba114e4277f6dcde263dd176912f49250ca Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 19 Mar 2026 15:11:37 +0100 Subject: [PATCH 08/14] fix columns validation in courier/services/ontodocker/sparql.py --- courier/services/ontodocker/sparql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/courier/services/ontodocker/sparql.py b/courier/services/ontodocker/sparql.py index a77a12a..551258d 100644 --- a/courier/services/ontodocker/sparql.py +++ b/courier/services/ontodocker/sparql.py @@ -118,7 +118,9 @@ def query_df(self, dataset: str, query: str, columns: list[str]) -> pd.DataFrame raise ValidationError("dataset must be non-empty") if not query or not query.strip(): raise ValidationError("query must be non-empty") - if not columns: + if not columns or any( + not isinstance(column, str) or not column.strip() for column in columns + ): raise ValidationError("columns must be a non-empty list of strings") text = self.query_raw( From f5004758ccbdaaa463c94699553d7274a3209add Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 19 Mar 2026 15:12:09 +0100 Subject: [PATCH 09/14] update test_ontodocker_client --- tests/unit/test_ontodocker_client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index a70dedf..c11fab3 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -357,6 +357,12 @@ def test_query_df_validates_query_and_columns(self): with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=[]) + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a", " "]) + + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a", 1]) + def test_query_df_raises_value_error_on_invalid_json(self): s = _FakeSession() s.response = _FakeResponse(text="not json", request=_FakeRequest("GET")) From 1e7997e270ba380614972106835192b6eb92d779 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 26 Mar 2026 11:39:55 +0100 Subject: [PATCH 10/14] Remove leftover conflict marker in _compat --- courier/services/ontodocker/_compat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/courier/services/ontodocker/_compat.py b/courier/services/ontodocker/_compat.py index 4d7b9c0..3dab1b1 100644 --- a/courier/services/ontodocker/_compat.py +++ b/courier/services/ontodocker/_compat.py @@ -129,7 +129,6 @@ def make_dataframe(result: dict, columns: list[str]) -> pd.DataFrame: rows: list[list[str | None]] = [] for binding in result["results"]["bindings"]: row: list[str | None] = [] -<<<<<<< HEAD for column in columns: value = binding.get(column) row.append(value.get("value") if isinstance(value, dict) else None) From 092811902375d510aa91f8a13f9678052c5fb381 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 26 Mar 2026 12:48:24 +0100 Subject: [PATCH 11/14] Deduplicate query_df validation --- courier/services/ontodocker/sparql.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/courier/services/ontodocker/sparql.py b/courier/services/ontodocker/sparql.py index 551258d..5d657ef 100644 --- a/courier/services/ontodocker/sparql.py +++ b/courier/services/ontodocker/sparql.py @@ -114,17 +114,13 @@ def query_df(self, dataset: str, query: str, columns: list[str]) -> pd.DataFrame HttpError If the underlying HTTP request fails. """ - if not dataset or not dataset.strip(): - raise ValidationError("dataset must be non-empty") - if not query or not query.strip(): - raise ValidationError("query must be non-empty") if not columns or any( not isinstance(column, str) or not column.strip() for column in columns ): raise ValidationError("columns must be a non-empty list of strings") text = self.query_raw( - dataset.strip(), + dataset, query, accept="application/sparql-results+json", ) From e98d786016389480c5a5bdfa068c6aed200a8989 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 26 Mar 2026 12:56:29 +0100 Subject: [PATCH 12/14] Enforce list type for query_df columns --- courier/services/ontodocker/sparql.py | 8 ++++++-- tests/unit/test_ontodocker_client.py | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/courier/services/ontodocker/sparql.py b/courier/services/ontodocker/sparql.py index 5d657ef..5d46c9a 100644 --- a/courier/services/ontodocker/sparql.py +++ b/courier/services/ontodocker/sparql.py @@ -114,8 +114,12 @@ def query_df(self, dataset: str, query: str, columns: list[str]) -> pd.DataFrame HttpError If the underlying HTTP request fails. """ - if not columns or any( - not isinstance(column, str) or not column.strip() for column in columns + if ( + not isinstance(columns, list) + or not columns + or any( + not isinstance(column, str) or not column.strip() for column in columns + ) ): raise ValidationError("columns must be a non-empty list of strings") diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index c11fab3..77a50e2 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -363,6 +363,9 @@ def test_query_df_validates_query_and_columns(self): with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a", 1]) + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=("a",)) + def test_query_df_raises_value_error_on_invalid_json(self): s = _FakeSession() s.response = _FakeResponse(text="not json", request=_FakeRequest("GET")) From 30205e7fab9dd40b9903ec016ecfe224e0d8e670 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 26 Mar 2026 13:08:18 +0100 Subject: [PATCH 13/14] Add focused coverage for query_df delegation --- tests/unit/test_ontodocker_client.py | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index 77a50e2..2855de4 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -374,6 +374,38 @@ def test_query_df_raises_value_error_on_invalid_json(self): with self.assertRaisesRegex(ValueError, "Failed to decode SPARQL results JSON"): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a"]) + def test_query_df_delegates_to_query_raw_with_accept_header(self): + s = _FakeSession() + c = OntodockerClient("https://example.org", session=s) + + with mock.patch.object( + c.sparql, + "query_raw", + return_value='{"results": {"bindings": [{"a": {"value": "1"}}]}}', + ) as query_raw: + df = c.sparql.query_df(" ds ", "SELECT ?a WHERE {}", columns=["a"]) + + query_raw.assert_called_once_with( + " ds ", + "SELECT ?a WHERE {}", + accept="application/sparql-results+json", + ) + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(list(df.columns), ["a"]) + self.assertEqual(df.iloc[0].tolist(), ["1"]) + + def test_query_df_does_not_call_query_raw_when_columns_invalid(self): + s = _FakeSession() + c = OntodockerClient("https://example.org", session=s) + + with ( + mock.patch.object(c.sparql, "query_raw") as query_raw, + self.assertRaises(ValidationError), + ): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=("a",)) + + query_raw.assert_not_called() + def test_query_df_strips_dataset_and_maps_missing_bindings_to_none(self): s = _FakeSession() s.response = _FakeResponse( From 62e213df2166bd4682a636716150fb44b10ed705 Mon Sep 17 00:00:00 2001 From: marian bruns Date: Thu, 26 Mar 2026 15:22:42 +0100 Subject: [PATCH 14/14] Let JSON decode errors propagate from query_df --- courier/services/ontodocker/sparql.py | 5 +---- tests/unit/test_ontodocker_client.py | 5 +++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/courier/services/ontodocker/sparql.py b/courier/services/ontodocker/sparql.py index 5d46c9a..a0614a4 100644 --- a/courier/services/ontodocker/sparql.py +++ b/courier/services/ontodocker/sparql.py @@ -128,9 +128,6 @@ def query_df(self, dataset: str, query: str, columns: list[str]) -> pd.DataFrame query, accept="application/sparql-results+json", ) - try: - result = json.loads(text) - except json.JSONDecodeError as e: - raise ValueError(f"Failed to decode SPARQL results JSON: {e}") from e + result = json.loads(text) return make_dataframe(result, columns) diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index 2855de4..0fbe840 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -1,3 +1,4 @@ +import json import unittest from pathlib import Path from tempfile import TemporaryDirectory @@ -366,12 +367,12 @@ def test_query_df_validates_query_and_columns(self): with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=("a",)) - def test_query_df_raises_value_error_on_invalid_json(self): + def test_query_df_raises_json_decode_error_on_invalid_json(self): s = _FakeSession() s.response = _FakeResponse(text="not json", request=_FakeRequest("GET")) c = OntodockerClient("https://example.org", session=s) - with self.assertRaisesRegex(ValueError, "Failed to decode SPARQL results JSON"): + with self.assertRaises(json.JSONDecodeError): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a"]) def test_query_df_delegates_to_query_raw_with_accept_header(self):