diff --git a/courier/services/ontodocker/_compat.py b/courier/services/ontodocker/_compat.py index ee10bd0..3dab1b1 100644 --- a/courier/services/ontodocker/_compat.py +++ b/courier/services/ontodocker/_compat.py @@ -107,8 +107,7 @@ def extract_dataset_names(sparql_endpoints: list[str]) -> list[str]: def make_dataframe(result: dict, columns: list[str]) -> pd.DataFrame: - """ - Convert a SPARQL JSON result into a pandas DataFrame. + """Convert a SPARQL JSON result into a pandas DataFrame. Parameters ---------- @@ -135,5 +134,4 @@ def make_dataframe(result: dict, columns: list[str]) -> pd.DataFrame: row.append(value.get("value") if isinstance(value, dict) else None) rows.append(row) - df = pd.DataFrame(rows, columns=columns) - return df + return pd.DataFrame(rows, columns=columns) diff --git a/courier/services/ontodocker/sparql.py b/courier/services/ontodocker/sparql.py index 42dd54f..a0614a4 100644 --- a/courier/services/ontodocker/sparql.py +++ b/courier/services/ontodocker/sparql.py @@ -2,11 +2,11 @@ from __future__ import annotations +import json from dataclasses import dataclass from typing import TYPE_CHECKING import pandas as pd -from SPARQLWrapper import SPARQLWrapper from courier.exceptions import ValidationError from courier.services.ontodocker._compat import make_dataframe @@ -109,21 +109,25 @@ def query_df(self, dataset: str, query: str, columns: list[str]) -> pd.DataFrame ------ ValidationError If `dataset`, `query`, or `columns` are invalid. - Exception - Any exceptions raised by SPARQLWrapper or result conversion. + ValueError + If the endpoint response cannot be decoded as JSON. + HttpError + If the underlying HTTP request fails. """ - if not query or not query.strip(): - raise ValidationError("query must be non-empty") - if not columns: + if ( + not isinstance(columns, list) + or not columns + or any( + not isinstance(column, str) or not column.strip() for column in columns + ) + ): raise ValidationError("columns must be a non-empty list of strings") - endpoint = self.endpoint(dataset) - - sparql = SPARQLWrapper(endpoint) - sparql.setReturnFormat("json") - if self.client.token: - sparql.addCustomHttpHeader("Authorization", f"Bearer {self.client.token}") - sparql.setQuery(query) + text = self.query_raw( + dataset, + query, + accept="application/sparql-results+json", + ) + result = json.loads(text) - result = sparql.queryAndConvert() return make_dataframe(result, columns) diff --git a/tests/unit/services/ontodocker/test_compat.py b/tests/unit/services/ontodocker/test_compat.py index b8a0141..d91cc02 100644 --- a/tests/unit/services/ontodocker/test_compat.py +++ b/tests/unit/services/ontodocker/test_compat.py @@ -147,11 +147,11 @@ def test_binding_key_order_does_not_affect_column_alignment(self): expected = pd.DataFrame([["1", "2"]], columns=["a", "b"]) pdt.assert_frame_equal(df, expected) - def test_missing_variable_in_binding_becomes_null(self): + def test_missing_variable_in_binding_yields_none(self): result = self._make_result( ["a", "b"], [{"a": {"value": "1"}}], # "b" is missing ) df = _compat.make_dataframe(result, ["a", "b"]) - self.assertEqual(df.iloc[0]["a"], "1") - self.assertTrue(pd.isna(df.iloc[0]["b"])) + expected = pd.DataFrame([["1", None]], columns=["a", "b"]) + pdt.assert_frame_equal(df, expected) diff --git a/tests/unit/test_ontodocker_client.py b/tests/unit/test_ontodocker_client.py index 4dc2c44..0fbe840 100644 --- a/tests/unit/test_ontodocker_client.py +++ b/tests/unit/test_ontodocker_client.py @@ -1,3 +1,4 @@ +import json import unittest from pathlib import Path from tempfile import TemporaryDirectory @@ -348,53 +349,121 @@ def test_query_df_validates_query_and_columns(self): s = _FakeSession() c = OntodockerClient("https://example.org", session=s) + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("", "SELECT ?a WHERE {}", columns=["a"]) + with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "", columns=["a"]) with self.assertRaises(ValidationError): _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=[]) - def test_query_df_uses_sparqlwrapper_and_adds_auth_header_when_token_set(self): + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a", " "]) + + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a", 1]) + + with self.assertRaises(ValidationError): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=("a",)) + + def test_query_df_raises_json_decode_error_on_invalid_json(self): s = _FakeSession() - c = OntodockerClient("https://example.org", token="abc", session=s) - wrappers: list[object] = [] - - class _FakeSparqlWrapper: - def __init__(self, endpoint: str): - self.endpoint = endpoint - self.headers: dict[str, str] = {} - self.query = None - self.return_format = None - wrappers.append(self) - - def setReturnFormat(self, fmt: str): - self.return_format = fmt - - def addCustomHttpHeader(self, key: str, value: str): - self.headers[key] = value - - def setQuery(self, query: str): - self.query = query - - def queryAndConvert(self): - return { - "results": { - "bindings": [ - {"a": {"value": "1"}, "b": {"value": "2"}}, - ] - } - } - - with mock.patch( - "courier.services.ontodocker.sparql.SPARQLWrapper", _FakeSparqlWrapper + s.response = _FakeResponse(text="not json", request=_FakeRequest("GET")) + c = OntodockerClient("https://example.org", session=s) + + with self.assertRaises(json.JSONDecodeError): + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a"]) + + def test_query_df_delegates_to_query_raw_with_accept_header(self): + s = _FakeSession() + c = OntodockerClient("https://example.org", session=s) + + with mock.patch.object( + c.sparql, + "query_raw", + return_value='{"results": {"bindings": [{"a": {"value": "1"}}]}}', + ) as query_raw: + df = c.sparql.query_df(" ds ", "SELECT ?a WHERE {}", columns=["a"]) + + query_raw.assert_called_once_with( + " ds ", + "SELECT ?a WHERE {}", + accept="application/sparql-results+json", + ) + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(list(df.columns), ["a"]) + self.assertEqual(df.iloc[0].tolist(), ["1"]) + + def test_query_df_does_not_call_query_raw_when_columns_invalid(self): + s = _FakeSession() + c = OntodockerClient("https://example.org", session=s) + + with ( + mock.patch.object(c.sparql, "query_raw") as query_raw, + self.assertRaises(ValidationError), ): - df = c.sparql.query_df("ds", "SELECT ?a ?b WHERE {}", columns=["a", "b"]) + _ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=("a",)) + + query_raw.assert_not_called() + + def test_query_df_strips_dataset_and_maps_missing_bindings_to_none(self): + s = _FakeSession() + s.response = _FakeResponse( + text=( + '{"results": {"bindings": [' + '{"a": {"value": "1"}}, ' + '{"b": {"value": "2"}}' + "]}}" + ), + request=_FakeRequest("GET"), + ) + c = OntodockerClient("https://example.org", session=s) + + df = c.sparql.query_df(" ds ", "SELECT ?a ?b WHERE {}", columns=["a", "b"]) + + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(list(df.columns), ["a", "b"]) + self.assertEqual(df.iloc[0]["a"], "1") + self.assertTrue(pd.isna(df.iloc[0]["b"])) + self.assertTrue(pd.isna(df.iloc[1]["a"])) + self.assertEqual(df.iloc[1]["b"], "2") + self.assertEqual( + s.calls[0]["url"], + "https://example.org/api/v1/jena/ds/sparql", + ) + + def test_query_df_uses_get_with_query_param_and_accept_header(self): + s = _FakeSession() + s.response = _FakeResponse( + text=( + '{"results": {"bindings": ' + '[{"a": {"value": "1"}, "b": {"value": "2"}}]}}' + ), + request=_FakeRequest("GET"), + ) + c = OntodockerClient("https://example.org", token="abc", session=s) + + df = c.sparql.query_df("ds", "SELECT ?a ?b WHERE {}", columns=["a", "b"]) self.assertIsInstance(df, pd.DataFrame) self.assertEqual(list(df.columns), ["a", "b"]) self.assertEqual(df.iloc[0].tolist(), ["1", "2"]) - self.assertEqual(len(wrappers), 1) - self.assertEqual(wrappers[0].headers["Authorization"], "Bearer abc") + self.assertEqual(s.calls[0]["method"], "GET") + self.assertEqual( + s.calls[0]["url"], + "https://example.org/api/v1/jena/ds/sparql", + ) + self.assertEqual( + s.calls[0]["params"], + {"query": "SELECT ?a ?b WHERE {}"}, + ) + self.assertEqual( + s.calls[0]["headers"], + {"Accept": "application/sparql-results+json"}, + ) + # token is handled by HttpClient via session headers, not per-request + self.assertEqual(s.headers.get("Authorization"), "Bearer abc") if __name__ == "__main__":