Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions courier/services/ontodocker/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ def extract_dataset_names(sparql_endpoints: list[str]) -> list[str]:


def make_dataframe(result: dict, columns: list[str]) -> pd.DataFrame:
"""
Convert a SPARQL JSON result into a pandas DataFrame.
"""Convert a SPARQL JSON result into a pandas DataFrame.

Parameters
----------
Expand All @@ -135,5 +134,4 @@ def make_dataframe(result: dict, columns: list[str]) -> pd.DataFrame:
row.append(value.get("value") if isinstance(value, dict) else None)
rows.append(row)

df = pd.DataFrame(rows, columns=columns)
return df
return pd.DataFrame(rows, columns=columns)
32 changes: 18 additions & 14 deletions courier/services/ontodocker/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

from __future__ import annotations

import json
from dataclasses import dataclass
from typing import TYPE_CHECKING

import pandas as pd
from SPARQLWrapper import SPARQLWrapper

from courier.exceptions import ValidationError
from courier.services.ontodocker._compat import make_dataframe
Expand Down Expand Up @@ -109,21 +109,25 @@ def query_df(self, dataset: str, query: str, columns: list[str]) -> pd.DataFrame
------
ValidationError
If `dataset`, `query`, or `columns` are invalid.
Exception
Any exceptions raised by SPARQLWrapper or result conversion.
ValueError
If the endpoint response cannot be decoded as JSON.
HttpError
If the underlying HTTP request fails.
"""
if not query or not query.strip():
raise ValidationError("query must be non-empty")
if not columns:
if (
not isinstance(columns, list)
or not columns
or any(
not isinstance(column, str) or not column.strip() for column in columns
)
):
raise ValidationError("columns must be a non-empty list of strings")

endpoint = self.endpoint(dataset)

sparql = SPARQLWrapper(endpoint)
sparql.setReturnFormat("json")
if self.client.token:
sparql.addCustomHttpHeader("Authorization", f"Bearer {self.client.token}")
sparql.setQuery(query)
text = self.query_raw(
dataset,
query,
accept="application/sparql-results+json",
)
result = json.loads(text)

result = sparql.queryAndConvert()
return make_dataframe(result, columns)
6 changes: 3 additions & 3 deletions tests/unit/services/ontodocker/test_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,11 @@ def test_binding_key_order_does_not_affect_column_alignment(self):
expected = pd.DataFrame([["1", "2"]], columns=["a", "b"])
pdt.assert_frame_equal(df, expected)

def test_missing_variable_in_binding_becomes_null(self):
def test_missing_variable_in_binding_yields_none(self):
result = self._make_result(
["a", "b"],
[{"a": {"value": "1"}}], # "b" is missing
)
df = _compat.make_dataframe(result, ["a", "b"])
self.assertEqual(df.iloc[0]["a"], "1")
self.assertTrue(pd.isna(df.iloc[0]["b"]))
expected = pd.DataFrame([["1", None]], columns=["a", "b"])
pdt.assert_frame_equal(df, expected)
139 changes: 104 additions & 35 deletions tests/unit/test_ontodocker_client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -348,53 +349,121 @@ def test_query_df_validates_query_and_columns(self):
s = _FakeSession()
c = OntodockerClient("https://example.org", session=s)

with self.assertRaises(ValidationError):
_ = c.sparql.query_df("", "SELECT ?a WHERE {}", columns=["a"])

with self.assertRaises(ValidationError):
_ = c.sparql.query_df("ds", "", columns=["a"])

with self.assertRaises(ValidationError):
_ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=[])

def test_query_df_uses_sparqlwrapper_and_adds_auth_header_when_token_set(self):
with self.assertRaises(ValidationError):
_ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a", " "])

with self.assertRaises(ValidationError):
_ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a", 1])

with self.assertRaises(ValidationError):
_ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=("a",))

def test_query_df_raises_json_decode_error_on_invalid_json(self):
s = _FakeSession()
c = OntodockerClient("https://example.org", token="abc", session=s)
wrappers: list[object] = []

class _FakeSparqlWrapper:
def __init__(self, endpoint: str):
self.endpoint = endpoint
self.headers: dict[str, str] = {}
self.query = None
self.return_format = None
wrappers.append(self)

def setReturnFormat(self, fmt: str):
self.return_format = fmt

def addCustomHttpHeader(self, key: str, value: str):
self.headers[key] = value

def setQuery(self, query: str):
self.query = query

def queryAndConvert(self):
return {
"results": {
"bindings": [
{"a": {"value": "1"}, "b": {"value": "2"}},
]
}
}

with mock.patch(
"courier.services.ontodocker.sparql.SPARQLWrapper", _FakeSparqlWrapper
s.response = _FakeResponse(text="not json", request=_FakeRequest("GET"))
c = OntodockerClient("https://example.org", session=s)

with self.assertRaises(json.JSONDecodeError):
_ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=["a"])

def test_query_df_delegates_to_query_raw_with_accept_header(self):
s = _FakeSession()
c = OntodockerClient("https://example.org", session=s)

with mock.patch.object(
c.sparql,
"query_raw",
return_value='{"results": {"bindings": [{"a": {"value": "1"}}]}}',
) as query_raw:
df = c.sparql.query_df(" ds ", "SELECT ?a WHERE {}", columns=["a"])

query_raw.assert_called_once_with(
" ds ",
"SELECT ?a WHERE {}",
accept="application/sparql-results+json",
)
self.assertIsInstance(df, pd.DataFrame)
self.assertEqual(list(df.columns), ["a"])
self.assertEqual(df.iloc[0].tolist(), ["1"])

def test_query_df_does_not_call_query_raw_when_columns_invalid(self):
s = _FakeSession()
c = OntodockerClient("https://example.org", session=s)

with (
mock.patch.object(c.sparql, "query_raw") as query_raw,
self.assertRaises(ValidationError),
):
df = c.sparql.query_df("ds", "SELECT ?a ?b WHERE {}", columns=["a", "b"])
_ = c.sparql.query_df("ds", "SELECT ?a WHERE {}", columns=("a",))

query_raw.assert_not_called()

def test_query_df_strips_dataset_and_maps_missing_bindings_to_none(self):
s = _FakeSession()
s.response = _FakeResponse(
text=(
'{"results": {"bindings": ['
'{"a": {"value": "1"}}, '
'{"b": {"value": "2"}}'
"]}}"
),
request=_FakeRequest("GET"),
)
c = OntodockerClient("https://example.org", session=s)

df = c.sparql.query_df(" ds ", "SELECT ?a ?b WHERE {}", columns=["a", "b"])

self.assertIsInstance(df, pd.DataFrame)
self.assertEqual(list(df.columns), ["a", "b"])
self.assertEqual(df.iloc[0]["a"], "1")
self.assertTrue(pd.isna(df.iloc[0]["b"]))
self.assertTrue(pd.isna(df.iloc[1]["a"]))
self.assertEqual(df.iloc[1]["b"], "2")
self.assertEqual(
s.calls[0]["url"],
"https://example.org/api/v1/jena/ds/sparql",
)

def test_query_df_uses_get_with_query_param_and_accept_header(self):
s = _FakeSession()
s.response = _FakeResponse(
text=(
'{"results": {"bindings": '
'[{"a": {"value": "1"}, "b": {"value": "2"}}]}}'
),
request=_FakeRequest("GET"),
)
c = OntodockerClient("https://example.org", token="abc", session=s)

df = c.sparql.query_df("ds", "SELECT ?a ?b WHERE {}", columns=["a", "b"])

self.assertIsInstance(df, pd.DataFrame)
self.assertEqual(list(df.columns), ["a", "b"])
self.assertEqual(df.iloc[0].tolist(), ["1", "2"])
self.assertEqual(len(wrappers), 1)
self.assertEqual(wrappers[0].headers["Authorization"], "Bearer abc")
self.assertEqual(s.calls[0]["method"], "GET")
self.assertEqual(
s.calls[0]["url"],
"https://example.org/api/v1/jena/ds/sparql",
)
self.assertEqual(
s.calls[0]["params"],
{"query": "SELECT ?a ?b WHERE {}"},
)
self.assertEqual(
s.calls[0]["headers"],
{"Accept": "application/sparql-results+json"},
)
# token is handled by HttpClient via session headers, not per-request
self.assertEqual(s.headers.get("Authorization"), "Bearer abc")


if __name__ == "__main__":
Expand Down
Loading