diff --git a/courier/__init__.py b/courier/__init__.py index 390d878..30cf51b 100644 --- a/courier/__init__.py +++ b/courier/__init__.py @@ -8,25 +8,9 @@ __version__ = "0.0.0+unknown" from courier.http_client import HttpClient -from courier.ontodocker import ( - create_empty_dataset, - delete_dataset, - download_dataset_as_turtle_file, - extract_dataset_names, - get_all_dataset_sparql_endpoints, - rectify_endpoints, - upload_turtlefile, -) from courier.services.ontodocker import OntodockerClient __all__ = [ "HttpClient", "OntodockerClient", - "rectify_endpoints", - "get_all_dataset_sparql_endpoints", - "extract_dataset_names", - "download_dataset_as_turtle_file", - "create_empty_dataset", - "upload_turtlefile", - "delete_dataset", ] diff --git a/courier/ontodocker.py b/courier/ontodocker.py deleted file mode 100644 index 4607c01..0000000 --- a/courier/ontodocker.py +++ /dev/null @@ -1,252 +0,0 @@ -"""Legacy Ontodocker functional API (deprecated). - -This module used to provide a set of standalone functions for interacting with an -Ontodocker/Fuseki deployment. - -The preferred API is now :class:`courier.services.ontodocker.OntodockerClient`. -The legacy functions are kept for backwards compatibility and forward all -requests to the new client implementation. - -All functions in this module emit a :class:`DeprecationWarning`. -""" - -from __future__ import annotations - -import warnings -from pathlib import Path -from urllib.parse import urlsplit - -import pandas as pd - -from courier.services.ontodocker import OntodockerClient -from courier.services.ontodocker._compat import ( - extract_dataset_names, - make_dataframe, - parse_endpoints_response, - rectify_endpoints, -) - -__all__ = [ - "rectify_endpoints", - "get_all_dataset_sparql_endpoints", - "extract_dataset_names", - "download_dataset_as_turtle_file", - "create_empty_dataset", - "upload_turtlefile", - "delete_dataset", - "make_dataframe", - "send_query", -] - - -def _warn_deprecated(name: str) -> None: - warnings.warn( - f"courier.ontodocker.{name} is deprecated; use courier.services.ontodocker.OntodockerClient instead.", - DeprecationWarning, - stacklevel=2, - ) - - -def _validate_host_address(address: str) -> str: - # Preserve legacy behavior/documentation: address is expected without scheme. - if not address or not address.strip(): - raise ValueError( - "address must be a non-empty host (without scheme), e.g. 'ontodocker.example.org'" - ) - if "://" in address: - raise ValueError( - "address must not include a scheme (remove 'http://...' or 'https://...')" - ) - return address.strip() - - -def get_all_dataset_sparql_endpoints( - address: str, - token: str | None = None, - *, - timeout: tuple[int, int] = (5, 5), - verify: bool = True, - scheme: str = "https", - rectify: bool = True, -) -> list[str]: - _warn_deprecated("get_all_dataset_sparql_endpoints") - - address = _validate_host_address(address) - client = OntodockerClient( - address, - token=token, - default_scheme=scheme, - timeout=(float(timeout[0]), float(timeout[1])), - verify=verify, - ) - client.endpoints.rectify_legacy = rectify - return client.endpoints.list_raw() - - -def download_dataset_as_turtle_file( - address: str, - dataset_name: str, - *, - token: str | None = None, - turtlefile_name: str | None = None, - timeout: tuple[int, int] = (5, 5), - verify: bool = True, - scheme: str = "https", -) -> str: - _warn_deprecated("download_dataset_as_turtle_file") - - address = _validate_host_address(address) - - if not dataset_name or not dataset_name.strip(): - raise ValueError("dataset_name must be non-empty") - - if turtlefile_name is None: - turtlefile_name = str(Path.cwd() / f"{dataset_name.strip()}.ttl") - warnings.warn( - "No path/filename to save the turtle file to was explicitly" - f" provided. It is saved under {turtlefile_name}", - UserWarning, - stacklevel=2, - ) - - client = OntodockerClient( - address, - token=token, - default_scheme=scheme, - timeout=(float(timeout[0]), float(timeout[1])), - verify=verify, - ) - - # New API returns content; legacy returns filename. - _ = client.datasets.download_turtle(dataset_name.strip(), turtlefile_name) - return turtlefile_name - - -def create_empty_dataset( - address: str, - dataset_name: str, - *, - token: str | None = None, - timeout: tuple[int, int] = (5, 5), - verify: bool = True, - scheme: str = "https", -) -> str: - _warn_deprecated("create_empty_dataset") - - address = _validate_host_address(address) - - if not dataset_name or not dataset_name.strip(): - raise ValueError("dataset_name must be non-empty") - - client = OntodockerClient( - address, - token=token, - default_scheme=scheme, - timeout=(float(timeout[0]), float(timeout[1])), - verify=verify, - ) - return client.datasets.create(dataset_name.strip()) - - -def upload_turtlefile( - address: str, - dataset_name: str, - turtlefile: str | None = None, - *, - token: str | None = None, - timeout: tuple[int, int] = (5, 5), - verify: bool = True, - scheme: str = "https", -) -> str: - _warn_deprecated("upload_turtlefile") - - address = _validate_host_address(address) - - if not dataset_name or not dataset_name.strip(): - raise ValueError("dataset_name must be non-empty") - - if turtlefile is None or not turtlefile.strip(): - raise ValueError("A turtlefile must be provided.") - - client = OntodockerClient( - address, - token=token, - default_scheme=scheme, - timeout=(float(timeout[0]), float(timeout[1])), - verify=verify, - ) - return client.datasets.upload_turtlefile(dataset_name.strip(), turtlefile) - - -def delete_dataset( - address: str, - dataset_name: str, - *, - token: str | None = None, - timeout: tuple[int, int] = (5, 5), - verify: bool = True, - scheme: str = "https", -) -> str: - _warn_deprecated("delete_dataset") - - address = _validate_host_address(address) - - if not dataset_name or not dataset_name.strip(): - raise ValueError("dataset_name must be non-empty") - - client = OntodockerClient( - address, - token=token, - default_scheme=scheme, - timeout=(float(timeout[0]), float(timeout[1])), - verify=verify, - ) - return client.datasets.delete(dataset_name.strip()) - - -def send_query( - endpoint: str, - query: str, - columns: list[str] | None = None, - *, - token: str | None = None, - print_to_screen: bool = False, -) -> pd.DataFrame: - _warn_deprecated("send_query") - - if not endpoint or not endpoint.strip(): - raise ValueError("endpoint must be non-empty.") - - if not query or not query.strip(): - raise ValueError("query must be non-empty.") - - if columns is None: - raise ValueError("Please provide columns for the expected response.") - - endpoint = endpoint.strip() - if "://" not in endpoint: - raise ValueError( - "endpoint must include a URL scheme, e.g. 'https://example.org/api/v1/jena/ds/sparql'." - ) - - parts = urlsplit(endpoint) - base = f"{parts.scheme}://{parts.netloc}" - - # Reuse robust extraction logic from the new implementation. - dataset = extract_dataset_names([endpoint])[0] - - client = OntodockerClient(base, token=token) - result_df = client.sparql.query_df(dataset, query.strip(), columns=columns) - - if print_to_screen: - print(f'Sending query to "{endpoint}". Result:') - print(result_df) - print("") - - return result_df - - -# Backwards-compatible name for the parser. -# The old API implicitly parsed literal `list[str]` responses via `ast.literal_eval`. -# Keeping this import here makes it easy for users to migrate. -_parse_endpoints_response = parse_endpoints_response diff --git a/tests/unit/test_ontodocker_legacy_shim.py b/tests/unit/test_ontodocker_legacy_shim.py deleted file mode 100644 index dd300f3..0000000 --- a/tests/unit/test_ontodocker_legacy_shim.py +++ /dev/null @@ -1,110 +0,0 @@ -import unittest -import warnings -from pathlib import Path -from tempfile import TemporaryDirectory -from unittest import mock - -import pandas as pd - -import courier - - -class TestOntodockerLegacyShim(unittest.TestCase): - def test_legacy_functions_emit_deprecation_warning(self): - with ( - mock.patch("courier.ontodocker.OntodockerClient"), - self.assertWarns(DeprecationWarning), - ): - _ = courier.get_all_dataset_sparql_endpoints("example.org") - - def test_get_all_dataset_sparql_endpoints_delegates_and_passes_options(self): - fake_client = mock.Mock() - fake_client.endpoints.list_raw.return_value = [ - "https://example.org/api/v1/jena/ds/sparql" - ] - fake_client.endpoints.rectify_legacy = True - - with ( - mock.patch( - "courier.ontodocker.OntodockerClient", return_value=fake_client - ) as m, - self.assertWarns(DeprecationWarning), - ): - out = courier.get_all_dataset_sparql_endpoints( - "example.org", - token="abc", - timeout=(1, 2), - verify=False, - scheme="http", - rectify=False, - ) - - self.assertEqual(out, ["https://example.org/api/v1/jena/ds/sparql"]) - m.assert_called_once_with( - "example.org", - token="abc", - default_scheme="http", - timeout=(1.0, 2.0), - verify=False, - ) - self.assertFalse(fake_client.endpoints.rectify_legacy) - fake_client.endpoints.list_raw.assert_called_once_with() - - def test_download_dataset_as_turtle_file_returns_path_and_warns_on_default(self): - fake_client = mock.Mock() - fake_client.datasets.download_turtle.return_value = Path("unused.ttl") - - with ( - TemporaryDirectory() as tmpdir, - mock.patch("courier.ontodocker.OntodockerClient", return_value=fake_client), - mock.patch("courier.ontodocker.Path.cwd", return_value=Path(tmpdir)), - warnings.catch_warnings(record=True) as w, - ): - warnings.simplefilter("always") - out = courier.download_dataset_as_turtle_file("example.org", "ds") - - expected_path = Path(tmpdir) / "ds.ttl" - self.assertEqual( - [warning.category for warning in w], - [DeprecationWarning, UserWarning], - ) - self.assertIn("deprecated", str(w[0].message)) - self.assertIn(str(expected_path), str(w[1].message)) - self.assertEqual(out, str(expected_path)) - fake_client.datasets.download_turtle.assert_called_once_with( - "ds", str(expected_path) - ) - - def test_send_query_delegates_to_sparql_query_df(self): - fake_client = mock.Mock() - fake_client.sparql.query_df.return_value = pd.DataFrame([["1"]], columns=["a"]) - - with ( - mock.patch("courier.ontodocker.OntodockerClient", return_value=fake_client), - self.assertWarns(DeprecationWarning), - ): - df = courier.ontodocker.send_query( - "https://example.org/api/v1/jena/ds/sparql", - "SELECT ?a WHERE {}", - columns=["a"], - token="abc", - ) - - self.assertIsInstance(df, pd.DataFrame) - fake_client.sparql.query_df.assert_called_once_with( - "ds", - "SELECT ?a WHERE {}", - columns=["a"], - ) - - def test_send_query_requires_scheme_in_endpoint(self): - with self.assertRaisesRegex(ValueError, "must include a URL scheme"): - _ = courier.ontodocker.send_query( - "example.org/api/v1/jena/ds/sparql", - "SELECT ?a WHERE {}", - columns=["a"], - ) - - -if __name__ == "__main__": - unittest.main()