From c51e481c0b8d99052fb55641d23754708b776db8 Mon Sep 17 00:00:00 2001 From: Pierlou Date: Fri, 29 Aug 2025 14:58:10 +0200 Subject: [PATCH 1/2] refactor: switch to httpx --- README.md | 2 +- datagouv/base_object.py | 10 ++--- datagouv/client.py | 4 +- datagouv/resource.py | 6 +-- pyproject.toml | 6 +-- tests/conftest.py | 80 ++++++++++++++++++++------------------ tests/test_dataset.py | 23 ++++++----- tests/test_organization.py | 25 ++++++------ tests/test_resource.py | 26 +++++++------ 9 files changed, 97 insertions(+), 85 deletions(-) diff --git a/README.md b/README.md index 792c721..78dd907 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ for dat in organization.datasets(): print(f"{dat.title} has {len(dat.resources)} resources") ``` -> **Note:** If you encounter errors during API calls, the client will raise appropriate exceptions (e.g., `PermissionError` for authentication issues, `requests.exceptions.HTTPError` for API errors). +> **Note:** If you encounter errors during API calls, the client will raise appropriate exceptions (e.g., `PermissionError` for authentication issues, `httpx.HTTPError` for API errors). > **Note:** If you want to get objects from demo or dev, you must use a client: ```python diff --git a/datagouv/base_object.py b/datagouv/base_object.py index dcf8a7f..09ba89c 100755 --- a/datagouv/base_object.py +++ b/datagouv/base_object.py @@ -1,6 +1,6 @@ import logging -import requests +import httpx from .client import Client from .retry import simple_connection_retry @@ -34,7 +34,7 @@ def refresh(self, _from_response: dict | None = None) -> dict: return metadata @simple_connection_retry - def update(self, payload: dict) -> requests.Response: + def update(self, payload: dict) -> httpx.Response: assert_auth(self._client) logging.info(f"🔁 Putting {self.uri} with {payload}") r = self._client.session.put(self.uri, json=payload) @@ -43,7 +43,7 @@ def update(self, payload: dict) -> requests.Response: return r @simple_connection_retry - def delete(self) -> requests.Response: + def delete(self) -> httpx.Response: assert_auth(self._client) logging.info(f"🚮 Deleting {self.uri}") r = self._client.session.delete(self.uri) @@ -51,7 +51,7 @@ def delete(self) -> requests.Response: return r @simple_connection_retry - def update_extras(self, payload: dict) -> requests.Response: + def update_extras(self, payload: dict) -> httpx.Response: assert_auth(self._client) logging.info(f"🔁 Putting {self.uri} with extras {payload}") r = self._client.session.put(self.uri.replace("api/1", "api/2") + "extras/", json=payload) @@ -60,7 +60,7 @@ def update_extras(self, payload: dict) -> requests.Response: return r @simple_connection_retry - def delete_extras(self, payload: dict) -> requests.Response: + def delete_extras(self, payload: dict) -> httpx.Response: assert_auth(self._client) logging.info(f"🚮 Deleting extras {payload} for {self.uri}") r = self._client.session.delete( diff --git a/datagouv/client.py b/datagouv/client.py index 10a9b6a..b6123dd 100755 --- a/datagouv/client.py +++ b/datagouv/client.py @@ -1,6 +1,6 @@ from typing import Iterator -import requests +import httpx class Client: @@ -10,7 +10,7 @@ def __init__(self, environment: str = "www", api_key: str | None = None): if environment not in self._envs: raise ValueError(f"`environment` must be in {self._envs}") self.base_url = f"https://{environment}.data.gouv.fr" - self.session = requests.Session() + self.session = httpx.Client() self._authenticated = False if api_key: self._authenticated = True diff --git a/datagouv/resource.py b/datagouv/resource.py index 4d85c2c..31fd9a5 100755 --- a/datagouv/resource.py +++ b/datagouv/resource.py @@ -1,6 +1,6 @@ import logging -import requests +import httpx from .base_object import BaseObject, Creator, assert_auth from .client import Client @@ -79,10 +79,10 @@ def dataset(self): def download(self, path: str | None = None, chunk_size: int = 8192, **kwargs): if path is None: path = f"{self.id}.{self.format}" - with requests.get(self.url, stream=True, **kwargs) as r: + with httpx.stream("GET", self.url, **kwargs) as r: r.raise_for_status() with open(path, "wb") as f: - for chunk in r.iter_content(chunk_size=chunk_size): + for chunk in r.iter_raw(chunk_size=chunk_size): f.write(chunk) def get_api2_metadata(self) -> dict: diff --git a/pyproject.toml b/pyproject.toml index 7b1b1b8..85cf47c 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "0.1.4.dev" description = "Wrapper for the data.gouv.fr API" authors = [{ name = "Etalab", email = "opendatateam@data.gouv.fr" }] dependencies = [ - "requests>=2.32.4,<3", + "httpx>=0.28.1,<1", "tenacity>=9.0.0,<10", ] requires-python = ">=3.10,<3.14" @@ -14,8 +14,8 @@ keywords = ["api", "wrapper", "datagouv"] [project.optional-dependencies] dev = [ - "pytest==8.3.0", - "requests_mock==1.12.1", + "httpx>=0.28.1,<1", + "pytest-httpx>=0.35.0,<1", "bumpx>=0.3.10", "ruff>=0.11.2", ] diff --git a/tests/conftest.py b/tests/conftest.py index 9d3f56d..19f29d7 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,7 @@ import json +import httpx # noqa import pytest -import requests_mock DATASET_ID = "0123456789abcdef01234567" RESOURCE_ID = "aaaaaaaa-1111-bbbb-2222-cccccccccccc" @@ -23,60 +23,66 @@ @pytest.fixture -def dataset_api_call(): - with requests_mock.Mocker() as m: - m.get(f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/", json=dataset_metadata) - yield m +def dataset_api_call(httpx_mock): + httpx_mock.add_response( + url=f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/", + json=dataset_metadata, + is_reusable=True, + ) + yield httpx_mock @pytest.fixture -def static_resource_api1_call(): - with requests_mock.Mocker() as m: - m.get( - f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/", - json=resource_metadata_api1, - ) - yield m +def static_resource_api1_call(httpx_mock): + httpx_mock.add_response( + url=f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/", + json=resource_metadata_api1, + is_reusable=True, + ) + yield httpx_mock @pytest.fixture -def remote_resource_api1_call(): +def remote_resource_api1_call(httpx_mock): remote_metadata = resource_metadata_api1 remote_metadata["filetype"] = "remote" remote_metadata["url"] = "https://example.com/file.csv" - with requests_mock.Mocker() as m: - m.get( - f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/", - json=remote_metadata, - ) - yield m + httpx_mock.add_response( + url=f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/", + json=remote_metadata, + is_reusable=True, + ) + yield httpx_mock @pytest.fixture -def static_resource_api2_call(): - with requests_mock.Mocker() as m: - m.get( - f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/", - json=resource_metadata_api2, - ) - yield m +def static_resource_api2_call(httpx_mock): + httpx_mock.add_response( + url=f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/", + json=resource_metadata_api2, + is_reusable=True, + ) + yield httpx_mock @pytest.fixture -def remote_resource_api2_call(): +def remote_resource_api2_call(httpx_mock): remote_metadata = resource_metadata_api2 remote_metadata["resource"]["filetype"] = "remote" remote_metadata["resource"]["url"] = "https://example.com/file.csv" - with requests_mock.Mocker() as m: - m.get( - f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/", - json=remote_metadata, - ) - yield m + httpx_mock.add_response( + url=f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/", + json=remote_metadata, + is_reusable=True, + ) + yield httpx_mock @pytest.fixture -def organization_api_call(): - with requests_mock.Mocker() as m: - m.get(f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/", json=organization_metadata) - yield m +def organization_api_call(httpx_mock): + httpx_mock.add_response( + url=f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/", + json=organization_metadata, + is_reusable=True, + ) + yield httpx_mock diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 680537b..f84256e 100755 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -2,8 +2,8 @@ import shutil from unittest.mock import patch +import httpx # noqa import pytest -import requests_mock from conftest import DATASET_ID, OWNER_ID, dataset_metadata from datagouv.base_object import BaseObject @@ -20,7 +20,7 @@ def test_dataset_instance(dataset_api_call): def test_dataset_attributes_and_methods(dataset_api_call): client = Client() d = client.dataset(DATASET_ID) - with patch("requests.Session.get") as mock_func: + with patch("httpx.Client.get") as mock_func: d_from_response = Dataset(dataset_metadata["id"], _from_response=dataset_metadata) # when instanciating from a response, we don't call the API another time mock_func.assert_not_called() @@ -66,7 +66,7 @@ def test_authentification_assertion(): def test_resources(): - with patch("requests.Session.get") as mock_func: + with patch("httpx.Client.get") as mock_func: d_from_response = Dataset(DATASET_ID, _from_response=dataset_metadata) assert len(d_from_response.resources) == len(dataset_metadata["resources"]) assert all(isinstance(r, Resource) for r in d_from_response.resources) @@ -75,21 +75,26 @@ def test_resources(): def test_dataset_no_fetch(): - with patch("requests.Session.get") as mock_func: + with patch("httpx.Client.get") as mock_func: d = Dataset(DATASET_ID, fetch=False) mock_func.assert_not_called() assert all(getattr(d, a, None) is None for a in Dataset._attributes) assert d.uri -def test_download_dataset_resources(dataset_api_call): +def test_download_dataset_resources(dataset_api_call, httpx_mock): d = Dataset(DATASET_ID) folder = "data_test" os.mkdir(folder) - with requests_mock.Mocker() as m: - for res in d.resources: - m.get(res.url, content=b"a,b,c\n1,2,3") - d.download_resources(folder=folder, resources_types=["main"]) + for res in d.resources: + # only mocking the resources we download, otherwise httpx raises + if res.type == "main": + httpx_mock.add_response( + url=res.url, + content=b"a,b,c\n1,2,3", + is_reusable=True, + ) + d.download_resources(folder=folder, resources_types=["main"]) assert len(os.listdir(folder)) == len([r for r in d.resources if r.type == "main"]) shutil.rmtree(folder) diff --git a/tests/test_organization.py b/tests/test_organization.py index 51244d9..901ceef 100755 --- a/tests/test_organization.py +++ b/tests/test_organization.py @@ -1,7 +1,7 @@ from unittest.mock import patch +import httpx # noqa import pytest -import requests_mock from conftest import ( DATAGOUV_URL, ORGANIZATION_ID, @@ -24,7 +24,7 @@ def test_organization_instance(organization_api_call): def test_organization_attributes_and_methods(organization_api_call): client = Client() o = client.organization(ORGANIZATION_ID) - with patch("requests.Session.get") as mock_func: + with patch("httpx.Client.get") as mock_func: o_from_response = Organization( organization_metadata["id"], _from_response=organization_metadata ) @@ -70,20 +70,19 @@ def test_authentification_assertion(): o_from_response.create_dataset({"title": "Titre", "owner": OWNER_ID}) -def test_datasets(): - with requests_mock.Mocker() as m: - m.get( - f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/datasets/", - json={"data": [dataset_metadata], "next_page": None}, - ) - o_from_response = Organization(ORGANIZATION_ID, _from_response=organization_metadata) - datasets = list(o_from_response.datasets()) - assert len(datasets) == 1 - assert isinstance(datasets[0], Dataset) +def test_datasets(httpx_mock): + httpx_mock.add_response( + url=f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/datasets/", + json={"data": [dataset_metadata], "next_page": None}, + ) + o_from_response = Organization(ORGANIZATION_ID, _from_response=organization_metadata) + datasets = list(o_from_response.datasets()) + assert len(datasets) == 1 + assert isinstance(datasets[0], Dataset) def test_organization_no_fetch(): - with patch("requests.Session.get") as mock_func: + with patch("httpx.Client.get") as mock_func: o = Organization(ORGANIZATION_ID, fetch=False) mock_func.assert_not_called() assert all(getattr(o, a, None) is None for a in Organization._attributes) diff --git a/tests/test_resource.py b/tests/test_resource.py index 02ccefa..00bdeba 100755 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -1,8 +1,8 @@ import os from unittest.mock import patch +import httpx # noqa import pytest -import requests_mock from conftest import DATASET_ID, RESOURCE_ID, resource_metadata_api1 from datagouv.base_object import BaseObject @@ -31,7 +31,7 @@ def test_remote_resource_instance_with_dataset_id(remote_resource_api1_call): def test_resource_attributes_and_methods(static_resource_api2_call): client = Client() r = client.resource(RESOURCE_ID) - with patch("requests.Session.get") as mock_func: + with patch("httpx.Client.get") as mock_func: r_from_response = Resource( RESOURCE_ID, dataset_id=DATASET_ID, _from_response=resource_metadata_api1 ) @@ -84,7 +84,7 @@ def test_upload_file_into_remote(remote_resource_api2_call): def test_resource_no_fetch(): # no fetch only if the dataset_id is given, otherwise we ping api/2 - with patch("requests.Session.get") as mock_func: + with patch("httpx.Client.get") as mock_func: r = Resource(RESOURCE_ID, DATASET_ID, fetch=False) mock_func.assert_not_called() assert all(getattr(r, a, None) is None for a in Resource._attributes) @@ -98,14 +98,16 @@ def test_resource_no_fetch(): None, ], ) -def test_resource_download(remote_resource_api1_call, file_name): +def test_resource_download(remote_resource_api1_call, file_name, httpx_mock): r = Client().resource(RESOURCE_ID, dataset_id=DATASET_ID) - with requests_mock.Mocker() as m: - m.get(r.url, content=b"a,b,c\n1,2,3") - r.download(file_name) - local_name = file_name or f"{r.id}.{r.format}" - with open(local_name, "r") as f: - rows = f.readlines() - assert rows[0] == "a,b,c\n" - assert rows[1] == "1,2,3" + httpx_mock.add_response( + url=r.url, + content=b"a,b,c\n1,2,3", + ) + r.download(file_name) + local_name = file_name or f"{r.id}.{r.format}" + with open(local_name, "r") as f: + rows = f.readlines() + assert rows[0] == "a,b,c\n" + assert rows[1] == "1,2,3" os.remove(local_name) From b0809bf3cb63f24a339fbaf65ba6ece0e00c9c6c Mon Sep 17 00:00:00 2001 From: Pierlou Date: Fri, 29 Aug 2025 14:59:33 +0200 Subject: [PATCH 2/2] docs: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b51afc4..4cdc723 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Add `preview_url` field to resources' attributes [#18](https://github.com/datagouv/csv-detective/pull/18) - Pass the organization's client to its datasets [#19](https://github.com/datagouv/csv-detective/pull/19) +- Switch to `httpx` [#21](https://github.com/datagouv/csv-detective/pull/21) ## 0.1.3 (2025-08-21)