Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

- Add `preview_url` field to resources' attributes [#18](https://github.com/datagouv/csv-detective/pull/18)
- Pass the organization's client to its datasets [#19](https://github.com/datagouv/csv-detective/pull/19)
- Switch to `httpx` [#21](https://github.com/datagouv/csv-detective/pull/21)

## 0.1.3 (2025-08-21)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ for dat in organization.datasets():
print(f"{dat.title} has {len(dat.resources)} resources")
```

> **Note:** If you encounter errors during API calls, the client will raise appropriate exceptions (e.g., `PermissionError` for authentication issues, `requests.exceptions.HTTPError` for API errors).
> **Note:** If you encounter errors during API calls, the client will raise appropriate exceptions (e.g., `PermissionError` for authentication issues, `httpx.HTTPError` for API errors).

> **Note:** If you want to get objects from demo or dev, you must use a client:
```python
Expand Down
10 changes: 5 additions & 5 deletions datagouv/base_object.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

import requests
import httpx

from .client import Client
from .retry import simple_connection_retry
Expand Down Expand Up @@ -34,7 +34,7 @@ def refresh(self, _from_response: dict | None = None) -> dict:
return metadata

@simple_connection_retry
def update(self, payload: dict) -> requests.Response:
def update(self, payload: dict) -> httpx.Response:
assert_auth(self._client)
logging.info(f"🔁 Putting {self.uri} with {payload}")
r = self._client.session.put(self.uri, json=payload)
Expand All @@ -43,15 +43,15 @@ def update(self, payload: dict) -> requests.Response:
return r

@simple_connection_retry
def delete(self) -> requests.Response:
def delete(self) -> httpx.Response:
assert_auth(self._client)
logging.info(f"🚮 Deleting {self.uri}")
r = self._client.session.delete(self.uri)
r.raise_for_status()
return r

@simple_connection_retry
def update_extras(self, payload: dict) -> requests.Response:
def update_extras(self, payload: dict) -> httpx.Response:
assert_auth(self._client)
logging.info(f"🔁 Putting {self.uri} with extras {payload}")
r = self._client.session.put(self.uri.replace("api/1", "api/2") + "extras/", json=payload)
Expand All @@ -60,7 +60,7 @@ def update_extras(self, payload: dict) -> requests.Response:
return r

@simple_connection_retry
def delete_extras(self, payload: dict) -> requests.Response:
def delete_extras(self, payload: dict) -> httpx.Response:
assert_auth(self._client)
logging.info(f"🚮 Deleting extras {payload} for {self.uri}")
r = self._client.session.delete(
Expand Down
4 changes: 2 additions & 2 deletions datagouv/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Iterator

import requests
import httpx


class Client:
Expand All @@ -10,7 +10,7 @@ def __init__(self, environment: str = "www", api_key: str | None = None):
if environment not in self._envs:
raise ValueError(f"`environment` must be in {self._envs}")
self.base_url = f"https://{environment}.data.gouv.fr"
self.session = requests.Session()
self.session = httpx.Client()
self._authenticated = False
if api_key:
self._authenticated = True
Expand Down
6 changes: 3 additions & 3 deletions datagouv/resource.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

import requests
import httpx

from .base_object import BaseObject, Creator, assert_auth
from .client import Client
Expand Down Expand Up @@ -79,10 +79,10 @@ def dataset(self):
def download(self, path: str | None = None, chunk_size: int = 8192, **kwargs):
if path is None:
path = f"{self.id}.{self.format}"
with requests.get(self.url, stream=True, **kwargs) as r:
with httpx.stream("GET", self.url, **kwargs) as r:
r.raise_for_status()
with open(path, "wb") as f:
for chunk in r.iter_content(chunk_size=chunk_size):
for chunk in r.iter_raw(chunk_size=chunk_size):
f.write(chunk)

def get_api2_metadata(self) -> dict:
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "0.1.4.dev"
description = "Wrapper for the data.gouv.fr API"
authors = [{ name = "Etalab", email = "opendatateam@data.gouv.fr" }]
dependencies = [
"requests>=2.32.4,<3",
"httpx>=0.28.1,<1",
"tenacity>=9.0.0,<10",
]
requires-python = ">=3.10,<3.14"
Expand All @@ -14,8 +14,8 @@ keywords = ["api", "wrapper", "datagouv"]

[project.optional-dependencies]
dev = [
"pytest==8.3.0",
"requests_mock==1.12.1",
"httpx>=0.28.1,<1",
"pytest-httpx>=0.35.0,<1",
"bumpx>=0.3.10",
"ruff>=0.11.2",
]
Expand Down
80 changes: 43 additions & 37 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json

import httpx # noqa
import pytest
import requests_mock

DATASET_ID = "0123456789abcdef01234567"
RESOURCE_ID = "aaaaaaaa-1111-bbbb-2222-cccccccccccc"
Expand All @@ -23,60 +23,66 @@


@pytest.fixture
def dataset_api_call():
with requests_mock.Mocker() as m:
m.get(f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/", json=dataset_metadata)
yield m
def dataset_api_call(httpx_mock):
httpx_mock.add_response(
url=f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/",
json=dataset_metadata,
is_reusable=True,
)
yield httpx_mock


@pytest.fixture
def static_resource_api1_call():
with requests_mock.Mocker() as m:
m.get(
f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/",
json=resource_metadata_api1,
)
yield m
def static_resource_api1_call(httpx_mock):
httpx_mock.add_response(
url=f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/",
json=resource_metadata_api1,
is_reusable=True,
)
yield httpx_mock


@pytest.fixture
def remote_resource_api1_call():
def remote_resource_api1_call(httpx_mock):
remote_metadata = resource_metadata_api1
remote_metadata["filetype"] = "remote"
remote_metadata["url"] = "https://example.com/file.csv"
with requests_mock.Mocker() as m:
m.get(
f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/",
json=remote_metadata,
)
yield m
httpx_mock.add_response(
url=f"{DATAGOUV_URL}api/1/datasets/{DATASET_ID}/resources/{RESOURCE_ID}/",
json=remote_metadata,
is_reusable=True,
)
yield httpx_mock


@pytest.fixture
def static_resource_api2_call():
with requests_mock.Mocker() as m:
m.get(
f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/",
json=resource_metadata_api2,
)
yield m
def static_resource_api2_call(httpx_mock):
httpx_mock.add_response(
url=f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/",
json=resource_metadata_api2,
is_reusable=True,
)
yield httpx_mock


@pytest.fixture
def remote_resource_api2_call():
def remote_resource_api2_call(httpx_mock):
remote_metadata = resource_metadata_api2
remote_metadata["resource"]["filetype"] = "remote"
remote_metadata["resource"]["url"] = "https://example.com/file.csv"
with requests_mock.Mocker() as m:
m.get(
f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/",
json=remote_metadata,
)
yield m
httpx_mock.add_response(
url=f"{DATAGOUV_URL}api/2/datasets/resources/{RESOURCE_ID}/",
json=remote_metadata,
is_reusable=True,
)
yield httpx_mock


@pytest.fixture
def organization_api_call():
with requests_mock.Mocker() as m:
m.get(f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/", json=organization_metadata)
yield m
def organization_api_call(httpx_mock):
httpx_mock.add_response(
url=f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/",
json=organization_metadata,
is_reusable=True,
)
yield httpx_mock
23 changes: 14 additions & 9 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import shutil
from unittest.mock import patch

import httpx # noqa
import pytest
import requests_mock
from conftest import DATASET_ID, OWNER_ID, dataset_metadata

from datagouv.base_object import BaseObject
Expand All @@ -20,7 +20,7 @@ def test_dataset_instance(dataset_api_call):
def test_dataset_attributes_and_methods(dataset_api_call):
client = Client()
d = client.dataset(DATASET_ID)
with patch("requests.Session.get") as mock_func:
with patch("httpx.Client.get") as mock_func:
d_from_response = Dataset(dataset_metadata["id"], _from_response=dataset_metadata)
# when instantiating from a response, we don't call the API another time
mock_func.assert_not_called()
Expand Down Expand Up @@ -66,7 +66,7 @@ def test_authentification_assertion():


def test_resources():
with patch("requests.Session.get") as mock_func:
with patch("httpx.Client.get") as mock_func:
d_from_response = Dataset(DATASET_ID, _from_response=dataset_metadata)
assert len(d_from_response.resources) == len(dataset_metadata["resources"])
assert all(isinstance(r, Resource) for r in d_from_response.resources)
Expand All @@ -75,21 +75,26 @@ def test_resources():


def test_dataset_no_fetch():
with patch("requests.Session.get") as mock_func:
with patch("httpx.Client.get") as mock_func:
d = Dataset(DATASET_ID, fetch=False)
mock_func.assert_not_called()
assert all(getattr(d, a, None) is None for a in Dataset._attributes)
assert d.uri


def test_download_dataset_resources(dataset_api_call):
def test_download_dataset_resources(dataset_api_call, httpx_mock):
d = Dataset(DATASET_ID)
folder = "data_test"
os.mkdir(folder)
with requests_mock.Mocker() as m:
for res in d.resources:
m.get(res.url, content=b"a,b,c\n1,2,3")
d.download_resources(folder=folder, resources_types=["main"])
for res in d.resources:
# only mock the resources we actually download: pytest-httpx fails the test if a registered response is never requested
if res.type == "main":
httpx_mock.add_response(
url=res.url,
content=b"a,b,c\n1,2,3",
is_reusable=True,
)
d.download_resources(folder=folder, resources_types=["main"])
assert len(os.listdir(folder)) == len([r for r in d.resources if r.type == "main"])
shutil.rmtree(folder)

Expand Down
25 changes: 12 additions & 13 deletions tests/test_organization.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest.mock import patch

import httpx # noqa
import pytest
import requests_mock
from conftest import (
DATAGOUV_URL,
ORGANIZATION_ID,
Expand All @@ -24,7 +24,7 @@ def test_organization_instance(organization_api_call):
def test_organization_attributes_and_methods(organization_api_call):
client = Client()
o = client.organization(ORGANIZATION_ID)
with patch("requests.Session.get") as mock_func:
with patch("httpx.Client.get") as mock_func:
o_from_response = Organization(
organization_metadata["id"], _from_response=organization_metadata
)
Expand Down Expand Up @@ -70,20 +70,19 @@ def test_authentification_assertion():
o_from_response.create_dataset({"title": "Titre", "owner": OWNER_ID})


def test_datasets():
with requests_mock.Mocker() as m:
m.get(
f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/datasets/",
json={"data": [dataset_metadata], "next_page": None},
)
o_from_response = Organization(ORGANIZATION_ID, _from_response=organization_metadata)
datasets = list(o_from_response.datasets())
assert len(datasets) == 1
assert isinstance(datasets[0], Dataset)
def test_datasets(httpx_mock):
httpx_mock.add_response(
url=f"{DATAGOUV_URL}api/1/organizations/{ORGANIZATION_ID}/datasets/",
json={"data": [dataset_metadata], "next_page": None},
)
o_from_response = Organization(ORGANIZATION_ID, _from_response=organization_metadata)
datasets = list(o_from_response.datasets())
assert len(datasets) == 1
assert isinstance(datasets[0], Dataset)


def test_organization_no_fetch():
with patch("requests.Session.get") as mock_func:
with patch("httpx.Client.get") as mock_func:
o = Organization(ORGANIZATION_ID, fetch=False)
mock_func.assert_not_called()
assert all(getattr(o, a, None) is None for a in Organization._attributes)
Expand Down
26 changes: 14 additions & 12 deletions tests/test_resource.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
from unittest.mock import patch

import httpx # noqa
import pytest
import requests_mock
from conftest import DATASET_ID, RESOURCE_ID, resource_metadata_api1

from datagouv.base_object import BaseObject
Expand Down Expand Up @@ -31,7 +31,7 @@ def test_remote_resource_instance_with_dataset_id(remote_resource_api1_call):
def test_resource_attributes_and_methods(static_resource_api2_call):
client = Client()
r = client.resource(RESOURCE_ID)
with patch("requests.Session.get") as mock_func:
with patch("httpx.Client.get") as mock_func:
r_from_response = Resource(
RESOURCE_ID, dataset_id=DATASET_ID, _from_response=resource_metadata_api1
)
Expand Down Expand Up @@ -84,7 +84,7 @@ def test_upload_file_into_remote(remote_resource_api2_call):

def test_resource_no_fetch():
# no fetch only if the dataset_id is given, otherwise we ping api/2
with patch("requests.Session.get") as mock_func:
with patch("httpx.Client.get") as mock_func:
r = Resource(RESOURCE_ID, DATASET_ID, fetch=False)
mock_func.assert_not_called()
assert all(getattr(r, a, None) is None for a in Resource._attributes)
Expand All @@ -98,14 +98,16 @@ def test_resource_no_fetch():
None,
],
)
def test_resource_download(remote_resource_api1_call, file_name):
def test_resource_download(remote_resource_api1_call, file_name, httpx_mock):
r = Client().resource(RESOURCE_ID, dataset_id=DATASET_ID)
with requests_mock.Mocker() as m:
m.get(r.url, content=b"a,b,c\n1,2,3")
r.download(file_name)
local_name = file_name or f"{r.id}.{r.format}"
with open(local_name, "r") as f:
rows = f.readlines()
assert rows[0] == "a,b,c\n"
assert rows[1] == "1,2,3"
httpx_mock.add_response(
url=r.url,
content=b"a,b,c\n1,2,3",
)
r.download(file_name)
local_name = file_name or f"{r.id}.{r.format}"
with open(local_name, "r") as f:
rows = f.readlines()
assert rows[0] == "a,b,c\n"
assert rows[1] == "1,2,3"
os.remove(local_name)