Skip to content

Commit 05b1089

Browse files
authored
Not declaring OpenAlex missing a DOI if 429'd (#1180)
1 parent 10b69ff commit 05b1089

File tree

6 files changed

+245
-28
lines changed

6 files changed

+245
-28
lines changed

src/paperqa/clients/openalex.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import json
43
import logging
54
import os
65
from collections.abc import Collection
@@ -91,14 +90,23 @@ async def get_doc_details_from_openalex(
9190

9291
if fields:
9392
params["select"] = ",".join(fields)
93+
# Observed on 11/4/2025 with OpenAlex: with both a client-level timeout of
94+
# 15 sec and an API request timeout of 15 sec, httpx.ConnectTimeout was
95+
# repeatedly thrown for DOIs 10.1046/j.1365-2699.2003.00795 and
96+
# 10.2147/cia.s3785, even with up to 3 retries.
9497
response = await client.get(
9598
url, params=params, timeout=OPENALEX_API_REQUEST_TIMEOUT
9699
)
97100
try:
98101
response.raise_for_status()
99102
response_data = response.json()
100-
except (httpx.HTTPStatusError, json.JSONDecodeError) as exc:
101-
raise DOINotFoundError("Could not find paper given DOI/title.") from exc
103+
except httpx.HTTPStatusError as exc:
104+
if response.status_code == httpx.codes.NOT_FOUND:
105+
raise DOINotFoundError(
106+
f"Could not find paper given DOI/title,"
107+
f" response text was {response.text!r}."
108+
) from exc
109+
raise # Can get 429'd by OpenAlex
102110

103111
if response_data.get("status") == "failed":
104112
raise DOINotFoundError("OpenAlex API returned a failed status for the query.")

tests/cassettes/test_does_openalex_work[not-in-openalex].yaml

Lines changed: 68 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/cassettes/test_does_openalex_work[not-oa-in-openalex].yaml

Lines changed: 67 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/cassettes/test_does_openalex_work[oa-in-openalex2].yaml

Lines changed: 68 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/test_clients.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -797,27 +797,33 @@ async def test_tricky_journal_quality_results(doi: str, score: int) -> None:
797797

798798
@pytest.mark.vcr
799799
@pytest.mark.parametrize(
800-
("doi", "oa"),
800+
("doi", "in_oa", "is_openaccess"),
801801
[
802-
("10.1021/acs.jctc.5b00178", True),
802+
pytest.param("10.1021/acs.jctc.5b00178", True, True, id="oa-in-openalex1"),
803+
pytest.param("10.1093/nar/gkw1164", True, True, id="oa-in-openalex2"),
804+
pytest.param("10.1002/wrna.1370", True, False, id="not-oa-in-openalex"),
805+
pytest.param(
806+
"10.1046/j.1365-2699.2003.00795", False, None, id="not-in-openalex"
807+
),
803808
],
804809
)
805810
@pytest.mark.asyncio
806-
async def test_does_openalex_work(doi: str, oa: bool) -> None:
811+
async def test_does_openalex_work(
812+
doi: str, in_oa: bool, is_openaccess: bool | None
813+
) -> None:
807814
"""Run a simple test of OpenAlex, which we primarily want for open access checks."""
808-
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
815+
async with httpx_aiohttp.HttpxAiohttpClient(timeout=10) as http_client:
809816
openalex_client = DocMetadataClient(
810-
http_client,
811-
metadata_clients=[OpenAlexProvider],
812-
)
813-
openalex_details = await openalex_client.query(
814-
doi=doi,
815-
fields=["open_access"],
816-
)
817-
assert openalex_details, "Failed to query OpenAlex"
818-
assert (
819-
openalex_details.other["open_access"]["is_oa"] is oa
820-
), "Open access data should match"
821-
assert (
822-
openalex_details.year is None
823-
), "Year should not be populated because we set fields"
817+
http_client, metadata_clients=[OpenAlexProvider]
818+
)
819+
openalex_details = await openalex_client.query(doi=doi, fields=["open_access"])
820+
if in_oa:
821+
assert openalex_details, "Failed to query OpenAlex"
822+
assert (
823+
openalex_details.other["open_access"]["is_oa"] == is_openaccess
824+
), "Open access data should match"
825+
assert (
826+
openalex_details.year is None
827+
), "Year should not be populated because we set fields"
828+
else:
829+
assert not openalex_details, "Should have failed"

0 commit comments

Comments
 (0)