Skip to content

Commit 1169f63

Browse files
authored
Retrying OpenAlex httpx.ReadTimeout and 500s (#1182)
1 parent 699e426 commit 1169f63

File tree

1 file changed

+34
-10
lines changed

1 file changed

+34
-10
lines changed

src/paperqa/clients/openalex.py

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,12 @@
99
from urllib.parse import quote
1010

1111
import httpx
12+
from tenacity import (
13+
AsyncRetrying,
14+
before_sleep_log,
15+
retry_if_exception,
16+
stop_after_attempt,
17+
)
1218

1319
from paperqa.types import DocDetails
1420
from paperqa.utils import BIBTEX_MAPPING, mutate_acute_accents, strings_similarity
@@ -58,7 +64,7 @@ def get_openalex_api_key() -> str | None:
5864
return os.getenv("OPENALEX_API_KEY")
5965

6066

61-
async def get_doc_details_from_openalex(
67+
async def get_doc_details_from_openalex( # noqa: PLR0912
6268
client: httpx.AsyncClient,
6369
doi: str | None = None,
6470
title: str | None = None,
@@ -100,16 +106,33 @@ async def get_doc_details_from_openalex(
100106

101107
if fields:
102108
params["select"] = ",".join(fields)
103-
# Seen on 11/4/2025 with OpenAlex and both a client-level timeout of 15-sec
104-
# and API request timeout of 15-sec, we repeatedly saw httpx.ConnectTimeout
105-
# being thrown for DOIs 10.1046/j.1365-2699.2003.00795 and 10.2147/cia.s3785,
106-
# even with up to 3 retries
107-
response = await client.get(
108-
url, params=params, headers=headers, timeout=OPENALEX_API_REQUEST_TIMEOUT
109-
)
110109
try:
111-
response.raise_for_status()
112-
response_data = response.json()
110+
# Seen on 11/4/2025 with OpenAlex and both a client-level timeout of 15-sec
111+
# and API request timeout of 15-sec, we repeatedly saw httpx.ConnectTimeout
112+
# being thrown for DOIs 10.1046/j.1365-2699.2003.00795 and 10.2147/cia.s3785,
113+
# even with up to 3 retries
114+
async for attempt in AsyncRetrying(
115+
retry=retry_if_exception(
116+
lambda exc: (
117+
isinstance(exc, httpx.ReadTimeout)
118+
or (
119+
isinstance(exc, httpx.HTTPStatusError)
120+
and exc.response.status_code
121+
== httpx.codes.INTERNAL_SERVER_ERROR
122+
)
123+
)
124+
),
125+
before_sleep=before_sleep_log(logger, logging.WARNING),
126+
stop=stop_after_attempt(3),
127+
):
128+
with attempt:
129+
response = await client.get(
130+
url,
131+
params=params,
132+
headers=headers,
133+
timeout=OPENALEX_API_REQUEST_TIMEOUT,
134+
)
135+
response.raise_for_status()
113136
except httpx.HTTPStatusError as exc:
114137
if response.status_code == httpx.codes.NOT_FOUND:
115138
raise DOINotFoundError(
@@ -118,6 +141,7 @@ async def get_doc_details_from_openalex(
118141
) from exc
119142
raise # Can get 429'd by OpenAlex
120143

144+
response_data = response.json()
121145
if response_data.get("status") == "failed":
122146
raise DOINotFoundError("OpenAlex API returned a failed status for the query.")
123147

0 commit comments

Comments
 (0)