99from urllib .parse import quote
1010
1111import httpx
12+ from tenacity import (
13+ AsyncRetrying ,
14+ before_sleep_log ,
15+ retry_if_exception ,
16+ stop_after_attempt ,
17+ )
1218
1319from paperqa .types import DocDetails
1420from paperqa .utils import BIBTEX_MAPPING , mutate_acute_accents , strings_similarity
@@ -58,7 +64,7 @@ def get_openalex_api_key() -> str | None:
5864 return os .getenv ("OPENALEX_API_KEY" )
5965
6066
61- async def get_doc_details_from_openalex (
67+ async def get_doc_details_from_openalex ( # noqa: PLR0912
6268 client : httpx .AsyncClient ,
6369 doi : str | None = None ,
6470 title : str | None = None ,
@@ -100,16 +106,33 @@ async def get_doc_details_from_openalex(
100106
101107 if fields :
102108 params ["select" ] = "," .join (fields )
103- # Seen on 11/4/2025 with OpenAlex and both a client-level timeout of 15-sec
104- # and API request timeout of 15-sec, we repeatedly saw httpx.ConnectTimeout
105- # being thrown for DOIs 10.1046/j.1365-2699.2003.00795 and 10.2147/cia.s3785,
106- # even with up to 3 retries
107- response = await client .get (
108- url , params = params , headers = headers , timeout = OPENALEX_API_REQUEST_TIMEOUT
109- )
110109 try :
111- response .raise_for_status ()
112- response_data = response .json ()
110+ # Seen on 11/4/2025 with OpenAlex and both a client-level timeout of 15-sec
111+ # and API request timeout of 15-sec, we repeatedly saw httpx.ConnectTimeout
112+ # being thrown for DOIs 10.1046/j.1365-2699.2003.00795 and 10.2147/cia.s3785,
113+ # even with up to 3 retries
114+ async for attempt in AsyncRetrying (
115+ retry = retry_if_exception (
116+ lambda exc : (
117+ isinstance (exc , httpx .ReadTimeout )
118+ or (
119+ isinstance (exc , httpx .HTTPStatusError )
120+ and exc .response .status_code
121+ == httpx .codes .INTERNAL_SERVER_ERROR
122+ )
123+ )
124+ ),
125+ before_sleep = before_sleep_log (logger , logging .WARNING ),
126+ stop = stop_after_attempt (3 ),
127+ ):
128+ with attempt :
129+ response = await client .get (
130+ url ,
131+ params = params ,
132+ headers = headers ,
133+ timeout = OPENALEX_API_REQUEST_TIMEOUT ,
134+ )
135+ response .raise_for_status ()
113136 except httpx .HTTPStatusError as exc :
114137 if response .status_code == httpx .codes .NOT_FOUND :
115138 raise DOINotFoundError (
@@ -118,6 +141,7 @@ async def get_doc_details_from_openalex(
118141 ) from exc
119142 raise # Can get 429'd by OpenAlex
120143
144+ response_data = response .json ()
121145 if response_data .get ("status" ) == "failed" :
122146 raise DOINotFoundError ("OpenAlex API returned a failed status for the query." )
123147
0 commit comments