Skip to content

Commit f225911

Browse files
authored
Merge pull request #39 from Unstructured-IO/jj/log-retry
2 parents d5050ec + a7f5477 commit f225911

File tree

5 files changed

+60
-20
lines changed

5 files changed

+60
-20
lines changed
Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
import os
21
import pytest
2+
import logging
3+
import re
34

45
import requests_mock
56

@@ -8,15 +9,11 @@
89
from unstructured_client.utils.retries import BackoffStrategy, RetryConfig
910

1011

11-
def get_api_key():
12-
api_key = os.getenv("UNS_API_KEY")
13-
if api_key is None:
14-
raise ValueError("""UNS_API_KEY environment variable not set.
15-
Set it in your current shell session with `export UNS_API_KEY=<api_key>`""")
16-
return api_key
12+
FAKE_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
1713

18-
# this test requires UNS_API_KEY be set in your shell session. Ex: `export UNS_API_KEY=<api_key>`
19-
def test_backoff_strategy():
14+
15+
def test_retry_with_backoff_does_retry(caplog):
16+
caplog.set_level(logging.INFO)
2017
filename = "README.md"
2118
backoff_strategy = BackoffStrategy(
2219
initial_interval=100, max_interval=1000, exponent=1.5, max_elapsed_time=3000
@@ -28,13 +25,10 @@ def test_backoff_strategy():
2825
with requests_mock.Mocker() as mock:
2926
# mock a 500 status code for POST requests to the api
3027
mock.post("https://api.unstructured.io/general/v0/general", status_code=500)
31-
session = UnstructuredClient(api_key_auth=get_api_key())
28+
session = UnstructuredClient(api_key_auth=FAKE_KEY)
3229

3330
with open(filename, "rb") as f:
34-
files=shared.Files(
35-
content=f.read(),
36-
file_name=filename,
37-
)
31+
files=shared.Files(content=f.read(), file_name=filename)
3832

3933
req = shared.PartitionParameters(files=files)
4034

@@ -45,3 +39,28 @@ def test_backoff_strategy():
4539

4640
# the number of retries varies
4741
assert len(mock.request_history) > 1
42+
43+
44+
def test_backoff_strategy_logs_retries(caplog):
    """Check that a retried request emits the "Retry attempt" log line.

    The partition endpoint is mocked to always answer 500, so the call is
    expected to fail after retrying; the captured log output must mention
    the first retry attempt and its sleep interval.
    """
    caplog.set_level(logging.INFO)
    filename = "README.md"

    # Short intervals keep the whole retry window to a few seconds.
    retry_config = RetryConfig(
        strategy="backoff",
        backoff=BackoffStrategy(
            initial_interval=100, max_interval=1000, exponent=1.5, max_elapsed_time=3000
        ),
        retry_connection_errors=True,
    )

    with requests_mock.Mocker() as mock:
        # mock a 500 status code for POST requests to the api
        mock.post("https://api.unstructured.io/general/v0/general", status_code=500)
        client = UnstructuredClient(api_key_auth=FAKE_KEY)

        with open(filename, "rb") as handle:
            payload = shared.Files(content=handle.read(), file_name=filename)

        request = shared.PartitionParameters(files=payload)
        with pytest.raises(Exception):
            client.general.partition(request, retries=retry_config)

    # Same expression as before: escaped prefix, any sleep value, fixed suffix.
    expected = re.escape('Retry attempt #1. Sleeping') + ".*" + 'seconds before retry'
    match = re.search(expected, caplog.text)
    assert match is not None

src/unstructured_client/general.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Any, List, Optional
55
from unstructured_client import utils
66
from unstructured_client.models import errors, operations, shared
7-
from unstructured_client.utils._decorators import suggest_defining_url_if_401 # human code
7+
from unstructured_client.utils._human_utils import suggest_defining_url_if_401 # human code
88

99
class General:
1010
sdk_configuration: SDKConfiguration

src/unstructured_client/sdk.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import Callable, Dict, Union
77
from unstructured_client import utils
88
from unstructured_client.models import shared
9-
from unstructured_client.utils._decorators import clean_server_url # human code
9+
from unstructured_client.utils._human_utils import clean_server_url # human code
1010

1111
class UnstructuredClient:
1212
r"""Unstructured Pipeline API: Partition documents with the Unstructured library"""

src/unstructured_client/utils/_decorators.py renamed to src/unstructured_client/utils/_human_utils.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
from __future__ import annotations
22

33
import functools
4-
from typing import cast, Callable, TYPE_CHECKING, Optional
4+
import logging
5+
import sys
6+
from typing import Callable, Optional, TYPE_CHECKING, cast
57
from typing_extensions import ParamSpec
6-
from urllib.parse import urlparse, urlunparse, ParseResult
8+
from urllib.parse import ParseResult, urlparse, urlunparse
79
import warnings
810

9-
from unstructured_client.models import errors, operations
10-
1111
if TYPE_CHECKING:
1212
from unstructured_client.general import General
13+
from unstructured_client.models import operations
1314

1415

1516
_P = ParamSpec("_P")
@@ -75,6 +76,8 @@ def suggest_defining_url_if_401(
7576

7677
@functools.wraps(func)
7778
def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> operations.PartitionResponse:
79+
from unstructured_client.models import errors # pylint: disable=C0415
80+
7881
try:
7982
return func(*args, **kwargs)
8083
except errors.SDKError as error:
@@ -88,3 +91,18 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> operations.PartitionResponse
8891
return func(*args, **kwargs)
8992

9093
return wrapper
94+
95+
96+
def log_retries(retry_count: int, sleep: float, exception: Exception):
    """Log a retry attempt to give users visibility into failing requests.

    Args:
        retry_count: 1-based number of the retry about to be made.
        sleep: Seconds the caller will wait before retrying; logged rounded
            to one decimal place.
        exception: The error that triggered the retry. When it carries a
            ``response`` with a ``status_code`` the status (and any response
            text) is logged; otherwise the exception itself is logged.

    This helper never raises because of a missing or ``None`` ``response``:
    a failure inside the logging path would otherwise mask the original
    request error and abort the retry loop.
    """
    logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s', stream=sys.stdout)
    logger = logging.getLogger('unstructured-client')
    logger.setLevel(logging.INFO)

    # Not every retried exception has a usable response: connection errors
    # from `requests` expose `response = None`, and arbitrary exceptions have
    # no such attribute at all. Compare against None explicitly rather than
    # using truthiness, since `requests.Response` is falsy for error statuses.
    response = getattr(exception, "response", None)
    status_code = getattr(response, "status_code", None)
    rounded_sleep = round(sleep, 1)

    if status_code is None:
        logger.info(
            "Request failed with %s: %s Retry attempt #%s. Sleeping %s seconds before retry.",
            type(exception).__name__,
            exception,
            retry_count,
            rounded_sleep,
        )
        return

    logger.info(
        "Response status code: %s Retry attempt #%s. Sleeping %s seconds before retry.",
        status_code,
        retry_count,
        rounded_sleep,
    )

    response_text = getattr(response, "text", None)
    if response_text:
        logger.info("%s", response_text)

src/unstructured_client/utils/retries.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
import requests
88

9+
from unstructured_client.utils._human_utils import log_retries # human code
10+
911

1012
class BackoffStrategy:
1113
initial_interval: int
@@ -116,5 +118,6 @@ def retry_with_backoff(func, initial_interval=500, max_interval=60000, exponent=
116118
exponent**retries + random.uniform(0, 1))
117119
if sleep > max_interval/1000:
118120
sleep = max_interval/1000
121+
log_retries(retry_count=retries+1, sleep=sleep, exception=exception) # human code
119122
time.sleep(sleep)
120123
retries += 1

0 commit comments

Comments
 (0)