Skip to content

Commit c41845c

Browse files
authored
chore: remove SDK vlm integration tests (#301)
We have a number of integration tests that just hit SaaS with different vlm strategies. None of these are specific to testing the SDK layer; they just serve to slow us down. This is a common pattern in here, so expect more test cleanup to come!
1 parent 3932384 commit c41845c

File tree

1 file changed

+0
-121
lines changed

1 file changed

+0
-121
lines changed

_test_unstructured_client/integration/test_integration.py

Lines changed: 0 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -221,127 +221,6 @@ async def call_api():
221221
assert len(elements) > 0
222222

223223

224-
@pytest.mark.parametrize("split_pdf", [True, False])
225-
@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
226-
@pytest.mark.parametrize("vlm_model_provider", ["openai"])
227-
@pytest.mark.parametrize(
228-
"filename",
229-
[
230-
"layout-parser-paper-fast.pdf",
231-
"fake-power-point.ppt",
232-
"embedded-images-tables.jpg",
233-
]
234-
)
235-
def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
236-
with open(doc_path / filename, "rb") as f:
237-
files = shared.Files(
238-
content=f.read(),
239-
file_name=filename,
240-
)
241-
242-
req = operations.PartitionRequest(
243-
partition_parameters=shared.PartitionParameters(
244-
files=files,
245-
strategy="vlm",
246-
vlm_model=vlm_model,
247-
vlm_model_provider=vlm_model_provider,
248-
languages=["eng"],
249-
split_pdf_page=split_pdf,
250-
)
251-
)
252-
253-
response = client.general.partition(
254-
request=req
255-
)
256-
assert response.status_code == 200
257-
assert len(response.elements) > 0
258-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
259-
260-
261-
@pytest.mark.parametrize("split_pdf", [True, False])
262-
@pytest.mark.parametrize("vlm_model",
263-
[
264-
"us.amazon.nova-pro-v1:0",
265-
"us.amazon.nova-lite-v1:0",
266-
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
267-
"us.anthropic.claude-3-opus-20240229-v1:0",
268-
"us.anthropic.claude-3-haiku-20240307-v1:0",
269-
"us.anthropic.claude-3-sonnet-20240229-v1:0",
270-
"us.meta.llama3-2-90b-instruct-v1:0",
271-
"us.meta.llama3-2-11b-instruct-v1:0",
272-
]
273-
)
274-
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
275-
@pytest.mark.parametrize(
276-
"filename",
277-
[
278-
"layout-parser-paper-fast.pdf",
279-
"fake-power-point.ppt",
280-
"embedded-images-tables.jpg",
281-
]
282-
)
283-
def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
284-
with open(doc_path / filename, "rb") as f:
285-
files = shared.Files(
286-
content=f.read(),
287-
file_name=filename,
288-
)
289-
290-
req = operations.PartitionRequest(
291-
partition_parameters=shared.PartitionParameters(
292-
files=files,
293-
strategy="vlm",
294-
vlm_model=vlm_model,
295-
vlm_model_provider=vlm_model_provider,
296-
languages=["eng"],
297-
split_pdf_page=split_pdf,
298-
)
299-
)
300-
301-
response = client.general.partition(
302-
request=req
303-
)
304-
assert response.status_code == 200
305-
assert len(response.elements) > 0
306-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
307-
308-
@pytest.mark.parametrize("split_pdf", [True, False])
309-
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
310-
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
311-
@pytest.mark.parametrize(
312-
"filename",
313-
[
314-
"layout-parser-paper-fast.pdf",
315-
"fake-power-point.ppt",
316-
"embedded-images-tables.jpg",
317-
]
318-
)
319-
def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
320-
with open(doc_path / filename, "rb") as f:
321-
files = shared.Files(
322-
content=f.read(),
323-
file_name=filename,
324-
)
325-
326-
req = operations.PartitionRequest(
327-
partition_parameters=shared.PartitionParameters(
328-
files=files,
329-
strategy="vlm",
330-
vlm_model=vlm_model,
331-
vlm_model_provider=vlm_model_provider,
332-
languages=["eng"],
333-
split_pdf_page=split_pdf,
334-
)
335-
)
336-
337-
response = client.general.partition(
338-
request=req
339-
)
340-
assert response.status_code == 200
341-
assert len(response.elements) > 0
342-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
343-
344-
345224
def test_returns_422_for_invalid_pdf(
346225
caplog: pytest.LogCaptureFixture,
347226
doc_path: Path,

0 commit comments

Comments
 (0)