@@ -221,127 +221,6 @@ async def call_api():
221
221
assert len (elements ) > 0
222
222
223
223
224
- @pytest .mark .parametrize ("split_pdf" , [True , False ])
225
- @pytest .mark .parametrize ("vlm_model" , ["gpt-4o" ])
226
- @pytest .mark .parametrize ("vlm_model_provider" , ["openai" ])
227
- @pytest .mark .parametrize (
228
- "filename" ,
229
- [
230
- "layout-parser-paper-fast.pdf" ,
231
- "fake-power-point.ppt" ,
232
- "embedded-images-tables.jpg" ,
233
- ]
234
- )
235
- def test_partition_strategy_vlm_openai (split_pdf , vlm_model , vlm_model_provider , client , doc_path , filename ):
236
- with open (doc_path / filename , "rb" ) as f :
237
- files = shared .Files (
238
- content = f .read (),
239
- file_name = filename ,
240
- )
241
-
242
- req = operations .PartitionRequest (
243
- partition_parameters = shared .PartitionParameters (
244
- files = files ,
245
- strategy = "vlm" ,
246
- vlm_model = vlm_model ,
247
- vlm_model_provider = vlm_model_provider ,
248
- languages = ["eng" ],
249
- split_pdf_page = split_pdf ,
250
- )
251
- )
252
-
253
- response = client .general .partition (
254
- request = req
255
- )
256
- assert response .status_code == 200
257
- assert len (response .elements ) > 0
258
- assert response .elements [0 ]["metadata" ]["partitioner_type" ] == "vlm_partition"
259
-
260
-
261
- @pytest .mark .parametrize ("split_pdf" , [True , False ])
262
- @pytest .mark .parametrize ("vlm_model" ,
263
- [
264
- "us.amazon.nova-pro-v1:0" ,
265
- "us.amazon.nova-lite-v1:0" ,
266
- "us.anthropic.claude-3-5-sonnet-20241022-v2:0" ,
267
- "us.anthropic.claude-3-opus-20240229-v1:0" ,
268
- "us.anthropic.claude-3-haiku-20240307-v1:0" ,
269
- "us.anthropic.claude-3-sonnet-20240229-v1:0" ,
270
- "us.meta.llama3-2-90b-instruct-v1:0" ,
271
- "us.meta.llama3-2-11b-instruct-v1:0" ,
272
- ]
273
- )
274
- @pytest .mark .parametrize ("vlm_model_provider" , ["bedrock" ])
275
- @pytest .mark .parametrize (
276
- "filename" ,
277
- [
278
- "layout-parser-paper-fast.pdf" ,
279
- "fake-power-point.ppt" ,
280
- "embedded-images-tables.jpg" ,
281
- ]
282
- )
283
- def test_partition_strategy_vlm_bedrock (split_pdf , vlm_model , vlm_model_provider , client , doc_path , filename ):
284
- with open (doc_path / filename , "rb" ) as f :
285
- files = shared .Files (
286
- content = f .read (),
287
- file_name = filename ,
288
- )
289
-
290
- req = operations .PartitionRequest (
291
- partition_parameters = shared .PartitionParameters (
292
- files = files ,
293
- strategy = "vlm" ,
294
- vlm_model = vlm_model ,
295
- vlm_model_provider = vlm_model_provider ,
296
- languages = ["eng" ],
297
- split_pdf_page = split_pdf ,
298
- )
299
- )
300
-
301
- response = client .general .partition (
302
- request = req
303
- )
304
- assert response .status_code == 200
305
- assert len (response .elements ) > 0
306
- assert response .elements [0 ]["metadata" ]["partitioner_type" ] == "vlm_partition"
307
-
308
- @pytest .mark .parametrize ("split_pdf" , [True , False ])
309
- @pytest .mark .parametrize ("vlm_model" , ["claude-3-5-sonnet-20241022" ,])
310
- @pytest .mark .parametrize ("vlm_model_provider" , ["anthropic" ])
311
- @pytest .mark .parametrize (
312
- "filename" ,
313
- [
314
- "layout-parser-paper-fast.pdf" ,
315
- "fake-power-point.ppt" ,
316
- "embedded-images-tables.jpg" ,
317
- ]
318
- )
319
- def test_partition_strategy_vlm_anthropic (split_pdf , vlm_model , vlm_model_provider , client , doc_path , filename ):
320
- with open (doc_path / filename , "rb" ) as f :
321
- files = shared .Files (
322
- content = f .read (),
323
- file_name = filename ,
324
- )
325
-
326
- req = operations .PartitionRequest (
327
- partition_parameters = shared .PartitionParameters (
328
- files = files ,
329
- strategy = "vlm" ,
330
- vlm_model = vlm_model ,
331
- vlm_model_provider = vlm_model_provider ,
332
- languages = ["eng" ],
333
- split_pdf_page = split_pdf ,
334
- )
335
- )
336
-
337
- response = client .general .partition (
338
- request = req
339
- )
340
- assert response .status_code == 200
341
- assert len (response .elements ) > 0
342
- assert response .elements [0 ]["metadata" ]["partitioner_type" ] == "vlm_partition"
343
-
344
-
345
224
def test_returns_422_for_invalid_pdf (
346
225
caplog : pytest .LogCaptureFixture ,
347
226
doc_path : Path ,
0 commit comments