feat: integrate weaviate #117
GitHub Actions / JUnit Test Report
failed
May 28, 2025 in 0s
604 tests run, 590 passed, 13 skipped, 1 failed.
Annotations
Check failure on line 44 in packages/ragbits-document-search/tests/integration/test_docling.py
github-actions / JUnit Test Report
test_docling.test_docling_parser[PDFDocument]
assert 6 == 7
+ where 6 = len([TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='Figure 1: The Transformer - model architecture.', key='Figure 1: The Transformer - model architecture.', text_representation='Figure 1: The Transformer - model architecture.'), TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='The Transformer follows this overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1, respectively.', key='The Transformer follows this overall architecture using sta...16`\x11\x9d\xe0\xf9\x8aO\xcb\xd7=M]\x9a\xe4i\xd7\xbe+{\x98gTh\x85\xca9\x89\xb6:,\n\xa7\r\x8d\xb9\xc8#\x19\xcd\x00t\xf0M\x1d\xcd\xbcs\xc4\xc1\xa3\x91C\xa3\x0e\xe0\x8c\x83RV~\x83o%\x9f\x87t\xcbi\x81\x12\xc3i\x14n\x0ff\x08\x01\xad\n\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00\xaf=\x9c73\xdb\xcd"\x9f6\xdd\xcb\xc6\xc0\xe0\x82A\x04}\x08=>\x9e\x95b\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x02\xbd\xf5\x95\xbe\xa3e5\x9d\xd2o\x82U\xda\xea\t\x07\x1e\xc4r\x0f\xbdQ\xb6\xf0\xfd\xb4\x17\xf0\xde\xcbqww<\x08R\x16\xb9\x97\x7f\x96\x0f\x04\x81\xea@\xc6z\xe3\xeah\xa2\x805\xaa\xbd\xe5\x947\xf6\xfeE\xc2\x96\x8bz\xb9\\\xe0\x1d\xa4\x10\x0f\xa8\xc8\x1cQE\x00X\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x03\xff\xd9', description=None, ocr_extracted_text='Figure 1: The Transformer - model architecture.', key='Extracted text: Figure 1: The Transformer - model architecture.', text_representation='Extracted text: Figure 1: The Transformer - model architecture.')])
Raw output
document_metadata = DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits...bits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source'))
expected_num_elements = 7
@pytest.mark.parametrize(
("document_metadata", "expected_num_elements"),
[
pytest.param(
DocumentMeta.from_literal("Name of Peppa's brother is George."),
1,
id="TextDocument",
),
pytest.param(
DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "md" / "test_file.md"),
1,
id="MarkdownDocument",
),
pytest.param(
DocumentMeta.from_local_path(
Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png"
),
6,
id="ImageDocument",
),
pytest.param(
DocumentMeta.from_local_path(
Path(__file__).parent.parent / "assets" / "pdf" / "transformers_paper_page.pdf"
),
7,
id="PDFDocument",
),
],
)
async def test_docling_parser(document_metadata: DocumentMeta, expected_num_elements: int) -> None:
document = await document_metadata.fetch()
parser = DoclingDocumentParser()
elements = await parser.parse(document)
> assert len(elements) == expected_num_elements
E assert 6 == 7
E + where 6 = len([TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='Figure 1: The Transformer - model architecture.', key='Figure 1: The Transformer - model architecture.', text_representation='Figure 1: The Transformer - model architecture.'), TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='The Transformer follows this overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1, respectively.', key='The Transformer follows this overall architecture using sta...16`\x11\x9d\xe0\xf9\x8aO\xcb\xd7=M]\x9a\xe4i\xd7\xbe+{\x98gTh\x85\xca9\x89\xb6:,\n\xa7\r\x8d\xb9\xc8#\x19\xcd\x00t\xf0M\x1d\xcd\xbcs\xc4\xc1\xa3\x91C\xa3\x0e\xe0\x8c\x83RV~\x83o%\x9f\x87t\xcbi\x81\x12\xc3i\x14n\x0ff\x08\x01\xad\n\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00\xaf=\x9c73\xdb\xcd"\x9f6\xdd\xcb\xc6\xc0\xe0\x82A\x04}\x08=>\x9e\x95b\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x02\xbd\xf5\x95\xbe\xa3e5\x9d\xd2o\x82U\xda\xea\t\x07\x1e\xc4r\x0f\xbdQ\xb6\xf0\xfd\xb4\x17\xf0\xde\xcbqww<\x08R\x16\xb9\x97\x7f\x96\x0f\x04\x81\xea@\xc6z\xe3\xeah\xa2\x805\xaa\xbd\xe5\x947\xf6\xfeE\xc2\x96\x8bz\xb9\\\xe0\x1d\xa4\x10\x0f\xa8\xc8\x1cQE\x00X\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x03\xff\xd9', description=None, ocr_extracted_text='Figure 1: The Transformer - model architecture.', key='Extracted text: Figure 1: The Transformer - model architecture.', text_representation='Extracted text: Figure 1: The Transformer - model architecture.')])
packages/ragbits-document-search/tests/integration/test_docling.py:44: AssertionError
Loading