Skip to content

feat: integrate weaviate #118

feat: integrate weaviate

feat: integrate weaviate #118

GitHub Actions / JUnit Test Report failed May 28, 2025 in 0s

604 tests run, 590 passed, 13 skipped, 1 failed.

Annotations

Check failure on line 44 in packages/ragbits-document-search/tests/integration/test_docling.py

See this annotation in the file changed.

@github-actions github-actions / JUnit Test Report

test_docling.test_docling_parser[PDFDocument]

assert 6 == 7
 +  where 6 = len([TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='Figure 1: The Transformer - model architecture.', key='Figure 1: The Transformer - model architecture.', text_representation='Figure 1: The Transformer - model architecture.'), TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='The Transformer follows this overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1, respectively.', key='The Transformer follows this overall architecture using sta...16`\x11\x9d\xe0\xf9\x8aO\xcb\xd7=M]\x9a\xe4i\xd7\xbe+{\x98gTh\x85\xca9\x89\xb6:,\n\xa7\r\x8d\xb9\xc8#\x19\xcd\x00t\xf0M\x1d\xcd\xbcs\xc4\xc1\xa3\x91C\xa3\x0e\xe0\x8c\x83RV~\x83o%\x9f\x87t\xcbi\x81\x12\xc3i\x14n\x0ff\x08\x01\xad\n\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00\xaf=\x9c73\xdb\xcd"\x9f6\xdd\xcb\xc6\xc0\xe0\x82A\x04}\x08=>\x9e\x95b\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x02\xbd\xf5\x95\xbe\xa3e5\x9d\xd2o\x82U\xda\xea\t\x07\x1e\xc4r\x0f\xbdQ\xb6\xf0\xfd\xb4\x17\xf0\xde\xcbqww<\x08R\x16\xb9\x97\x7f\x96\x0f\x04\x81\xea@\xc6z\xe3\xeah\xa2\x805\xaa\xbd\xe5\x947\xf6\xfeE\xc2\x96\x8bz\xb9\\\xe0\x1d\xa4\x10\x0f\xa8\xc8\x1cQE\x00X\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x03\xff\xd9', description=None, ocr_extracted_text='Figure 1: The Transformer - model architecture.', key='Extracted text: Figure 1: The Transformer - model architecture.', text_representation='Extracted text: Figure 1: The Transformer - model architecture.')])
Raw output
document_metadata = DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits...bits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source'))
expected_num_elements = 7

    @pytest.mark.parametrize(
        ("document_metadata", "expected_num_elements"),
        [
            pytest.param(
                DocumentMeta.from_literal("Name of Peppa's brother is George."),
                1,
                id="TextDocument",
            ),
            pytest.param(
                DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "md" / "test_file.md"),
                1,
                id="MarkdownDocument",
            ),
            pytest.param(
                DocumentMeta.from_local_path(
                    Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png"
                ),
                6,
                id="ImageDocument",
            ),
            pytest.param(
                DocumentMeta.from_local_path(
                    Path(__file__).parent.parent / "assets" / "pdf" / "transformers_paper_page.pdf"
                ),
                7,
                id="PDFDocument",
            ),
        ],
    )
    async def test_docling_parser(document_metadata: DocumentMeta, expected_num_elements: int) -> None:
        document = await document_metadata.fetch()
        parser = DoclingDocumentParser()
    
        elements = await parser.parse(document)
    
>       assert len(elements) == expected_num_elements
E       assert 6 == 7
E        +  where 6 = len([TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='Figure 1: The Transformer - model architecture.', key='Figure 1: The Transformer - model architecture.', text_representation='Figure 1: The Transformer - model architecture.'), TextElement(element_type='text', document_meta=DocumentMeta(document_type=<DocumentType.PDF: 'pdf'>, source=LocalFileSource(path=PosixPath('/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf'), source_type='local_file_source')), location=ElementLocation(page_number=1, coordinates=None), score=None, content='The Transformer follows this overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1, respectively.', key='The Transformer follows this overall architecture using sta...16`\x11\x9d\xe0\xf9\x8aO\xcb\xd7=M]\x9a\xe4i\xd7\xbe+{\x98gTh\x85\xca9\x89\xb6:,\n\xa7\r\x8d\xb9\xc8#\x19\xcd\x00t\xf0M\x1d\xcd\xbcs\xc4\xc1\xa3\x91C\xa3\x0e\xe0\x8c\x83RV~\x83o%\x9f\x87t\xcbi\x81\x12\xc3i\x14n\x0ff\x08\x01\xad\n\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00(\xa2\x8a\x00\xaf=\x9c73\xdb\xcd"\x9f6\xdd\xcb\xc6\xc0\xe0\x82A\x04}\x08=>\x9e\x95b\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x02\xbd\xf5\x95\xbe\xa3e5\x9d\xd2o\x82U\xda\xea\t\x07\x1e\xc4r\x0f\xbdQ\xb6\xf0\xfd\xb4\x17\xf0\xde\xcbqww<\x08R\x16\xb9\x97\x7f\x96\x0f\x04\x81\xea@\xc6z\xe3\xeah\xa2\x805\xaa\xbd\xe5\x947\xf6\xfeE\xc2\x96\x8bz\xb9\\\xe0\x1d\xa4\x10\x0f\xa8\xc8\x1cQE\x00X\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x00\xa2\x8a(\x03\xff\xd9', description=None, ocr_extracted_text='Figure 1: The Transformer - model architecture.', key='Extracted text: Figure 1: The Transformer - model architecture.', text_representation='Extracted text: Figure 1: The Transformer - model architecture.')])

packages/ragbits-document-search/tests/integration/test_docling.py:44: AssertionError