diff --git a/src/bluesearch/entrypoint/database/add.py b/src/bluesearch/entrypoint/database/add.py index 7055f2941..6cbde64d7 100644 --- a/src/bluesearch/entrypoint/database/add.py +++ b/src/bluesearch/entrypoint/database/add.py @@ -136,9 +136,14 @@ def run( } article_mappings.append(article_mapping) + # Integrate abstract into the sentences table + entire_text = [ + ("Abstract", "\n".join(article.abstract)), + *article.section_paragraphs, + ] + swapped = ( - (text, (section, ppos)) - for ppos, (section, text) in enumerate(article.section_paragraphs) + (text, (section, ppos)) for ppos, (section, text) in enumerate(entire_text) ) for doc, (section, ppos) in nlp.pipe(swapped, as_tuples=True): for spos, sent in enumerate(doc.sents): diff --git a/tests/unit/entrypoint/database/test_add.py b/tests/unit/entrypoint/database/test_add.py index 306915250..55cb4db9a 100644 --- a/tests/unit/entrypoint/database/test_add.py +++ b/tests/unit/entrypoint/database/test_add.py @@ -126,7 +126,7 @@ def test_no_sentences(tmp_path, engine_sqlite, monkeypatch, model_entities): article = Article( title="Title", authors=["Author"], - abstract="Abstract", + abstract=[], section_paragraphs=[], pubmed_id="PubMed ID", pmc_id="PMC ID",