From 4f68acff8028383adc0f84879bcdab171d5fbdb1 Mon Sep 17 00:00:00 2001 From: Emilie Delattre Date: Mon, 7 Mar 2022 17:44:01 +0100 Subject: [PATCH 1/2] Integrate abstract of articles into sentences table --- src/bluesearch/entrypoint/database/add.py | 5 ++++- tests/unit/entrypoint/database/test_add.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/bluesearch/entrypoint/database/add.py b/src/bluesearch/entrypoint/database/add.py index 7055f2941..dae2741c4 100644 --- a/src/bluesearch/entrypoint/database/add.py +++ b/src/bluesearch/entrypoint/database/add.py @@ -136,9 +136,12 @@ def run( } article_mappings.append(article_mapping) + # Integrate abstract into the sentences table + entire_text = [("Abstract", "\n".join(article.abstract)), *article.section_paragraphs] + swapped = ( (text, (section, ppos)) - for ppos, (section, text) in enumerate(article.section_paragraphs) + for ppos, (section, text) in enumerate(entire_text) ) for doc, (section, ppos) in nlp.pipe(swapped, as_tuples=True): for spos, sent in enumerate(doc.sents): diff --git a/tests/unit/entrypoint/database/test_add.py b/tests/unit/entrypoint/database/test_add.py index 306915250..55cb4db9a 100644 --- a/tests/unit/entrypoint/database/test_add.py +++ b/tests/unit/entrypoint/database/test_add.py @@ -126,7 +126,7 @@ def test_no_sentences(tmp_path, engine_sqlite, monkeypatch, model_entities): article = Article( title="Title", authors=["Author"], - abstract="Abstract", + abstract=[], section_paragraphs=[], pubmed_id="PubMed ID", pmc_id="PMC ID", From 96b34991e38c9c645cd7667d85ac7d97b5662afc Mon Sep 17 00:00:00 2001 From: Emilie Delattre Date: Mon, 7 Mar 2022 17:48:16 +0100 Subject: [PATCH 2/2] Fix linting --- src/bluesearch/entrypoint/database/add.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bluesearch/entrypoint/database/add.py b/src/bluesearch/entrypoint/database/add.py index dae2741c4..6cbde64d7 100644 --- a/src/bluesearch/entrypoint/database/add.py +++ b/src/bluesearch/entrypoint/database/add.py @@ -137,11 +137,13 @@ def run( article_mappings.append(article_mapping) # Integrate abstract into the sentences table - entire_text = [("Abstract", "\n".join(article.abstract)), *article.section_paragraphs] + entire_text = [ + ("Abstract", "\n".join(article.abstract)), + *article.section_paragraphs, + ] swapped = ( - (text, (section, ppos)) - for ppos, (section, text) in enumerate(entire_text) + (text, (section, ppos)) for ppos, (section, text) in enumerate(entire_text) ) for doc, (section, ppos) in nlp.pipe(swapped, as_tuples=True): for spos, sent in enumerate(doc.sents):