Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ exclude: '^docs/conf.py'

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
- id: trailing-whitespace
- id: check-added-large-files
Expand All @@ -18,26 +18,26 @@ repos:
args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows

- repo: https://github.com/pycqa/isort
rev: 5.13.2
rev: 7.0.0
hooks:
- id: isort
args: ["--profile", "black", "--filter-files"]

- repo: https://github.com/psf/black
rev: 24.10.0
rev: 26.1.0
hooks:
- id: black
language_version: python3

- repo: https://github.com/PyCQA/flake8
rev: 7.1.1
rev: 7.3.0
hooks:
- id: flake8
## You can add flake8 plugins via `additional_dependencies`:
# additional_dependencies: [flake8-bugbear]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0 # Use the sha / tag you want to point at
rev: v1.19.1 # Use the sha / tag you want to point at
hooks:
- id: mypy
additional_dependencies: ['types-requests']
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
6 changes: 3 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The package is not yet deployed to PyPI. Only an editable (development) install
2. Clone the repository `git clone https://github.com/ClimateCompatibleGrowth/research_index_backend.git`
3. Change directory `cd research_index_backend`
4. Install the package `pip install -e .` as an editable package (development install)
5. Obtain an OpenAIRE Graph refresh token and create a .env file with the following parameters:
5. Obtain an OpenAIRE Graph refresh token and create a .env file with the following parameters:
```MG_HOST=
MG_PORT=
MG_PORT_ALT=
Expand All @@ -30,7 +30,7 @@ The package is not yet deployed to PyPI. Only an editable (development) install

research_index --help
usage: research_index [-h] [-i] [-l LIMIT] [-u] list_of_dois

positional arguments:
list_of_dois Path to CSV file containing list of DOIs

Expand All @@ -39,7 +39,7 @@ The package is not yet deployed to PyPI. Only an editable (development) install
-i, --initialise Delete existing data and create new database
-l, --limit N Limit number of DOIs to process (default: 50)
-u, --update-metadata Update metadata for existing DOIs
-w, --write-metadata Save JSON responses to disk
-w, --write-metadata Save JSON responses to disk

Examples:
-> Process 10 DOIs from file:
Expand Down
29 changes: 18 additions & 11 deletions src/research_index_backend/create_graph_from_doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,9 @@ def upload_article_to_memgraph(output: AnonymousArticle) -> bool:
return True


def main(list_of_dois: list, limit: int, update_metadata: bool, write_metadata: bool):
def main(
list_of_dois: list, limit: int, update_metadata: bool, write_metadata: bool
):
try:
doi_manager = DOIManager(
list_of_dois, limit=limit, update_metadata=update_metadata
Expand Down Expand Up @@ -343,8 +345,12 @@ def entry_point(db: Driver) -> None:
logger.info("Deleted graph")
load_initial_data(join("data", "init"))

doi_manager = main(list_of_dois, limit=args.limit, update_metadata=args.update_metadata,
write_metadata=args.write_metadata)
doi_manager = main(
list_of_dois,
limit=args.limit,
update_metadata=args.update_metadata,
write_metadata=args.write_metadata,
)
add_country_relations()
metrics, processed_dois = doi_manager.ingestion_metrics()

Expand All @@ -357,17 +363,18 @@ def entry_point(db: Driver) -> None:
print(f"{key.ljust(max_key_length)} | {value}")

print("\nProcessing Results:")
print(f"\n• Failed metadata DOIs ({metrics['metadata_failure']}):")
for doi in processed_dois['metadata_failure']:
print(f"\n• Failed metadata DOIs ({metrics['metadata_failure']}):")
for doi in processed_dois["metadata_failure"]:
print(f" - {doi}")
print(f"\n• Invalid pattern DOIs ({metrics['invalid_pattern_dois']}):")
for doi in processed_dois['invalid_pattern_dois']:

print(f"\n• Invalid pattern DOIs ({metrics['invalid_pattern_dois']}):")
for doi in processed_dois["invalid_pattern_dois"]:
print(f" - {doi}")

print(f"\n• Duplicated Submissions ({metrics['duplicated_submissions']}):")
for doi in processed_dois['duplicated_submissions']:
for doi in processed_dois["duplicated_submissions"]:
print(f" - {doi}")



if __name__ == "__main__":
entry_point()
46 changes: 27 additions & 19 deletions src/research_index_backend/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@
from collections import Counter
from logging import getLogger
from re import IGNORECASE, compile
from typing import Dict, List
from typing import Any, Dict, List, Tuple

from neo4j import Driver

# https://neo4j.com/docs/api/python-driver/current/api.html#errors
from neo4j.exceptions import (
ServiceUnavailable,
Neo4jError,
) # https://neo4j.com/docs/api/python-driver/current/api.html#errors
ServiceUnavailable,
)
from pydantic import BaseModel

from .session import connect_to_db
Expand All @@ -38,16 +40,12 @@ class DOI(BaseModel):
ingestion_success: bool = False


class DOITracker(BaseModel):
doi_tracker: Dict[str, DOI]


class DOIManager:
"""Manages the validation and ingestion tracking of Digital Object Identifiers (DOIs).
"""Tracks the validation and ingestion of Digital Object Identifiers (DOIs)

This class handles DOI validation, database existence checks, and metadata tracking.
It processes DOIs up to a specified limit and can optionally update metadata
for existing entries.
This class handles DOI validation, database existence checks, and metadata
tracking. It processes DOIs up to a specified limit and can optionally
update metadata for existing entries.

Parameters
----------
Expand Down Expand Up @@ -89,7 +87,10 @@ class DOIManager:
"""

def __init__(
self, list_of_dois: List[str], limit: int, update_metadata: bool = False
self,
list_of_dois: List[str],
limit: int,
update_metadata: bool = False,
) -> None:

self._validate_inputs(list_of_dois, limit, update_metadata)
Expand All @@ -104,9 +105,10 @@ def __init__(
limit if limit < len(self.list_of_dois) else len(self.list_of_dois)
)
self.update_metadata = update_metadata
self.doi_tracker: DOITracker = {
self.doi_tracker: Dict[str, DOI] = {
doi: DOI(doi=doi) for doi in self.list_of_dois[: self.limit]
}

self.PATTERN = compile(DOI_PATTERN, IGNORECASE)

def _validate_inputs(
Expand Down Expand Up @@ -184,26 +186,32 @@ def search_dois(self, db: Driver) -> None:
self.num_existing_dois = len(self.existing_dois)

logger.info(
f"Found {self.num_existing_dois} existing and {self.num_new_dois} new DOIs"
f"Found {self.num_existing_dois} existing and "
            + f"{self.num_new_dois} new DOIs"
)

def validate_dois(self) -> Dict[str, List[str]]:
@connect_to_db
def validate_dois(self, db: Driver) -> Dict[str, DOI]:
try:
self.pattern_check()
self.search_dois()
self.search_dois(db)
return self.doi_tracker
except Exception as e:
logger.error(f"DOI validation failed: {e}")
raise

def ingestion_metrics(self) -> Dict[str, int]:
def ingestion_metrics(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
total_time = self.end_time - self.start_time

processed_dois = (
self.valid_pattern_dois if self.update_metadata else self.new_dois
)

duplicated_submissions = [doi for doi, count in Counter(self.list_of_dois).items() if count > 1]

duplicated_submissions = [
doi
for doi, count in Counter(self.list_of_dois).items()
if count > 1
]

metadata_pass = [
doi
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/authors.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,4 @@
"$": "Rogner, Holger"
}
]
}
}
2 changes: 1 addition & 1 deletion tests/fixtures/zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -682,4 +682,4 @@
},
"browseResults": null
}
}
}
76 changes: 54 additions & 22 deletions tests/test_dois.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from research_index_backend.doi import DOIManager

valid_dois = [
Expand All @@ -22,7 +23,7 @@
raw_dois = [
"10.1371/journal.pclm.0000331",
"doi.org/10.5281/zenodo.11395843",
"doi.org/10.5281/zenodo.11396572",
"doi.org/10.5281/zenodo.11396572",
"10.5281/zenodo.11396370",
"https://doi.org/10.5281/zenodo.11395518",
"10.5281/zenodo.11395518.",
Expand All @@ -33,26 +34,33 @@
"10.1371/journal.pclm.0000331",
"10.5281/zenodo.11395843",
"10.5281/zenodo.11396572",
"10.5281/zenodo.11396370",
"10.5281/zenodo.11396370",
"10.5281/zenodo.11395518",
"10.5281/zenodo.11395518",
"10.5281/zenodo.11395519",
]


def test_valid_dois():
"""Test that valid DOI patterns are correctly identified."""
doi_manager = DOIManager(valid_dois, limit=len(valid_dois), update_metadata=False)
doi_manager = DOIManager(
valid_dois, limit=len(valid_dois), update_metadata=False
)
doi_manager.pattern_check()
for doi in doi_manager.doi_tracker:
assert doi_manager.doi_tracker[doi].valid_pattern


def test_invalid_dois():
"""Test that invalid DOI patterns are correctly identified."""
doi_manager = DOIManager(invalid_dois, limit=len(invalid_dois), update_metadata=False)
doi_manager = DOIManager(
invalid_dois, limit=len(invalid_dois), update_metadata=False
)
doi_manager.pattern_check()
for doi in doi_manager.doi_tracker:
assert not doi_manager.doi_tracker[doi].valid_pattern


def test_mixed_dois():
"""Test processing of mixed valid and invalid DOIs."""
doi_manager = DOIManager(
Expand All @@ -61,58 +69,82 @@ def test_mixed_dois():
update_metadata=False,
)
doi_manager.pattern_check()
valid_count = sum(1 for doi in doi_manager.doi_tracker.values() if doi.valid_pattern)
invalid_count = sum(1 for doi in doi_manager.doi_tracker.values() if not doi.valid_pattern)

valid_count = sum(
1 for doi in doi_manager.doi_tracker.values() if doi.valid_pattern
)
invalid_count = sum(
1 for doi in doi_manager.doi_tracker.values() if not doi.valid_pattern
)

assert valid_count == len(valid_dois)
assert invalid_count == len(invalid_dois)


def test_doi_objects():
"""Test DOI object initialization and default values."""
doi_manager = DOIManager(valid_dois, limit=len(valid_dois), update_metadata=False)
doi_manager = DOIManager(
valid_dois, limit=len(valid_dois), update_metadata=False
)
doi_manager.pattern_check()

for doi in doi_manager.doi_tracker:
doi_obj = doi_manager.doi_tracker[doi]
assert doi_obj.doi == doi, "DOI string mismatch"
assert doi_obj.valid_pattern, "Pattern should be valid"
assert not doi_obj.already_exists, "Should not exist by default"
assert not doi_obj.openalex_metadata, "Should not have OpenAlex metadata"
assert not doi_obj.openaire_metadata, "Should not have OpenAire metadata"
assert (
not doi_obj.openalex_metadata
), "Should not have OpenAlex metadata"
assert (
not doi_obj.openaire_metadata
), "Should not have OpenAire metadata"
assert not doi_obj.ingestion_success, "Should not be ingested"


def test_pattern_cleaner():
"""Test DOI pattern cleaning functionality."""
doi_manager = DOIManager(raw_dois, limit=len(raw_dois), update_metadata=False)
doi_manager = DOIManager(
raw_dois, limit=len(raw_dois), update_metadata=False
)
assert doi_manager.list_of_dois == cleaned_dois, "DOI cleaning failed"


def test_case_insensitive_pattern():
"""Test that DOI pattern matching is case insensitive."""
doi_manager = DOIManager(
["10.5281/zenodo.8140241", "10.5281/ZENODO.8140241"],
limit=2,
update_metadata=False
update_metadata=False,
)
doi_manager.pattern_check()
assert all(doi.valid_pattern for doi in doi_manager.doi_tracker.values())



def test_invalid_limit():
"""Test that providing an invalid (negative) limit raises a ValueError."""
"""Providing an invalid (negative) limit raises a ValueError"""
with pytest.raises(ValueError):
# Expect DOIManager to raise an error upon invalid limit input.
doi_manager = DOIManager(["10.5281/zenodo.8140241"], limit=-5, update_metadata=False)
doi_manager = DOIManager(
["10.5281/zenodo.8140241"], limit=-5, update_metadata=False
)
doi_manager.validate_dois()


def test_wrong_type_for_doi_list():
"""Test that providing a wrong type (non-iterable) for DOI list raises a TypeError."""
"""Providing a wrong type (non-iterable) for DOI list raises a TypeError"""
with pytest.raises(TypeError):
# Passing a single string instead of a list should raise a TypeError.
DOIManager("10.5281/zenodo.8140241", limit=1, update_metadata=False)



def test_wrong_type_for_update_metadata():
"""Test that providing a wrong type for update_metadata raises a TypeError."""
"""Providing a wrong type for update_metadata raises a TypeError"""
with pytest.raises(TypeError):
# Passing a string instead of a boolean should raise a TypeError.
DOIManager(["10.5281/zenodo.8140241"], limit=1, update_metadata="False")

# TODO: should the elements of the list of DOIs be checked for type or this is handled in the entry point?
DOIManager(
["10.5281/zenodo.8140241"], limit=1, update_metadata="False"
)


# TODO: should the elements of the list of DOIs be checked for type
# or this is handled in the entry point?
Loading