From 45f274da64eef5a59dd33767bf3e2f2d190d373f Mon Sep 17 00:00:00 2001
From: essmaw <essmay.touami@ens.uvsq.fr>
Date: Wed, 7 Jan 2026 16:59:14 +0100
Subject: [PATCH 1/3] Apply suggested modifications from @pierrepo.

---
 models/{dataset_model.py => dataset.py} | 51 +++++++------------------
 models/{file_model.py => file.py}       | 38 +++++-------------
 2 files changed, 23 insertions(+), 66 deletions(-)
 rename models/{dataset_model.py => dataset.py} (86%)
 rename models/{file_model.py => file.py} (83%)

diff --git a/models/dataset_model.py b/models/dataset.py
similarity index 86%
rename from models/dataset_model.py
rename to models/dataset.py
index feaff05..eec8552 100644
--- a/models/dataset_model.py
+++ b/models/dataset.py
@@ -21,12 +21,7 @@
 
 from pydantic import BaseModel, Field, StringConstraints, field_validator
 
-from scripts.toolbox import (
-    DatasetProject,
-    DatasetRepository,
-    format_date,
-    validate_http_url,
-)
+from scripts.toolbox import DatasetProject, DatasetRepository, format_date
 
 DOI = Annotated[
     str,
@@ -39,7 +34,7 @@
 # =====================================================================
 # Base dataset class
 # =====================================================================
-class DatasetModel(BaseModel):
+class DatasetMetadata(BaseModel):
     """
     Base Pydantic model for scraped molecular dynamics datasets.
 
@@ -142,7 +137,7 @@ class DatasetModel(BaseModel):
             "Must start with '10.' and follow the standard DOI format."
         ),
     )
-    links: list[str] | None = Field(
+    external_links: list[str] | None = Field(
         None,
         description="External links to papers or other databases.",
     )
@@ -150,19 +145,19 @@ class DatasetModel(BaseModel):
     # ------------------------------------------------------------------
     # File-level metadata
     # ------------------------------------------------------------------
-    nb_files: int = Field(
-        ...,
+    nb_files: int | None = Field(
+        None,
         description="Total number of files in the dataset.",
     )
 
     # ------------------------------------------------------------------
     # Simulation metadata
     # ------------------------------------------------------------------
-    simulation_program_name: str | None = Field(
+    software_name: str | None = Field(
         None,
         description="Molecular dynamics engine used (e.g. GROMACS, NAMD).",
     )
-    simulation_program_version: str | None = Field(
+    software_version: str | None = Field(
         None,
         description="Version of the simulation engine.",
     )
@@ -182,15 +177,15 @@ class DatasetModel(BaseModel):
         None,
         description="Version of the forcefield model.",
     )
-    timestep: float | None = Field(
+    simulation_timestep: float | None = Field(
         None, description="The time interval between new positions computation (in fs)."
     )
-    delta: float | None = Field(
-        None, description="The time gap between frames (in ns)."
-    )
-    simulation_time: str | None = Field(
+    simulation_time: list[str] | None = Field(
         None, description="The accumulated simulation time (in μs)."
     )
+    simulation_temperature: list[str] | None = Field(
+        None, description="The temperature chosen for the simulations (in K ou °C)."
+    )
 
     # ------------------------------------------------------------------
     # Validators
@@ -215,28 +210,8 @@ def format_dates(cls, v: datetime | str) -> str:  # noqa: N805
         """
         return format_date(v)
 
-    # To uncomment if u won't take time to valid all the dataset urls
-    # @field_validator("url", mode="before")
-    def validate_url(cls, v: str) -> str:  # noqa: N805
-        """
-        Validate that the URL field is a properly formatted HTTP/HTTPS URL.
-
-        Parameters
-        ----------
-        cls : type[BaseDataset]
-            The Pydantic model class being validated.
-        v : str
-            The input value of the 'url' field to validate.
-
-        Returns
-        -------
-        str
-            The validated URL string.
-        """
-        return validate_http_url(v)
-
     @field_validator(
-        "description", "keywords", "links", "license", "author_names",
+        "description", "keywords", "external_links", "license", "author_names",
     "molecule_names", mode="before")
     def empty_to_none(cls, v: list | str) -> list | str | None:  # noqa: N805
         """
diff --git a/models/file_model.py b/models/file.py
similarity index 83%
rename from models/file_model.py
rename to models/file.py
index 7103d10..5844f10 100644
--- a/models/file_model.py
+++ b/models/file.py
@@ -20,17 +20,13 @@
 
 from pydantic import BaseModel, Field, computed_field, field_validator
 
-from scripts.toolbox import (
-    DatasetRepository,
-    format_date,
-    validate_http_url,
-)
+from scripts.toolbox import DatasetRepository, format_date
 
 
 # =====================================================================
 # Base file class
 # =====================================================================
-class FileModel(BaseModel):
+class FileMetadata(BaseModel):
     """
     Base Pydantic model for scraped molecular dynamics files.
 
@@ -55,6 +51,10 @@ class FileModel(BaseModel):
         ...,
         description="Unique identifier of the dataset in the source repository.",
     )
+    dataset_url_in_repository: str = Field(
+        ...,
+        description="Canonical URL to access the dataset in the repository.",
+    )
     file_url_in_repository: str = Field(
         ...,
         description="Direct URL to access the file.",
@@ -78,7 +78,7 @@ class FileModel(BaseModel):
     date_last_fetched: str = Field(
         ..., description="Date when the file was last fetched."
     )
-    containing_archive_fie_name: str | None = Field(
+    containing_archive_file_name: str | None = Field(
         None,
         description="Archive file name this file was extracted from, if applicable."
     )
@@ -104,29 +104,9 @@ def format_dates(cls, v: datetime | str) -> str:  # noqa: N805
         """
         return format_date(v)
 
-    # To uncomment if u won't take time to valid all the file urls
-    # @field_validator("file_url_in_repository", mode="before")
-    def valid_url(cls, v: str) -> str:  # noqa: N805
-        """
-        Validate that the URL field is a properly formatted HTTP/HTTPS URL.
-
-        Parameters
-        ----------
-        cls : type[NomadFiles]
-            The Pydantic model class being validated.
-        v : str
-            The input value of the 'url' field to validate.
-
-        Returns
-        -------
-        str
-            The validated URL string.
-        """
-        return validate_http_url(v)
-
     @computed_field
     @property
-    def file_size_with_readable_unit(self) -> str | None:
+    def file_size_with_human_readable_unit(self) -> str | None:
         """
         Convert the file size in bytes into a human-readable format.
 
@@ -143,3 +123,5 @@ def file_size_with_readable_unit(self) -> str | None:
                 size /= 1024
                 idx += 1
             return f"{size:.2f} {units[idx]}"
+        else:
+            return None

From 66b2b6b6e19e2a2c3ec30690ce9b809f207715ed Mon Sep 17 00:00:00 2001
From: essmaw <essmay.touami@etu.u-paris.fr>
Date: Thu, 8 Jan 2026 17:37:56 +0100
Subject: [PATCH 2/3] Rename dataset_repository fields to
 dataset_repository_name for clarity in DatasetMetadata and FileMetadata
 models + Make dataset project name and id optional.

---
 models/dataset.py | 10 +++++-----
 models/file.py    |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/models/dataset.py b/models/dataset.py
index eec8552..d7dd891 100644
--- a/models/dataset.py
+++ b/models/dataset.py
@@ -48,15 +48,15 @@ class DatasetMetadata(BaseModel):
     # ------------------------------------------------------------------
     # Core provenance
     # ------------------------------------------------------------------
-    dataset_repository: DatasetRepository = Field(
+    dataset_repository_name: DatasetRepository = Field(
         ...,
         description=(
             "Name of the source repository. "
             "Allowed values: ZENODO, FIGSHARE, OSF, NOMAD, ATLAS, GPCRMD."
         ),
     )
-    dataset_project: DatasetProject = Field(
-        ...,
+    dataset_project_name: DatasetProject | None = Field(
+        None,
         description=(
             "Name of the project."
             "Allowed values: ZENODO, FIGSHARE, OSF, NOMAD, ATLAS, GPCRMD."
@@ -66,8 +66,8 @@ class DatasetMetadata(BaseModel):
         ...,
         description="Unique identifier of the dataset in the source repository.",
     )
-    dataset_id_in_project: str = Field(
-        ...,
+    dataset_id_in_project: str | None = Field(
+        None,
         description="Unique identifier of the dataset in the project.",
     )
     dataset_url_in_repository: str = Field(
diff --git a/models/file.py b/models/file.py
index 5844f10..2f22781 100644
--- a/models/file.py
+++ b/models/file.py
@@ -40,7 +40,7 @@ class FileMetadata(BaseModel):
     # ------------------------------------------------------------------
     # Core provenance
     # ------------------------------------------------------------------
-    dataset_repository: DatasetRepository = Field(
+    dataset_repository_name: DatasetRepository = Field(
         ...,
         description=(
             "Name of the source repository. "

From 09c9843d5768d05c5159f502a0db1242bf89aa98 Mon Sep 17 00:00:00 2001
From: essmaw <essmay.touami@etu.u-paris.fr>
Date: Thu, 8 Jan 2026 19:00:37 +0100
Subject: [PATCH 3/3] Remove duplicate dataset_url_in_repository field from
 FileMetadata model.

---
 models/file.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/models/file.py b/models/file.py
index 2f22781..9eac32c 100644
--- a/models/file.py
+++ b/models/file.py
@@ -51,10 +51,6 @@ class FileMetadata(BaseModel):
         ...,
         description="Unique identifier of the dataset in the source repository.",
     )
-    dataset_url_in_repository: str = Field(
-        ...,
-        description="Canonical URL to access the dataset in the repository.",
-    )
     file_url_in_repository: str = Field(
         ...,
         description="Direct URL to access the file.",