Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ jobs:

- name: Install dependencies
run: |
pip install '.[docs]'
pip install . --group docs
# uv venv
# uv pip install '.[docs]'
# uv pip install . --group docs

- name: Set up Git
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install '.[docs]'
pip install . --group docs

- name: Set up Git
run: |
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,16 @@ jobs:
cache: 'pip'

- name: Install dependencies
run: pip install -e ".[dev]"
run: pip install -e . --group dev
if: matrix.python-version != '3.9' && matrix.python-version != '3.10' && matrix.python-version != '3.11' && matrix.python-version != '3.12'

- name: Install dependencies
run: pip install -e ".[dev,setup]"
run: pip install -e . --group dev --group setup
if: matrix.python-version == '3.9'

- name: Install dependencies
# skip ML tests for 3.10 and 3.11
run: pip install -e ".[dev-no-ml]"
run: pip install -e . --group dev-no-ml
if: matrix.python-version == '3.10' || matrix.python-version == '3.11' || matrix.python-version == '3.12'

- name: Test with Pytest on Python ${{ matrix.python-version }}
Expand Down Expand Up @@ -118,7 +118,7 @@ jobs:
cache: 'pip'

- name: Install dependencies
run: pip install -e ".[docs]"
run: pip install -e . --group docs

- name: Set up Git
run: |
Expand Down
29 changes: 0 additions & 29 deletions Makefile

This file was deleted.

4 changes: 4 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
- New `eds.explode` pipe that splits one document into multiple documents, one per span yielded by its `span_getter` parameter, each new document containing exactly that single span.
- New `Training a span classifier` tutorial, and reorganized deep-learning docs
- `ScheduledOptimizer` now warns when a parameter selector does not match any parameter.
- New trainable `eds.relation_detector_ffn` component to detect relations between entities. These relations are stored in each entity: `head._.rel[relation_label] = [tail1, tail2, ...]`.
- Load "Status" annotator notes as `status` dict attribute
- New `attention` pooling mode in `eds.relation_detector_ffn` (presumably — the original sentence is truncated; confirm the target component)
- Support different poolers for span embedding and inter-span embeddings in `eds.relation_detector_ffn`

## Fixed

Expand Down
4 changes: 2 additions & 2 deletions contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ $ python -m venv venv
$ source venv/bin/activate

# Install the package with common, dev, setup dependencies in editable mode
$ pip install -e '.[dev,setup]'
$ pip install -e . --group dev --group setup
# And build resources
$ python scripts/conjugate_verbs.py
```
Expand Down Expand Up @@ -113,7 +113,7 @@ We use `MkDocs` for EDS-NLP's documentation. You can checkout the changes you ma

```console
# Install the requirements
$ pip install -e '.[docs]'
$ pip install -e . --group docs
---> 100%
color:green Installation successful

Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/training-ner.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dependencies = [
"sentencepiece>=0.1.96"
]

[project.optional-dependencies]
[dependency-groups]
dev = [
"dvc>=2.37.0; python_version >= '3.8'",
"pandas>=1.1.0,<2.0.0; python_version < '3.8'",
Expand All @@ -59,7 +59,7 @@ pip install uv
# skip the next two lines if you do not want a venv
uv venv .venv
source .venv/bin/activate
uv pip install -e ".[dev]" -p $(uv python find)
uv pip install -e . --group dev -p $(uv python find)
```

## Training the model
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/training-span-classifier.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ dependencies = [
"sentencepiece>=0.1.96"
]

[project.optional-dependencies]
[dependency-groups]
dev = [
"dvc>=2.37.0; python_version >= '3.8'",
"pandas>=1.4.0,<2.0.0; python_version >= '3.8'",
Expand All @@ -56,7 +56,7 @@ We recommend using a virtual environment and [uv](https://docs.astral.sh/uv/):
pip install uv
uv venv .venv
source .venv/bin/activate
uv pip install -e ".[dev]"
uv pip install -e . --group dev
```

## Creating the dataset
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dependencies = [
"configobj>=5.0.9",
]

[project.optional-dependencies]
[dependency-groups]
dev = [
"dvc>=2.37.0; python_version >= '3.8'",
"pandas>=1.1.0,<2.0.0; python_version < '3.8'",
Expand All @@ -61,7 +61,7 @@ pip install uv
# skip the next two lines if you do not want a venv
uv venv .venv
source .venv/bin/activate
uv pip install -e ".[dev]" -p $(uv python find)
uv pip install -e . --group dev -p $(uv python find)
```

## 2. Tuning a model
Expand Down
157 changes: 91 additions & 66 deletions edsnlp/data/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,76 +243,101 @@ def __init__(

def __call__(self, obj, tokenizer=None):
# tok = get_current_tokenizer() if self.tokenizer is None else self.tokenizer
tok = tokenizer or self.tokenizer or get_current_tokenizer()
doc = tok(obj["text"] or "")
doc._.note_id = obj.get("doc_id", obj.get(FILENAME))

spans = []

for dst in (
*(() if self.span_attributes is None else self.span_attributes.values()),
*self.default_attributes,
):
if not Span.has_extension(dst):
Span.set_extension(dst, default=None)

for ent in obj.get("entities") or ():
fragments = (
[
{
"begin": min(f["begin"] for f in ent["fragments"]),
"end": max(f["end"] for f in ent["fragments"]),
}
]
if not self.split_fragments
else ent["fragments"]
)
for fragment in fragments:
span = doc.char_span(
fragment["begin"],
fragment["end"],
label=ent["label"],
alignment_mode="expand",
)
attributes = (
{a["label"]: a["value"] for a in ent["attributes"]}
if isinstance(ent["attributes"], list)
else ent["attributes"]
note_id = obj.get("doc_id", obj.get(FILENAME))
try:
tok = tokenizer or self.tokenizer or get_current_tokenizer()
doc = tok(obj["text"] or "")
doc._.note_id = note_id

entities = {}
spans = []

for dst in (
*(
()
if self.span_attributes is None
else self.span_attributes.values()
),
*self.default_attributes,
):
if not Span.has_extension(dst):
Span.set_extension(dst, default=None)

for ent in obj.get("entities") or ():
fragments = (
[
{
"begin": min(f["begin"] for f in ent["fragments"]),
"end": max(f["end"] for f in ent["fragments"]),
}
]
if not self.split_fragments
else ent["fragments"]
)
if self.notes_as_span_attribute and ent["notes"]:
ent["attributes"][self.notes_as_span_attribute] = "|".join(
note["value"] for note in ent["notes"]
for fragment in fragments:
span = doc.char_span(
fragment["begin"],
fragment["end"],
label=ent["label"],
alignment_mode="expand",
)
for label, value in attributes.items():
new_name = (
self.span_attributes.get(label, None)
if self.span_attributes is not None
else label
attributes = (
{}
if "attributes" not in ent
else {a["label"]: a["value"] for a in ent["attributes"]}
if isinstance(ent["attributes"], list)
else ent["attributes"]
)
if self.span_attributes is None and not Span.has_extension(
new_name
):
Span.set_extension(new_name, default=None)

if new_name:
value = True if value is None else value
if not self.keep_raw_attribute_values:
value = (
True
if value in ("True", "true")
else False
if value in ("False", "false")
else value
)
span._.set(new_name, value)

spans.append(span)
if self.notes_as_span_attribute and ent["notes"]:
ent["attributes"][self.notes_as_span_attribute] = "|".join(
note["value"] for note in ent["notes"]
)
for label, value in attributes.items():
new_name = (
self.span_attributes.get(label, None)
if self.span_attributes is not None
else label
)
if self.span_attributes is None and not Span.has_extension(
new_name
):
Span.set_extension(new_name, default=None)

if new_name:
value = True if value is None else value
if not self.keep_raw_attribute_values:
value = (
True
if value in ("True", "true")
else False
if value in ("False", "false")
else value
)
span._.set(new_name, value)

entities.setdefault(ent["entity_id"], []).append(span)
spans.append(span)

set_spans(doc, spans, span_setter=self.span_setter)
for attr, value in self.default_attributes.items():
for span in spans:
if span._.get(attr) is None:
span._.set(attr, value)

for relation in obj.get("relations", []):
relation_label = (
relation["relation_label"]
if "relation_label" in relation
else relation["label"]
)
from_entity_id = relation["from_entity_id"]
to_entity_id = relation["to_entity_id"]

set_spans(doc, spans, span_setter=self.span_setter)
for attr, value in self.default_attributes.items():
for span in spans:
if span._.get(attr) is None:
span._.set(attr, value)
for head in entities.get(from_entity_id, ()):
for tail in entities.get(to_entity_id, ()):
head._.rel.setdefault(relation_label, set()).add(tail)
except Exception:
raise ValueError(f"Error when processing {note_id}")

return doc

Expand Down
34 changes: 21 additions & 13 deletions edsnlp/data/standoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
REGEX_ATTRIBUTE = re.compile(r"^([AM]\d+)\t(.+?) ([TE]\d+)(?: (.+))?$")
REGEX_EVENT = re.compile(r"^(E\d+)\t(.+)$")
REGEX_EVENT_PART = re.compile(r"(\S+):([TE]\d+)")
REGEX_STATUS = re.compile(r"^(#\d+)\tStatus ([^\t]+)\t(.*)$")


class BratParsingError(ValueError):
Expand Down Expand Up @@ -71,6 +72,7 @@ def parse_standoff_file(
entities = {}
relations = []
events = {}
doc = {}

with fs.open(txt_path, "r", encoding="utf-8") as f:
text = f.read()
Expand Down Expand Up @@ -178,6 +180,11 @@ def parse_standoff_file(
"arguments": arguments,
}
elif line.startswith("#"):
match = REGEX_STATUS.match(line)
if match:
comment = match.group(3)
doc["status"] = comment
continue
match = REGEX_NOTE.match(line)
if match is None:
raise BratParsingError(ann_file, line)
Expand All @@ -201,6 +208,7 @@ def parse_standoff_file(
"entities": list(entities.values()),
"relations": relations,
"events": list(events.values()),
**doc,
}


Expand Down Expand Up @@ -260,19 +268,19 @@ def dump_standoff_file(
)
attribute_idx += 1

# fmt: off
# if "relations" in doc:
# for i, relation in enumerate(doc["relations"]):
# entity_from = entities_ids[relation["from_entity_id"]]
# entity_to = entities_ids[relation["to_entity_id"]]
# print(
# "R{}\t{} Arg1:{} Arg2:{}\t".format(
# i + 1, str(relation["label"]), entity_from,
# entity_to
# ),
# file=f,
# )
# fmt: on
# fmt: off
if "relations" in doc:
for i, relation in enumerate(doc["relations"]):
entity_from = entities_ids[relation["from_entity_id"]]
entity_to = entities_ids[relation["to_entity_id"]]
print(
"R{}\t{} Arg1:{} Arg2:{}\t".format(
i + 1, str(relation["label"]), entity_from,
entity_to
),
file=f,
)
# fmt: on


class StandoffReader(FileBasedReader):
Expand Down
Loading
Loading