CogStack · baixiac · Nov 28, 2025
diff --git a/app/config.py b/app/config.py
@@ -34,6 +34,7 @@ class Settings(BaseSettings):   # type: ignore
     TRAINING_METRICS_LOGGING_INTERVAL: int = 5        # the number of steps after which training metrics will be collected
     TRAINING_SAFE_MODEL_SERIALISATION: str = "false"  # if "true", serialise the trained model using safe tensors
     TRAINING_CACHE_DIR: str = os.path.join(os.path.abspath(os.path.dirname(__file__)), "cms_cache")           # the directory to cache the intermediate files created during training
+    TRAINING_HF_TAGGING_SCHEME: str = "flat"          # the tagging scheme used to train the Hugging Face NER model and to interpret its predictions when annotating, either "flat", "iob" or "iobes"
     HF_PIPELINE_AGGREGATION_STRATEGY: str = "simple"  # the strategy used for aggregating the predictions of the Hugging Face NER model
     LOG_PER_CONCEPT_ACCURACIES: str = "false"         # if "true", per-concept accuracies will be exposed to the metrics scrapper. Switch this on with caution due to the potentially high number of concepts
     MEDCAT2_MAPPED_ONTOLOGIES: str = ""               # the comma-separated names of ontologies for MedCAT2 to map to

diff --git a/app/domain.py b/app/domain.py
@@ -77,6 +77,12 @@ class Device(str, Enum):
     MPS = "mps"
 
 
+class TaggingScheme(str, Enum):  # values accepted by the TRAINING_HF_TAGGING_SCHEME setting
+    FLAT = "flat"  # presumably plain entity labels without positional prefixes - TODO confirm against the trainer
+    IOB = "iob"  # inside / outside / beginning tags
+    IOBES = "iobes"  # inside / outside / beginning / end / single tags
+
+
 class HfTransformerBackbone(Enum):
     ALBERT = "albert"
     BIG_BIRD = "bert"
@@ -110,20 +116,24 @@ class LlmEngine(Enum):
     CMS = "CMS"
     VLLM = "vLLM"
 
+
 class LlmRole(Enum):
     SYSTEM = "system"
     USER = "user"
     ASSISTANT = "assistant"
     TOOL = "tool"
 
+
 class LlmTrainerType(Enum):
     GRPO = "grpo"
     PPO = "ppo"
 
+
 class LlmDatasetType(Enum):
     JSON = "json"
     CSV = "csv"
 
+
 class Annotation(BaseModel):
     doc_name: Optional[str] = Field(default=None, description="The name of the document to which the annotation belongs")
     start: int = Field(description="The start index of the annotation span")

diff --git a/app/envs/.env b/app/envs/.env
@@ -73,6 +73,9 @@ TRAINING_SAFE_MODEL_SERIALISATION=false
 # The strategy used for aggregating the predictions of the Hugging Face NER model
 HF_PIPELINE_AGGREGATION_STRATEGY=simple
 
+# The tagging scheme used to train the Hugging Face NER model and to interpret its predictions when annotating, either "flat", "iob" or "iobes"
+TRAINING_HF_TAGGING_SCHEME=flat
+
 # The comma-separated names of ontologies for MedCAT2 to map to
 MEDCAT2_MAPPED_ONTOLOGIES=opcs4,icd10
 

diff --git a/app/model_services/huggingface_llm_model.py b/app/model_services/huggingface_llm_model.py
@@ -16,7 +16,7 @@
 from app.exception import ConfigurationException
 from app.model_services.base import AbstractModelService
 from app.trainers.huggingface_llm_trainer import HuggingFaceLlmSupervisedTrainer
-from app.domain import ModelCard, ModelType, Annotation
+from app.domain import ModelCard, ModelType, Annotation, Device
 from app.config import Settings
 from app.utils import (
     get_settings,
@@ -157,9 +157,19 @@ def load_model(
                         bnb_4bit_compute_dtype=torch.bfloat16,
                         bnb_4bit_use_double_quant=True,
                     )
-                    model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=bnb_config)
+                    if get_settings().DEVICE == Device.DEFAULT.value:
+                        model = AutoModelForCausalLM.from_pretrained(
+                            model_path,
+                            quantization_config=bnb_config,
+                            device_map="auto",
+                        )
+                    else:
+                        model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=bnb_config)
                 else:
-                    model = AutoModelForCausalLM.from_pretrained(model_path)
+                    if get_settings().DEVICE == Device.DEFAULT.value:
+                        model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
+                    else:
+                        model = AutoModelForCausalLM.from_pretrained(model_path)
                 ensure_tensor_contiguity(model)
                 tokenizer = AutoTokenizer.from_pretrained(
                     model_path,
@@ -242,8 +252,7 @@ def generate(
         self.model.eval()
 
         inputs = self.tokenizer(prompt, add_special_tokens=False, return_tensors="pt")
-        if non_default_device_is_available(self._config.DEVICE):
-            inputs.to(get_settings().DEVICE)
+        inputs.to(self.model.device)
 
         generation_kwargs = dict(
             inputs=inputs.input_ids,
@@ -291,8 +300,7 @@ async def generate_async(
         self.model.eval()
 
         inputs = self.tokenizer(prompt, add_special_tokens=False, return_tensors="pt")
-        if non_default_device_is_available(self._config.DEVICE):
-            inputs.to(get_settings().DEVICE)
+        inputs.to(self.model.device)
 
         streamer = TextIteratorStreamer(
             self.tokenizer,
@@ -363,8 +371,7 @@ def create_embeddings(
             truncation=True,
         )
 
-        if non_default_device_is_available(self._config.DEVICE):
-            inputs.to(get_settings().DEVICE)
+        inputs.to(self.model.device)
 
         with torch.no_grad():
             outputs = self.model(**inputs, output_hidden_states=True)

diff --git a/app/model_services/huggingface_ner_model.py b/app/model_services/huggingface_ner_model.py
@@ -16,7 +16,7 @@
 from app.exception import ConfigurationException
 from app.model_services.base import AbstractModelService
 from app.trainers.huggingface_ner_trainer import HuggingFaceNerUnsupervisedTrainer, HuggingFaceNerSupervisedTrainer
-from app.domain import ModelCard, ModelType, Annotation
+from app.domain import ModelCard, ModelType, Annotation, Device, TaggingScheme
 from app.config import Settings
 from app.utils import (
     get_settings,
@@ -27,6 +27,7 @@
     get_model_data_package_base_name,
     load_pydantic_object_from_dict,
 )
+from app.processors.tagging import TagProcessor
 
 logger = logging.getLogger("cms")
 
@@ -41,7 +42,7 @@ def __init__(
         enable_trainer: Optional[bool] = None,
         model_name: Optional[str] = None,
         base_model_file: Optional[str] = None,
-        confidence_threshold: float = 0.5,
+        confidence_threshold: float = 0.7,
     ) -> None:
         """
         Initialises the HuggingFace NER model service with specified configurations.
@@ -52,7 +53,7 @@ def __init__(
             enable_trainer (Optional[bool]): The flag to enable or disable trainers. Defaults to None.
             model_name (Optional[str]): The name of the model. Defaults to None.
             base_model_file (Optional[str]): The model package file name. Defaults to None.
-            confidence_threshold (float): The threshold for the confidence score. Defaults to 0.5.
+            confidence_threshold (float): The threshold for the confidence score. Defaults to 0.7.
         """
 
         super().__init__(config)
@@ -123,19 +124,20 @@ def from_model(cls, model: PreTrainedModel, tokenizer: PreTrainedTokenizerBase)
             HuggingFaceNerModel: A HuggingFace NER model service.
         """
 
-        model_service = cls(get_settings(), enable_trainer=False)
+        _config = get_settings()
+        model_service = cls(_config, enable_trainer=False)
         model_service.model = model
         model_service.tokenizer = tokenizer
         _pipeline = partial(
             pipeline,
             task="ner",
             model=model_service.model,
             tokenizer=model_service.tokenizer,
-            stride=10,
-            aggregation_strategy=get_settings().HF_PIPELINE_AGGREGATION_STRATEGY,
+            stride=32,
+            aggregation_strategy=_config.HF_PIPELINE_AGGREGATION_STRATEGY,
         )
-        if non_default_device_is_available(get_settings().DEVICE):
-            model_service._ner_pipeline = _pipeline(device=get_hf_pipeline_device_id(get_settings().DEVICE))
+        if non_default_device_is_available(_config.DEVICE):
+            model_service._ner_pipeline = _pipeline(device=get_hf_pipeline_device_id(_config.DEVICE))
         else:
             model_service._ner_pipeline = _pipeline()
         return model_service
@@ -160,7 +162,10 @@ def load_model(model_file_path: str, *args: Tuple, **kwargs: Dict[str, Any]) ->
         model_path = os.path.join(os.path.dirname(model_file_path), get_model_data_package_base_name(model_file_path))
         if unpack_model_data_package(model_file_path, model_path):
             try:
-                model = AutoModelForTokenClassification.from_pretrained(model_path)
+                if get_settings().DEVICE == Device.DEFAULT.value:
+                    model = AutoModelForTokenClassification.from_pretrained(model_path, device_map="auto")
+                else:
+                    model = AutoModelForTokenClassification.from_pretrained(model_path)
                 ensure_tensor_contiguity(model)
                 tokenizer = AutoTokenizer.from_pretrained(
                     model_path,
@@ -197,7 +202,7 @@ def init_model(self, *args: Any, **kwargs: Any) -> None:
                 task="ner",
                 model=self._model,
                 tokenizer=self._tokenizer,
-                stride=10,
+                stride=32,
                 aggregation_strategy=self._config.HF_PIPELINE_AGGREGATION_STRATEGY,
             )
             if non_default_device_is_available(get_settings().DEVICE):
@@ -233,12 +238,29 @@ def annotate(self, text: str) -> List[Annotation]:
             List[Annotation]: A list of annotations containing the extracted named entities.
         """
 
-        entities = self._ner_pipeline(text)
+        if TaggingScheme(self._config.TRAINING_HF_TAGGING_SCHEME.lower()) == TaggingScheme.IOBES:
+            entities = self._ner_pipeline(text, aggregation_strategy="none")
+        else:
+            entities = self._ner_pipeline(text)
         df = pd.DataFrame(entities)
 
         if df.empty:
             columns = ["label_name", "label_id", "start", "end", "accuracy"]
             df = pd.DataFrame(columns=(columns + ["text"]) if self._config.INCLUDE_SPAN_TEXT == "true" else columns)
+        elif TaggingScheme(self._config.TRAINING_HF_TAGGING_SCHEME.lower()) == TaggingScheme.IOBES:
+            aggregated_entities = TagProcessor.aggregate_bioes_predictions(
+                df,
+                text,
+                self._config.INCLUDE_SPAN_TEXT == "true",
+            )
+            df = pd.DataFrame(aggregated_entities)
+            if df.empty:
+                columns = ["label_name", "label_id", "start", "end", "accuracy"]
+                df = pd.DataFrame(
+                    columns=(columns + ["text"]) if self._config.INCLUDE_SPAN_TEXT == "true" else columns
+                )
+            else:
+                df = df[df["accuracy"] >= self._confidence_threshold]
         else:
             for idx, row in df.iterrows():
                 df.loc[idx, "label_id"] = row["entity_group"]