|
42 | 42 | }, |
43 | 43 | { |
44 | 44 | "cell_type": "code", |
45 | | - "execution_count": 2, |
| 45 | + "execution_count": 1, |
46 | 46 | "id": "37c042fe", |
47 | 47 | "metadata": {}, |
48 | 48 | "outputs": [], |
|
70 | 70 | }, |
71 | 71 | { |
72 | 72 | "cell_type": "code", |
73 | | - "execution_count": 3, |
| 73 | + "execution_count": 2, |
74 | 74 | "id": "92402df7", |
75 | 75 | "metadata": {}, |
76 | 76 | "outputs": [], |
|
217 | 217 | }, |
218 | 218 | { |
219 | 219 | "cell_type": "code", |
220 | | - "execution_count": 4, |
| 220 | + "execution_count": 3, |
221 | 221 | "id": "1fd02895", |
222 | 222 | "metadata": {}, |
223 | 223 | "outputs": [ |
|
258 | 258 | }, |
259 | 259 | { |
260 | 260 | "cell_type": "code", |
261 | | - "execution_count": null, |
| 261 | + "execution_count": 4, |
262 | 262 | "id": "61b0252e", |
263 | 263 | "metadata": {}, |
264 | 264 | "outputs": [], |
265 | 277 | "source": [ |
266 | 278 | "from torchTextClassifiers.utilities.preprocess import clean_text_feature\n", |
267 | 279 | "df[\"libelle_processed\"] = clean_text_feature(df[\"libelle\"])" |
|
445 | 457 | "outputs": [], |
446 | 458 | "source": [ |
447 | 459 | "model.to_json('torchTextClassifiers_config.json')\n", |
448 | | - "# model = create_fasttext.from_json('torchTextClassifiers_config.json')" |
| 460 | + "# Loading from JSON now works with the new API:\n", |
| 461 | + "# from torchTextClassifiers import torchTextClassifiers\n", |
| 462 | + "# loaded_model = torchTextClassifiers.from_json('torchTextClassifiers_config.json')" |
449 | 463 | ] |
450 | 464 | }, |
451 | 465 | { |
|
698 | 712 | "model.to_json('torchTextClassifiers_config.json')" |
699 | 713 | ] |
700 | 714 | }, |
| 715 | + { |
| 716 | + "cell_type": "code", |
| 717 | + "execution_count": null, |
| 718 | + "id": "9amb3ku6gim", |
| 719 | + "metadata": {}, |
| 720 | + "outputs": [], |
| 721 | + "source": [ |
| 722 | + "# Demonstrate the new JSON loading approach\n", |
| 723 | + "from torchTextClassifiers import torchTextClassifiers\n", |
| 724 | + "\n", |
| 725 | + "# Load model from JSON (works with new wrapper-based approach)\n", |
| 726 | + "loaded_model = torchTextClassifiers.from_json('torchTextClassifiers_config.json')\n", |
| 727 | + "\n", |
| 728 | + "print(\"✅ Model loaded from JSON successfully!\")\n", |
| 729 | + "print(f\"Loaded wrapper type: {type(loaded_model.classifier_wrapper).__name__}\")\n", |
| 730 | + "print(f\"Config parameters: embedding_dim={loaded_model.config.embedding_dim}, sparse={loaded_model.config.sparse}\")\n", |
| 731 | + "\n", |
| 732 | + "# The loaded model needs to be built before use\n", |
| 733 | + "# loaded_model.build(X_train, y_train)" |
| 734 | + ] |
| 735 | + }, |
| 736 | + { |
| 737 | + "cell_type": "markdown", |
| 738 | + "id": "f7rq00g68p", |
| 739 | + "metadata": {}, |
| 740 | + "source": [ |
| 741 | + "## New API Features\n", |
| 742 | + "\n", |
| 743 | + "The updated `torchTextClassifiers` API provides more flexibility by allowing users to:\n", |
| 744 | + "\n", |
| 745 | + "### 1. **Direct Wrapper Usage**\n", |
| 746 | + "Create classifiers directly using wrapper classes, enabling custom implementations:\n", |
| 747 | + "\n", |
| 748 | + "```python\n", |
| 749 | + "from torchTextClassifiers import torchTextClassifiers\n", |
| 750 | + "from torchTextClassifiers.classifiers.fasttext.wrapper import FastTextWrapper\n", |
| 751 | + "from torchTextClassifiers.classifiers.fasttext.core import FastTextConfig\n", |
| 752 | + "\n", |
| 753 | + "config = FastTextConfig(...)\n", |
| 754 | + "wrapper = FastTextWrapper(config)\n", |
| 755 | + "classifier = torchTextClassifiers(wrapper)\n", |
| 756 | + "```\n", |
| 757 | + "\n", |
| 758 | + "### 2. **Convenience Functions (Backward Compatible)**\n", |
| 759 | + "The familiar convenience functions still work:\n", |
| 760 | + "\n", |
| 761 | + "```python\n", |
| 762 | + "from torchTextClassifiers import create_fasttext\n", |
| 763 | + "classifier = create_fasttext(embedding_dim=50, sparse=False, ...)\n", |
| 764 | + "```\n", |
| 765 | + "\n", |
| 766 | + "### 3. **Enhanced JSON Support**\n", |
| 767 | + "Improved serialization/deserialization that works with custom wrapper classes:\n", |
| 768 | + "\n", |
| 769 | + "```python\n", |
| 770 | + "# Save configuration\n", |
| 771 | + "classifier.to_json('config.json')\n", |
| 772 | + "\n", |
| 773 | + "# Load configuration (automatically detects wrapper type)\n", |
| 774 | + "loaded_classifier = torchTextClassifiers.from_json('config.json')\n", |
| 775 | + "\n", |
| 776 | + "# Or specify wrapper class explicitly\n", |
| 777 | + "loaded_classifier = torchTextClassifiers.from_json('config.json', FastTextWrapper)\n", |
| 778 | + "```\n", |
| 779 | + "\n", |
| 780 | + "### 4. **Custom Classifier Support**\n", |
| 781 | + "Users can now easily create their own classifier wrappers by inheriting from `BaseClassifierWrapper` and implementing the required methods." |
| 782 | + ] |
| 783 | + }, |
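| | + { |
| | + "cell_type": "markdown", |
| | + "id": "custom-wrapper-sketch", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "A minimal, hedged sketch of a custom wrapper. The import path of `BaseClassifierWrapper` and the method names to override are assumptions for illustration (mirroring how the FastText wrapper is used above), and `my_config` is a hypothetical placeholder:\n", |
| | + "\n", |
| | + "```python\n", |
| | + "from torchTextClassifiers import torchTextClassifiers\n", |
| | + "# Assumed import path for the base class:\n", |
| | + "from torchTextClassifiers.classifiers.base import BaseClassifierWrapper\n", |
| | + "\n", |
| | + "class MyCustomWrapper(BaseClassifierWrapper):\n", |
| | + "    # Implement the abstract methods declared by BaseClassifierWrapper\n", |
| | + "    # (names assumed; see the library source for the exact interface):\n", |
| | + "    def build(self, X_train, y_train, **kwargs):\n", |
| | + "        ...\n", |
| | + "\n", |
| | + "    def predict(self, X):\n", |
| | + "        ...\n", |
| | + "\n", |
| | + "classifier = torchTextClassifiers(MyCustomWrapper(my_config))\n", |
| | + "```" |
| | + ] |
| | + }, |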
701 | 784 | { |
702 | 785 | "cell_type": "markdown", |
703 | 786 | "id": "017f8d12-0be8-45df-a0e4-80919c89db2d", |
|
713 | 796 | "where one can first build the tokenizer, and then build the model with\n", |
714 | 797 | "custom architecture parameters.\n", |
715 | 798 | "\n", |
| 799 | + "**Note**: With the new API, you can also create classifiers directly using wrapper classes:\n", |
| 800 | + "\n", |
| 801 | + "```python\n", |
| 802 | + "from torchTextClassifiers import torchTextClassifiers\n", |
| 803 | + "from torchTextClassifiers.classifiers.fasttext.wrapper import FastTextWrapper\n", |
| 804 | + "from torchTextClassifiers.classifiers.fasttext.core import FastTextConfig\n", |
| 805 | + "\n", |
| 806 | + "config = FastTextConfig(embedding_dim=50, sparse=False, ...)\n", |
| 807 | + "wrapper = FastTextWrapper(config)\n", |
| 808 | + "classifier = torchTextClassifiers(wrapper)\n", |
| 809 | + "```\n", |
| 810 | + "\n", |
716 | 811 | "The tokenizer can be loaded **from the same JSON file** as the model\n", |
717 | 812 | "parameters, or initialized using the right arguments." |
718 | 813 | ] |
719 | 814 | }, |
| 815 | + { |
| 816 | + "cell_type": "code", |
| 817 | + "execution_count": null, |
| 818 | + "id": "g0rmedya9eb", |
| 819 | + "metadata": {}, |
| 820 | + "outputs": [], |
| 821 | + "source": [ |
| 822 | + "# Example of the new direct wrapper approach\n", |
| 823 | + "from torchTextClassifiers import torchTextClassifiers\n", |
| 824 | + "from torchTextClassifiers.classifiers.fasttext.wrapper import FastTextWrapper\n", |
| 825 | + "from torchTextClassifiers.classifiers.fasttext.core import FastTextConfig\n", |
| 826 | + "\n", |
| 827 | + "# Create configuration\n", |
| 828 | + "config = FastTextConfig(\n", |
| 829 | + " embedding_dim=50,\n", |
| 830 | + " sparse=False,\n", |
| 831 | + " num_tokens=100000,\n", |
| 832 | + " min_count=1,\n", |
| 833 | + " min_n=3,\n", |
| 834 | + " max_n=6,\n", |
| 835 | + " len_word_ngrams=3,\n", |
| 836 | + " categorical_embedding_dims=10,\n", |
| 837 | + " num_classes=NUM_CLASSES,\n", |
| 838 | + " num_categorical_features=NUM_CAT_VAR,\n", |
| 839 | + " categorical_vocabulary_sizes=CAT_VOCAB_SIZE\n", |
| 840 | + ")\n", |
| 841 | + "\n", |
| 842 | + "# Create wrapper and classifier\n", |
| 843 | + "wrapper = FastTextWrapper(config)\n", |
| 844 | + "direct_model = torchTextClassifiers(wrapper)\n", |
| 845 | + "\n", |
| 846 | + "# Build the model\n", |
| 847 | + "direct_model.build(X_train, y_train, lightning=True, lr=parameters_train.get(\"lr\"))\n", |
| 848 | + "\n", |
| 849 | + "print(\"✅ Direct wrapper model created successfully!\")\n", |
| 850 | + "print(f\"Model type: {type(direct_model.classifier_wrapper).__name__}\")\n", |
| 851 | + "print(f\"Config type: {type(direct_model.config).__name__}\")" |
| 852 | + ] |
| 853 | + }, |
720 | 854 | { |
721 | 855 | "cell_type": "code", |
722 | 856 | "execution_count": 18, |
|
1064 | 1198 | "id": "f84e6bff-8fa7-4896-b60a-005ae5f1d3eb", |
1065 | 1199 | "metadata": {}, |
1066 | 1200 | "source": [ |
1067 | | - "# Explainability" |
| 1201 | + "# Explainability\n", |
| 1202 | + "\n", |
| 1203 | + "The `torchTextClassifiers` framework provides explainability features through the `predict_and_explain` method. This allows you to understand which parts of the input text contribute most to the model's predictions." |
1068 | 1204 | ] |
1069 | 1205 | }, |
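| | + { |
| | + "cell_type": "markdown", |
| | + "id": "explain-usage-sketch", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "A hedged sketch of the expected call pattern; the exact signature and return format of `predict_and_explain` are assumed here (mirroring the `predict`-style inputs used earlier), and `X_test` is a hypothetical held-out feature matrix:\n", |
| | + "\n", |
| | + "```python\n", |
| | + "# Assumed usage: pass the same kind of feature matrix used for predict()\n", |
| | + "result = model.predict_and_explain(X_test)\n", |
| | + "print(result)\n", |
| | + "```" |
| | + ] |
| | + }, |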
1070 | 1206 | { |
|
1093 | 1229 | ], |
1094 | 1230 | "metadata": { |
1095 | 1231 | "kernelspec": { |
1096 | | - "display_name": "Python 3 (ipykernel)", |
| 1232 | + "display_name": "Python 3", |
1097 | 1233 | "language": "python", |
1098 | | - "name": "python3", |
1099 | | - "path": "/opt/conda/share/jupyter/kernels/python3" |
| 1234 | + "name": "python3" |
1100 | 1235 | }, |
1101 | 1236 | "language_info": { |
1102 | 1237 | "codemirror_mode": { |
|