Merged
45 changes: 19 additions & 26 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,45 +1,38 @@
# Use Python 3.8 base image
FROM python:3.12
FROM python:3.12-slim@sha256:9e01bf1ae5db7649a236da7be1e94ffbbbdd7a93f867dd0d8d5720d9e1f89fab AS builder

# Install system dependencies (for spaCy and its dependencies)
RUN apt-get update && apt-get install -y \
build-essential \
python3-dev \
libatlas-base-dev \
libopenblas-dev \
gfortran \
curl \
git \
wget
&& rm -rf /var/lib/apt/lists/*

# Upgrade pip, setuptools, and wheel (to ensure we're using the latest version)
RUN pip install --upgrade pip setuptools wheel
WORKDIR /build

# Install spaCy (make sure the compatible version with Python 3.8 is installed)
RUN pip3 install spacy==3.7.5 # Change this to a specific compatible version, e.g. 2.2.4
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Download the necessary spaCy language model (en_core_web_sm)
RUN python -m spacy download en_core_web_sm
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install required Python dependencies
RUN pip3 install tqdm
RUN pip3 install Cython
RUN pip install xaif_eval==0.0.9
RUN pip3 install markdown2
RUN pip3 install flask-cors
FROM python:3.12-slim@sha256:9e01bf1ae5db7649a236da7be1e94ffbbbdd7a93f867dd0d8d5720d9e1f89fab AS runtime

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends \
libopenblas-dev \
&& rm -rf /var/lib/apt/lists/*

# Copy the application files into the container
COPY . /app
COPY --from=builder /opt/venv /opt/venv

# Set the working directory to /app
WORKDIR /app
ENV PATH="/opt/venv/bin:$PATH"
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install additional Python dependencies from requirements.txt
RUN pip install -r requirements.txt
COPY . /app

# Expose port 5008 for the Flask app
EXPOSE 5005

# Set the default command to run the application
CMD ["python", "./main.py"]
CMD ["gunicorn", "--bind", "0.0.0.0:5005", "--workers", "4", "main:app"]
33 changes: 0 additions & 33 deletions README copy.md

This file was deleted.

153 changes: 18 additions & 135 deletions README.md
@@ -1,150 +1,33 @@
# Default Segmenter


# Default BERT Textual Entailment Service Documentation

## Introduction
This application provides an implementation of BART fine-tuned on an NLI dataset for identifying argument relations. It serves as a default AMF component for detecting argument relations between propositions. The entailment relation is mapped onto support relations, conflict relations, and non-relations between propositions.
- It can be integrated into the argument mining pipeline alongside other AMF components for further analysis and processing.


## Brief Overview of the Architecture/Method
Brief overview of the architecture/method used.

- **Dataset**: [Link to datasets](#)
- **Model ID**: [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli)
- **Repository**: [GitHub repository](https://github.com/arg-tech/bert-te)
- **Paper**: [Link to published paper](https://arxiv.org/abs/1909.00161)
It provides a default implementation of Segmenter, an AMF component that segments arguments into propositions. It utilises simple regular expressions for text segmentation.

## Endpoints

### /bert-te

#### Description
- **Methods**: `GET`, `POST`
- **GET**: Returns information about the BERT Textual Entailment Service and its usage.
- **POST**: Expects a file upload (`file` parameter) in the xAIF format. The Flask route processes the uploaded file to identify argument relations between I-nodes, updates the xAIF nodes to represent those relations, and returns the updated xAIF as a JSON file.

#### Details
- **URL**: `/bert-te`
- **Methods**: `GET`, `POST`
- **Input**:
- **GET**: No parameters.
- **POST**: Expects a file upload (`file` parameter) in the xAIF format.
- **Output**:
- **Response**: The inferred argument structure in xAIF json file format, containing nodes, edges, locutions, and other relevant information.
- **Example Response**: Example JSON response.

## Input and Output Formats

### Input Format
- **Text File**: xAIF format input ([xAIF format details](https://wiki.arg.tech/books/amf/page/xaif)).

### Output Format
The inferred argument structure is returned in the xAIF format, containing nodes, edges, locutions, and other relevant information. In the xAIF:
- Argument units are specified as type "I" nodes.
- Argument relations are represented as "RA" type for support and "CA" type for attack relations.
- The relations between the "I" nodes and argument relation nodes are presented as edges.
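
As an illustration of the conventions above, the fragment below sketches two "I" nodes joined by an "RA" (support) node and extracts the support relation by following the edges. The exact texts and IDs are made up for the example; treat the key layout as a sketch of the xAIF structure described above, not an authoritative schema.

```python
# Minimal xAIF-style fragment: two I-nodes joined by an RA (support) node.
# Texts and IDs are illustrative; the key layout follows the conventions
# described above (nodes, edges, locutions).
xaif = {
    "AIF": {
        "nodes": [
            {"nodeID": "1", "type": "I", "text": "It is raining."},
            {"nodeID": "2", "type": "RA", "text": "Default Inference"},
            {"nodeID": "3", "type": "I", "text": "The ground is wet."},
        ],
        "edges": [
            {"edgeID": "1", "fromID": "1", "toID": "2"},
            {"edgeID": "2", "fromID": "2", "toID": "3"},
        ],
        "locutions": [],
    }
}

# Collect support relations by chaining edges through an RA node: I -> RA -> I.
nodes = {n["nodeID"]: n for n in xaif["AIF"]["nodes"]}
supports = [
    (e1["fromID"], e2["toID"])
    for e1 in xaif["AIF"]["edges"]
    for e2 in xaif["AIF"]["edges"]
    if e1["toID"] == e2["fromID"] and nodes[e1["toID"]]["type"] == "RA"
]
print(supports)  # [('1', '3')]
```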

## Installation

### Requirements for Installation
```
torch
numpy
transformers
xaif_eval==0.0.9
amf-fast-inference==0.0.3
markdown2
```


### Installation Setup

#### Using Docker Container

To set up the BERT Textual Entailment Service using Docker, follow these steps:

1. **Clone the Repository:**
```sh
git clone https://github.com/arg-tech/bert-te.git
```

2. **Navigate to the Project Root Directory:**
```sh
cd bert-te
```

3. **Make Required Changes:**
- Edit the `Dockerfile`, `main.py`, and `docker-compose.yml` files to specify the container name, port number, and other settings as needed.

4. **Build and Run the Docker Container:**
```sh
docker-compose up
```

#### From Source Without Docker

If you prefer to install without Docker:

1. **Install Dependencies:**
- Ensure Python and necessary libraries are installed.

2. **Configure and Run:**
- Configure the environment variables and settings in `main.py`.
- Run the application using Python:
```sh
python main.py
```

### /segmenter-01

- **Methods**: GET, POST
- **Description**:
- **GET**: Returns information about the Segmenter component and its usage.
- **POST**: Expects a file upload containing text data to segment. Parses the input and returns the segmented output in xAIF format.

## Usage

### Using Programming Interface

#### Example Python Code Snippet

```python
import requests
import json

url = 'http://your-server-url/bert-te'
input_file_path = 'example_xAIF.json'

with open(input_file_path, 'rb') as file:
    files = {'file': (input_file_path, file, 'application/json')}
    # Post while the file handle is still open
    response = requests.post(url, files=files)

if response.status_code == 200:
    output_file_path = 'output_xAIF.json'
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        json.dump(response.json(), output_file, ensure_ascii=False, indent=4)
    print(f'Response saved to {output_file_path}')
else:
    print(f'Failed to make a POST request. Status code: {response.status_code}')
    print(response.text)
```

### Using cURL

- **Example Request**:

```bash
curl -X POST \
-F "file=@example_xAIF.json" \
http://your-server-url/bert-te
```


- Use the `/segmenter-01` endpoint to interact with the Segmenter:
- For GET requests, visit the endpoint URL to get information about Segmenter usage.
- For POST requests, upload a file containing text data to segment and receive the segmented output.

### Using Web Interface
## Input Format

The service can also be used to create a pipeline on our n8n interface. Simply create an HTTP node and configure it with the URL of the service and the file parameter name (`file`).
The Segmenter accepts input in xAIF format:

- **xAIF**: It segments the text in the L-nodes.

## Output Format

The processed output is returned in xAIF format, containing segmented nodes, edges, and locutions; the rest of the input structure is preserved as-is.

<div style="text-align:center;">
<img src="img/n8n_screnshot.png" alt="Image Description" width="100%">
</div>
## Notes

- This app serves as a basic segmenter using regular expressions for text segmentation.
- It can be connected to other components in an argument mining pipeline for further analysis.
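
A regular-expression segmenter of the kind noted above can be sketched as follows. The pattern and function name are illustrative assumptions, not the component's actual implementation.

```python
import re

def segment(text: str) -> list[str]:
    """Split text into candidate propositions on sentence-final punctuation.

    Illustrative only: the real component may use a different pattern.
    """
    # Split after '.', '!' or '?' when followed by whitespace.
    parts = re.split(r"(?<=[.!?])\s+", text.strip())
    return [p for p in parts if p]

segments = segment("Rain is likely. Take an umbrella! Will it help?")
print(segments)
# ['Rain is likely.', 'Take an umbrella!', 'Will it help?']
```

Each returned segment would then become an "I" node in the output xAIF.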
3 changes: 0 additions & 3 deletions docker-compose.yml
@@ -1,6 +1,3 @@
version: '3'
###

services:
default_segmenter:
container_name: default-segmenter
60 changes: 55 additions & 5 deletions requirements.txt
@@ -1,5 +1,55 @@
flask
flask_uploads
prometheus_flask_exporter


annotated-doc==0.0.4
annotated-types==0.7.0
blinker==1.9.0
blis==0.7.11
catalogue==2.0.10
certifi==2026.1.4
charset-normalizer==3.4.4
click==8.3.1
cloudpathlib==0.23.0
confection==0.1.5
cymem==2.0.13
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
Flask==3.1.3
gunicorn==25.1.0
flask-cors==6.0.2
Flask-Uploads==0.2.1
idna==3.11
itsdangerous==2.2.0
Jinja2==3.1.6
langcodes==3.5.1
markdown-it-py==4.0.0
markdown2==2.5.4
MarkupSafe==3.0.3
mdurl==0.1.2
murmurhash==1.0.15
numpy==1.26.4
packaging==26.0
preshed==3.0.12
prometheus_client==0.24.1
prometheus_flask_exporter==0.23.2
pydantic==2.12.5
pydantic_core==2.41.5
Pygments==2.19.2
requests==2.32.5
rich==14.3.3
setuptools==82.0.0
shellingham==1.5.4
smart_open==7.5.0
spacy==3.7.5
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.5.2
thinc==8.2.5
tqdm==4.67.3
typer==0.24.0
typer-slim==0.24.0
typing-inspection==0.4.2
typing_extensions==4.15.0
urllib3==2.6.3
wasabi==1.1.3
weasel==0.4.3
Werkzeug==3.1.6
wheel==0.46.3
wrapt==2.1.1
xaif_eval==0.0.9
9 changes: 9 additions & 0 deletions tests/api-requests/.gitignore
@@ -0,0 +1,9 @@
# Secrets
.env*

# Dependencies
node_modules

# OS files
.DS_Store
Thumbs.db
31 changes: 31 additions & 0 deletions tests/api-requests/Basic get info.yml
@@ -0,0 +1,31 @@
info:
  name: Basic get info
  type: http
  seq: 1

http:
  method: GET
  url: "{{baseUrl}}/segmenter-01"
  auth: inherit

runtime:
  scripts:
    - type: tests
      code: |-
        const expectedPartialData = "Default Segmenter";
        const maxTimeToReturnInMilliseconds = 300;

        test("Response should match partial expected value", function () {
          const receivedData = res.getBody();
          expect(receivedData).to.contain(expectedPartialData);
        });

        test(`Response time should be faster than ${maxTimeToReturnInMilliseconds} milliseconds`, function () {
          expect(res.responseTime).to.be.below(maxTimeToReturnInMilliseconds);
        });

settings:
  encodeUrl: true
  timeout: 0
  followRedirects: true
  maxRedirects: 5