diff --git a/Dockerfile b/Dockerfile index 89fbc64..8edca5d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,45 +1,38 @@ -# Use Python 3.8 base image -FROM python:3.12 +FROM python:3.12-slim@sha256:9e01bf1ae5db7649a236da7be1e94ffbbbdd7a93f867dd0d8d5720d9e1f89fab AS builder -# Install system dependencies (for spaCy and its dependencies) RUN apt-get update && apt-get install -y \ build-essential \ python3-dev \ - libatlas-base-dev \ + libopenblas-dev \ gfortran \ curl \ git \ - wget + && rm -rf /var/lib/apt/lists/* -# Upgrade pip, setuptools, and wheel (to ensure we're using the latest version) -RUN pip install --upgrade pip setuptools wheel +WORKDIR /build -# Install spaCy (make sure the compatible version with Python 3.8 is installed) -RUN pip3 install spacy==3.7.5 # Change this to a specific compatible version, e.g. 2.2.4 +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" -# Download the necessary spaCy language model (en_core_web_sm) -RUN python -m spacy download en_core_web_sm +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt -# Install required Python dependencies -RUN pip3 install tqdm -RUN pip3 install Cython -RUN pip install xaif_eval==0.0.9 -RUN pip3 install markdown2 -RUN pip3 install flask-cors +FROM python:3.12-slim@sha256:9e01bf1ae5db7649a236da7be1e94ffbbbdd7a93f867dd0d8d5720d9e1f89fab AS runtime +WORKDIR /app +RUN apt-get update && apt-get install -y --no-install-recommends \ + libopenblas-dev \ + && rm -rf /var/lib/apt/lists/* -# Copy the application files into the container -COPY . /app +COPY --from=builder /opt/venv /opt/venv -# Set the working directory to /app -WORKDIR /app +ENV PATH="/opt/venv/bin:$PATH" +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 -# Install additional Python dependencies from requirements.txt -RUN pip install -r requirements.txt +COPY . 
/app -# Expose port 5008 for the Flask app EXPOSE 5005 -# Set the default command to run the application -CMD ["python", "./main.py"] +CMD ["gunicorn", "--bind", "0.0.0.0:5005", "--workers", "4", "main:app"] \ No newline at end of file diff --git a/README copy.md b/README copy.md deleted file mode 100644 index 8f10866..0000000 --- a/README copy.md +++ /dev/null @@ -1,33 +0,0 @@ -# Default Segmenter - -It provides a default implementation of Segmenter, an AMF component that segments arguments into propositions. It utilises simple regular expressions for text segmentation. - -## Endpoints - -### /segmenter-01 - -- **Methods**: GET, POST -- **Description**: - - **GET**: Returns information about the Segmenter component and its usage. - - **POST**: Expects a file upload containing text data to segment. Parses the input and returns the segmented output in xIAF format. - -## Usage - -- Use the `/segmenter-01` endpoint to interact with the Segmenter: - - For GET requests, visit the endpoint URL to get information about Segmenter usage. - - For POST requests, upload a file containing text data to segment and receive the segmented output. - -## Input Format - -The Segmenter accepts input in xIAF formats: - -- **xIAF**: It segements the texts in the L-nodes. - -## Output Format - -The processed output is returned in xIAF format, containing segmented nodes, edges, locutions, and keeps the rest as they are. - -## Notes - -- This app serves as a basic segmenter using regular expressions for text segmentation. -- It can be connected to other components in an argument mining pipeline for further analysis. diff --git a/README.md b/README.md index e2ca1b7..8f10866 100644 --- a/README.md +++ b/README.md @@ -1,150 +1,33 @@ +# Default Segmenter - -# Default BERT Textual Entailment Service Documentation - -## Introduction -This application provides an implementation of BART fine-tuned on NLI dataset for indetifying argument relations. 
It serves as a default AMF component designed for detecting argument relations between propositions. Entailemtnt relation is mapped to support relation, conflicts, and non-relations between propositions. -- It can be integrated into the argument mining pipeline alongside other AMF components for further analysis and processing. - - -## Brief Overview of the Architecture/Method -Brief overview of the architecture/method used. - -- **Dataset**: [Link to datasets](#) -- **Model ID**: [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli) -- **Repository**: [GitHub repository](https://github.com/arg-tech/bert-te) -- **Paper**: [Link to published paper](https://arxiv.org/abs/1909.00161) +It provides a default implementation of Segmenter, an AMF component that segments arguments into propositions. It utilises simple regular expressions for text segmentation. ## Endpoints -### /bert-te - -#### Description -- **Methods**: `GET`, `POST` - - **GET**: Returns information about the BERT Textual Entailment Service and its usage. - - **POST**: Expects a file upload (`file` parameter) in the xAIF format. The Flask route processes the uploaded file identify argument relation between I-nodes and update the xAIF node to represent the argument relations and returns the updated xAIF as a json file. - -#### Details -- **URL**: `/bert-te` -- **Methods**: `GET`, `POST` -- **Input**: - - **GET**: No parameters. - - **POST**: Expects a file upload (`file` parameter) in the xAIF format. -- **Output**: - - **Response**: The inferred argument structure in xAIF json file format, containing nodes, edges, locutions, and other relevant information. - - **Example Response**: Example JSON response. - -## Input and Output Formats - -### Input Format -- **Text File**: xAIF format input ([xAIF format details](https://wiki.arg.tech/books/amf/page/xaif)). 
- -### Output Format -The inferred argument structure is returned in the xAIF format, containing nodes, edges, locutions, and other relevant information. In the xAIF: -- Argument units are specified as type "I" nodes. -- Argument relations are represented as "RA" type for support and "CA" type for attack relations. -- The relations between the "I" nodes and argument relation nodes are presented as edges. - -## Installation - -### Requirements for Installation -torch -numpy -transformers -xaif_eval==0.0.9 -amf-fast-inference==0.0.3 -markdown2 - - -### Installation Setup - -#### Using Docker Container - -To set up the BERT Textual Entailment Service using Docker, follow these steps: - -1. **Clone the Repository:** - ```sh - git clone https://github.com/arg-tech/bert-te.git - ``` - -2. **Navigate to the Project Root Directory:** - ```sh - cd bert-te - ``` - -3. **Make Required Changes:** - - Edit the `Dockerfile`, `main.py`, and `docker-compose.yml` files to specify the container name, port number, and other settings as needed. - -4. **Build and Run the Docker Container:** - ```sh - docker-compose up - ``` - -#### From Source Without Docker - -If you prefer to install without Docker: - -1. **Install Dependencies:** - - Ensure Python and necessary libraries are installed. - -2. **Configure and Run:** - - Configure the environment variables and settings in `main.py`. - - Run the application using Python: - ```sh - python main.py - ``` - +### /segmenter-01 +- **Methods**: GET, POST +- **Description**: + - **GET**: Returns information about the Segmenter component and its usage. + - **POST**: Expects a file upload containing text data to segment. Parses the input and returns the segmented output in xIAF format. 
## Usage -### Using Programming Interface - -#### Example Python Code Snippet - -```python -import requests -import json - -url = 'http://your-server-url/bert-te' -input_file_path = 'example_xAIF.json' - -with open(input_file_path, 'r', encoding='utf-8') as file: -    files = {'file': (input_file_path, file, 'application/json')} - -response = requests.post(url, files=files) - -if response.status_code == 200: -    output_file_path = 'output_xAIF.json' -    with open(output_file_path, 'w', encoding='utf-8') as output_file: -        json.dump(response.json(), output_file, ensure_ascii=False, indent=4) -    print(f'Response saved to {output_file_path}') -else: -    print(f'Failed to make a POST request. Status code: {response.status_code}') -    print(response.text) - -``` - -### Using cURL - -- **Example Request**: - -```bash -curl -X POST \ -  -F "file=@example_xAIF.json" \ -  http://your-server-url/bert-te -``` - - +- Use the `/segmenter-01` endpoint to interact with the Segmenter: +  - For GET requests, visit the endpoint URL to get information about Segmenter usage. +  - For POST requests, upload a file containing text data to segment and receive the segmented output. -### Using Web Interface +## Input Format -The service can also be used to create a pipeline on our n8n interface. Simply create an HTTP node, configure the node including the URL of the service and the parameter name of the file (`file`). +The Segmenter accepts input in xIAF formats: +- **xIAF**: It segments the texts in the L-nodes. +## Output Format +The processed output is returned in xIAF format, containing segmented nodes, edges, locutions, and keeps the rest as they are. 
-