Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
backend/models/IBM.h5 filter=lfs diff=lfs merge=lfs -text
22 changes: 22 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Only modify variables that have a comment above them
# Contact IWG if you wish to alter the template otherwise

name: Deploy
on:
  push:
    branches: main
  pull_request:
    branches: ['*']

jobs:
  deploy:
    name: Environments
    uses: arg-tech/deployment-templates/.github/workflows/default-deploy-template.yml@main
    secrets: inherit
    with:
      # Specify the target production server
      target_production_server_nickname: argand
      # Define a URL for your app, without the http:// or www prefixes
      full_app_url: targer.amfws.arg.tech
      # The port that is exposed on localhost (must be the same as in docker-compose.yml)
      app_port: 10600
677 changes: 677 additions & 0 deletions backend/BiLSTM.py

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Use an official Python runtime as a parent image.
# NOTE(review): python:3.5 is end-of-life; it appears to be kept because the
# pinned torch==0.4.1 / torchvision==0.2.1 wheels target it -- confirm before
# bumping the base image.
FROM python:3.5

# Copy the build context into the image root.
# COPY replaces ADD: for plain local files COPY is the recommended
# instruction (ADD additionally fetches URLs and auto-extracts archives,
# which is not wanted here).
COPY . /

# Set the working directory to /
WORKDIR /

# Install pinned Python dependencies, plus the exact torch/torchvision
# versions the shipped models were trained against.
RUN pip install -r requirements.txt
RUN pip install torch==0.4.1 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install torchvision==0.2.1 -f https://download.pytorch.org/whl/torch_stable.html

# Fetch the UKPLab BiLSTM-CNN-CRF framework the backend runs on top of.
RUN git clone https://github.com/UKPLab/emnlp2017-bilstm-cnn-crf.git

# Move the application modules into the framework checkout so their
# relative imports (neuralnets.*, util.*) resolve at runtime.
RUN mv backend.py emnlp2017-bilstm-cnn-crf/ && mv Model.py emnlp2017-bilstm-cnn-crf/ && mv ModelNewES.py emnlp2017-bilstm-cnn-crf/ && mv ModelNewWD.py emnlp2017-bilstm-cnn-crf/ && mv Segmenter.py emnlp2017-bilstm-cnn-crf/

# Ship the trained model weights alongside the framework.
RUN mv models/* emnlp2017-bilstm-cnn-crf/models/

# Our patched BiLSTM.py overrides the framework's stock implementation.
RUN mv -f BiLSTM.py emnlp2017-bilstm-cnn-crf/neuralnets/

RUN mkdir emnlp2017-bilstm-cnn-crf/lstm

# Second tagger implementation, used by ModelNewES / ModelNewWD.
RUN git clone https://github.com/achernodub/bilstm-cnn-crf-tagger.git emnlp2017-bilstm-cnn-crf/lstm
RUN pip install prometheus-flask-exporter==0.1.2

# Make port 6000 available to the world outside this container.
# NOTE(review): deploy.yml declares app_port 10600 -- confirm that
# docker-compose.yml maps 10600 to this container port.
EXPOSE 6000

WORKDIR /emnlp2017-bilstm-cnn-crf

# Run the Flask backend when the container launches.
CMD ["python3", "backend.py"]









74 changes: 74 additions & 0 deletions backend/Model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from __future__ import print_function
import nltk
from util.preprocessing import addCharInformation, createMatrices, addCasingInformation
from neuralnets.BiLSTM import BiLSTM
import sys
import json

class Model:
    """Sequence labeller backed by a pre-trained UKPLab BiLSTM network.

    The network is loaded from ``models/<path>`` at construction time and
    used to tag word-tokenized input text.
    """

    def __init__(self, path):
        # Log which model file is being loaded (useful in container logs).
        print(path)
        self.lstmModel = BiLSTM.loadModel("models/" + path)
        # label() needs NLTK's Punkt sentence tokenizer; fetch it if absent.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def label(self, input):
        """Tag *input* text and return the labelled tokens as a JSON string."""
        # Sentence-split, word-tokenize, and enrich with the character and
        # casing features the network expects.
        sentences = [{'tokens': nltk.word_tokenize(sent)}
                     for sent in nltk.sent_tokenize(input)]
        addCharInformation(sentences)
        addCasingInformation(sentences)
        dataMatrix = createMatrices(sentences, self.lstmModel.mappings, True)

        # Run the tagger; `tags` maps model name -> per-sentence tag lists.
        tags = self.lstmModel.tagSentences(dataMatrix)

        # One {'token', 'label'} dict per token; only the first model in
        # sorted name order contributes the label.
        result = []
        for sentIdx, sent in enumerate(sentences):
            labelled = []
            for tokIdx, token in enumerate(sent['tokens']):
                perModel = [tags[name][sentIdx][tokIdx]
                            for name in sorted(tags.keys())]
                labelled.append({'token': token, 'label': perModel[0]})
            result.append(labelled)

        return json.dumps(result)

    def label_with_probs(self, input):
        """Tag *input* text and return labelled tokens with probabilities.

        Unlike :meth:`label`, the result is a Python list (not JSON), and
        each token dict carries an extra 'prob' entry.
        """
        sentences = [{'tokens': nltk.word_tokenize(sent)}
                     for sent in nltk.sent_tokenize(input)]
        addCharInformation(sentences)
        addCasingInformation(sentences)
        dataMatrix = createMatrices(sentences, self.lstmModel.mappings, True)

        # Tagger variant that also reports per-token probabilities.
        tags, probs = self.lstmModel.tagSentences_with_probs(dataMatrix)

        result = []
        for sentIdx, sent in enumerate(sentences):
            labelled = []
            for tokIdx, token in enumerate(sent['tokens']):
                names = sorted(tags.keys())
                perModel = [tags[name][sentIdx][tokIdx] for name in names]
                perModelProb = [probs[name][sentIdx][tokIdx] for name in names]
                labelled.append({'token': token,
                                 'label': perModel[0],
                                 'prob': perModelProb[0]})
            result.append(labelled)

        return result
42 changes: 42 additions & 0 deletions backend/ModelNewES.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import sys
sys.path.insert(0, "./lstm")

from backend.lstm.src.factories.factory_tagger import TaggerFactory

import nltk
import json

path = "models/model_new_es.hdf5"

def replace_labels(input):
    """Translate tagger label names into the short API label scheme.

    Premise -> P, Claim and MajorClaim -> C, with the B-/I- position kept
    as a suffix (e.g. 'B-Premise' -> 'P-B', 'I-MajorClaim' -> 'C-I').
    """
    # Applied in order; no pattern is a substring of a later one.
    substitutions = (
        ('B-Premise', 'P-B'),
        ('I-Premise', 'P-I'),
        ('I-Claim', 'C-I'),
        ('B-Claim', 'C-B'),
        ('B-MajorClaim', 'C-B'),
        ('I-MajorClaim', 'C-I'),
    )
    text = input
    for old, new in substitutions:
        text = text.replace(old, new)
    return text

class ModelNewES:
    """Tagger wrapping the achernodub BiLSTM-CNN-CRF model in
    models/model_new_es.hdf5, with labels mapped to the short API scheme."""

    # Loaded once at class-definition time and shared by all instances.
    # NOTE(review): this module imports TaggerFactory from `backend.lstm...`
    # while ModelNewWD imports it from `lstm...` -- confirm which package
    # path actually exists in the deployed image.
    tagger = TaggerFactory.load(path, -1)

    def __init__(self):
        # label() tokenizes with NLTK; make sure Punkt is available.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def label(self, input):
        """Tokenize *input*, tag it, and return per-sentence lists of
        {'token', 'label'} dicts with labels in the short scheme."""
        sentences = [nltk.word_tokenize(input)]

        raw = self.tagger.predict_tags_from_words(sentences, batch_size=200)

        # Map tagger labels (B-Premise, I-MajorClaim, ...) onto P-*/C-*.
        output = [[replace_labels(tag) for tag in sentTags]
                  for sentTags in raw]

        return [
            [{'token': token, 'label': output[sentIdx][tokIdx]}
             for tokIdx, token in enumerate(sentTokens)]
            for sentIdx, sentTokens in enumerate(sentences)
        ]
38 changes: 38 additions & 0 deletions backend/ModelNewWD.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import sys
sys.path.insert(0, "./lstm")

from lstm.src.factories.factory_tagger import TaggerFactory

import nltk
import json

path = "models/model_new_wd.hdf5"


class ModelNewWD:
    """Tagger wrapping the achernodub BiLSTM-CNN-CRF model in
    models/model_new_wd.hdf5; labels are returned as predicted."""

    # Loaded once at class-definition time and shared by all instances.
    tagger = TaggerFactory.load(path, -1)

    def __init__(self):
        # label() tokenizes with NLTK; make sure Punkt is available.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def label(self, input):
        """Tokenize *input*, tag it, and return per-sentence lists of
        {'token', 'label'} dicts."""
        sentences = [nltk.word_tokenize(input)]

        output = self.tagger.predict_tags_from_words(sentences, batch_size=200)

        # Pair every token with its predicted tag, sentence by sentence.
        return [
            [{'token': token, 'label': output[sentIdx][tokIdx]}
             for tokIdx, token in enumerate(sentTokens)]
            for sentIdx, sentTokens in enumerate(sentences)
        ]
Empty file added backend/README.md
Empty file.
Loading