Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
backend/models/IBM.h5 filter=lfs diff=lfs merge=lfs -text
22 changes: 22 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Only modify variables that have a comment above them
# Contact IWG if you wish to alter the template otherwise

name: Deploy
on:
  push:
    branches: main
  pull_request:
    branches: ['*']

jobs:
  deploy:
    name: Environments
    uses: arg-tech/deployment-templates/.github/workflows/default-deploy-template.yml@main
    secrets: inherit
    with:
      # Specify the target production server
      target_production_server_nickname: argand
      # Define a URL for your app, without the http:// or www prefixes
      full_app_url: targer.amfws.arg.tech
      # The port that is exposed on localhost (must be the same as in docker-compose.yml)
      app_port: 10600
677 changes: 677 additions & 0 deletions backend/BiLSTM.py

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Use an official Python runtime as a parent image.
# NOTE(review): python:3.5 is end-of-life; it appears to be kept because the
# pinned torch==0.4.1 / torchvision==0.2.1 wheels target it -- confirm before
# bumping the base image.
FROM python:3.5

# Copy the build context into the image root.
# COPY replaces ADD: for plain local files COPY is the recommended
# instruction (ADD additionally fetches URLs and auto-extracts archives,
# which is not wanted here).
COPY . /

# Set the working directory to /
WORKDIR /

# Install pinned Python dependencies, plus the exact torch/torchvision
# versions the shipped models were trained against.
RUN pip install -r requirements.txt
RUN pip install torch==0.4.1 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install torchvision==0.2.1 -f https://download.pytorch.org/whl/torch_stable.html

# Fetch the UKPLab BiLSTM-CNN-CRF framework the backend runs on top of.
RUN git clone https://github.com/UKPLab/emnlp2017-bilstm-cnn-crf.git

# Move the application modules into the framework checkout so their
# relative imports (neuralnets.*, util.*) resolve at runtime.
RUN mv backend.py emnlp2017-bilstm-cnn-crf/ && mv Model.py emnlp2017-bilstm-cnn-crf/ && mv ModelNewES.py emnlp2017-bilstm-cnn-crf/ && mv ModelNewWD.py emnlp2017-bilstm-cnn-crf/ && mv Segmenter.py emnlp2017-bilstm-cnn-crf/

# Ship the trained model weights alongside the framework.
RUN mv models/* emnlp2017-bilstm-cnn-crf/models/

# Our patched BiLSTM.py overrides the framework's stock implementation.
RUN mv -f BiLSTM.py emnlp2017-bilstm-cnn-crf/neuralnets/

RUN mkdir emnlp2017-bilstm-cnn-crf/lstm

# Second tagger implementation, used by ModelNewES / ModelNewWD.
RUN git clone https://github.com/achernodub/bilstm-cnn-crf-tagger.git emnlp2017-bilstm-cnn-crf/lstm
RUN pip install prometheus-flask-exporter==0.1.2

# Make port 6000 available to the world outside this container.
# NOTE(review): deploy.yml declares app_port 10600 -- confirm that
# docker-compose.yml maps 10600 to this container port.
EXPOSE 6000

WORKDIR /emnlp2017-bilstm-cnn-crf

# Run the Flask backend when the container launches.
CMD ["python3", "backend.py"]









74 changes: 74 additions & 0 deletions backend/Model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from __future__ import print_function
import nltk
from util.preprocessing import addCharInformation, createMatrices, addCasingInformation
from neuralnets.BiLSTM import BiLSTM
import sys
import json

class Model:
    """Sequence labeller backed by a pre-trained UKPLab BiLSTM network.

    The network is loaded from ``models/<path>`` at construction time and
    used to tag word-tokenized input text.
    """

    def __init__(self, path):
        # Log which model file is being loaded (useful in container logs).
        print(path)
        self.lstmModel = BiLSTM.loadModel("models/" + path)
        # label() needs NLTK's Punkt sentence tokenizer; fetch it if absent.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def label(self, input):
        """Tag *input* text and return the labelled tokens as a JSON string."""
        # Sentence-split, word-tokenize, and enrich with the character and
        # casing features the network expects.
        sentences = [{'tokens': nltk.word_tokenize(sent)}
                     for sent in nltk.sent_tokenize(input)]
        addCharInformation(sentences)
        addCasingInformation(sentences)
        dataMatrix = createMatrices(sentences, self.lstmModel.mappings, True)

        # Run the tagger; `tags` maps model name -> per-sentence tag lists.
        tags = self.lstmModel.tagSentences(dataMatrix)

        # One {'token', 'label'} dict per token; only the first model in
        # sorted name order contributes the label.
        result = []
        for sentIdx, sent in enumerate(sentences):
            labelled = []
            for tokIdx, token in enumerate(sent['tokens']):
                perModel = [tags[name][sentIdx][tokIdx]
                            for name in sorted(tags.keys())]
                labelled.append({'token': token, 'label': perModel[0]})
            result.append(labelled)

        return json.dumps(result)

    def label_with_probs(self, input):
        """Tag *input* text and return labelled tokens with probabilities.

        Unlike :meth:`label`, the result is a Python list (not JSON), and
        each token dict carries an extra 'prob' entry.
        """
        sentences = [{'tokens': nltk.word_tokenize(sent)}
                     for sent in nltk.sent_tokenize(input)]
        addCharInformation(sentences)
        addCasingInformation(sentences)
        dataMatrix = createMatrices(sentences, self.lstmModel.mappings, True)

        # Tagger variant that also reports per-token probabilities.
        tags, probs = self.lstmModel.tagSentences_with_probs(dataMatrix)

        result = []
        for sentIdx, sent in enumerate(sentences):
            labelled = []
            for tokIdx, token in enumerate(sent['tokens']):
                names = sorted(tags.keys())
                perModel = [tags[name][sentIdx][tokIdx] for name in names]
                perModelProb = [probs[name][sentIdx][tokIdx] for name in names]
                labelled.append({'token': token,
                                 'label': perModel[0],
                                 'prob': perModelProb[0]})
            result.append(labelled)

        return result
42 changes: 42 additions & 0 deletions backend/ModelNewES.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import sys
sys.path.insert(0, "./lstm")

from backend.lstm.src.factories.factory_tagger import TaggerFactory

import nltk
import json

path = "models/model_new_es.hdf5"

def replace_labels(input):
    """Translate tagger label names into the short API label scheme.

    Premise -> P, Claim and MajorClaim -> C, with the B-/I- position kept
    as a suffix (e.g. 'B-Premise' -> 'P-B', 'I-MajorClaim' -> 'C-I').
    """
    # Applied in order; no pattern is a substring of a later one.
    substitutions = (
        ('B-Premise', 'P-B'),
        ('I-Premise', 'P-I'),
        ('I-Claim', 'C-I'),
        ('B-Claim', 'C-B'),
        ('B-MajorClaim', 'C-B'),
        ('I-MajorClaim', 'C-I'),
    )
    text = input
    for old, new in substitutions:
        text = text.replace(old, new)
    return text

class ModelNewES:
    """Tagger wrapping the achernodub BiLSTM-CNN-CRF model in
    models/model_new_es.hdf5, with labels mapped to the short API scheme."""

    # Loaded once at class-definition time and shared by all instances.
    # NOTE(review): this module imports TaggerFactory from `backend.lstm...`
    # while ModelNewWD imports it from `lstm...` -- confirm which package
    # path actually exists in the deployed image.
    tagger = TaggerFactory.load(path, -1)

    def __init__(self):
        # label() tokenizes with NLTK; make sure Punkt is available.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def label(self, input):
        """Tokenize *input*, tag it, and return per-sentence lists of
        {'token', 'label'} dicts with labels in the short scheme."""
        sentences = [nltk.word_tokenize(input)]

        raw = self.tagger.predict_tags_from_words(sentences, batch_size=200)

        # Map tagger labels (B-Premise, I-MajorClaim, ...) onto P-*/C-*.
        output = [[replace_labels(tag) for tag in sentTags]
                  for sentTags in raw]

        return [
            [{'token': token, 'label': output[sentIdx][tokIdx]}
             for tokIdx, token in enumerate(sentTokens)]
            for sentIdx, sentTokens in enumerate(sentences)
        ]
38 changes: 38 additions & 0 deletions backend/ModelNewWD.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import sys
sys.path.insert(0, "./lstm")

from lstm.src.factories.factory_tagger import TaggerFactory

import nltk
import json

path = "models/model_new_wd.hdf5"


class ModelNewWD:
    """Tagger wrapping the achernodub BiLSTM-CNN-CRF model in
    models/model_new_wd.hdf5; labels are returned as predicted."""

    # Loaded once at class-definition time and shared by all instances.
    tagger = TaggerFactory.load(path, -1)

    def __init__(self):
        # label() tokenizes with NLTK; make sure Punkt is available.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def label(self, input):
        """Tokenize *input*, tag it, and return per-sentence lists of
        {'token', 'label'} dicts."""
        sentences = [nltk.word_tokenize(input)]

        output = self.tagger.predict_tags_from_words(sentences, batch_size=200)

        # Pair every token with its predicted tag, sentence by sentence.
        return [
            [{'token': token, 'label': output[sentIdx][tokIdx]}
             for tokIdx, token in enumerate(sentTokens)]
            for sentIdx, sentTokens in enumerate(sentences)
        ]
Empty file added backend/README.md
Empty file.
Loading