From d86fccbc50b217dac7974cc24db5ca6ace193b0c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Sep 2021 18:58:32 -0700 Subject: [PATCH 001/134] init code --- main/src/main/python/__init__.py | 0 main/src/main/python/pytorch/__init__.py | 0 main/src/main/python/pytorch/metal.py | 0 main/src/main/python/pytorch/taskManager.py | 213 ++++++++++++++++++ main/src/main/python/run.py | 25 ++ main/src/main/python/sequences/__init__.py | 0 .../src/main/python/sequences/columnReader.py | 48 ++++ 7 files changed, 286 insertions(+) create mode 100644 main/src/main/python/__init__.py create mode 100644 main/src/main/python/pytorch/__init__.py create mode 100644 main/src/main/python/pytorch/metal.py create mode 100644 main/src/main/python/pytorch/taskManager.py create mode 100644 main/src/main/python/run.py create mode 100644 main/src/main/python/sequences/__init__.py create mode 100644 main/src/main/python/sequences/columnReader.py diff --git a/main/src/main/python/__init__.py b/main/src/main/python/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/__init__.py b/main/src/main/python/pytorch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py new file mode 100644 index 000000000..f9fede902 --- /dev/null +++ b/main/src/main/python/pytorch/taskManager.py @@ -0,0 +1,213 @@ +import random +import math +from sequences.columnReader import ColumnReader + +TYPE_BASIC = 0 +TYPE_DUAL = 1 + +class TaskManager: + + def __init__(self, config, seed): + + self.config = config + self.random = seed + + # How many shards to have per epoch + self.shardsPerEpoch = config.get_int("mtl.shardsPerEpoch", 10) + + # Total number of epochs + self.maxEpochs:Int = config.get_int("mtl.maxEpochs", 100) + + 
# Training patience in number of epochs + self.epochPatience:Int = config.get_int("mtl.epochPatience", 5) + + # Array of all tasks to be managed + self.tasks = self.readTasks() + + self.taskCount = len(self.tasks) + self.indices = range(self.taskCount) + + # Training shards from all tasks + self.shards = self.mkShards() + + # Construct training shards by interleaving shards from all tasks + def mkShards(self): + shardsByTasks = list() + + # construct the shards for each task + for i in self.indices: + shardsByTasks += [self.tasks[i].mkShards()] + assert(len(shardsByTasks[i]) == self.shardsPerEpoch) + + # now interleave the tasks + interleavedShards = list() + for i in range(self.shardsPerEpoch): + for j in self.indices: + crtShard = shardsByTasks[j][i] + interleavedShards += [crtShard] + + + # print ("All shards:") + # for(i <- interleavedShards.indices) + # print (s"${interleavedShards(i)}") + + + return interleavedShards + + # Iterator over all sentences coming from all interleaved shards + def getSentences(self): + return SentenceIterator(self.tasks, self.shards, self.random) + + # Reads all tasks from disk in memory + def readTasks(self): + numberOfTasks = self.config.get_int("mtl.numberOfTasks", None) + tasks = list() + for i in range(numberOfTasks): + tasks += [self.readTask(i + 1)] + + print (f"Read {numberOfTasks} tasks from config file.") + return tasks + + def readTask(self, taskNumber): + taskName = self.config.get_string(f"mtl.task{taskNumber}.name", None) + train = self.config.get_string(f"mtl.task{taskNumber}.train", None) + + dev = self.config.get_string(f"mtl.task{taskNumber}.dev", None) if f"mtl.task{taskNumber}.dev" in self.config else None + test = self.config.get_string(f"mtl.task{taskNumber}.test", None) if f"mtl.task{taskNumber}.test" in self.config else None + + taskType = self.parseType(self.config.get_string(f"mtl.task{taskNumber}.type", "basic")) + + weight = self.config.get_float(f"mtl.task{taskNumber}.weight", 1.0) + + return 
Task(taskNumber - 1, taskName, taskType, self.shardsPerEpoch, weight, train, dev, test) + + def parseType(self, inf): + if inf == "basic": return TYPE_BASIC + elif inf == "dual": return TYPE_DUAL + else: raise ValueError(f"ERROR: unknown task type {inf}!") + + def debugTraversal(self): + for epoch in range(self.maxEpochs): + print (f"Started epoch {epoch}") + sentCount = 0 + taskId = 0 + totalSents = 0 + for sentence in getSentences(): + totalSents += 1 + if(sentence[0] != taskId): + print (f"Read {sentCount} sentences from task {taskId}") + taskId = sentence[0] + sentCount = 1 + else: + sentCount += 1 + print (f"Read {sentCount} sentences from task {taskId}") + print (f"Read {totalSents} sentences in epoch {epoch}.") + +class SentenceIterator(object): + def __init__(tasks, shards, random): + + self.tasks = tasks + self.shards = shards + self.random = random #random seed + + # Offset in randomizedSentencePositions array + self.sentenceOffset = 0 + self.randomizedSentencePositions = randomizeSentences() + + class Sentence: + def __init__(self, taskId, sentencePosition): + self.taskId = taskId + self.sentencePosition = sentencePosition + + # Randomizes all sentences across all tasks + def randomizeSentences(): + # first, randomize the shards + random.seed(self.random) + randomizedShards = random.shuffle(self.shards) + randomizedSents = list() + for shard in randomizedShards: + # second, randomize the sentences inside each shard + sents = random.shuffle(list(range(shard.startPosition, shard.endPosition))) + for sent in sents: + # store the randomized sentences + randomizedSents += [Sentence(shard.taskId, sent)] + return randomizedSents + + def __len__(self): + return len(self.randomizedSentencePositions) + + def __iter__(self): + return self + + def hasNext(self): return self.sentenceOffset < len(self.randomizedSentencePositions) + + def __next__(self): + assert(self.sentenceOffset >= 0 and self.sentenceOffset < len(self.randomizedSentencePositions)) + + s = 
self.randomizedSentencePositions[sentenceOffset] + tid = s.taskId + sentence = self.tasks[tid].trainSentences[s.sentencePosition] + self.sentenceOffset += 1 + + #print ("shardPosition = $shardPosition, sentencePosition = $sentencePosition") + + return (tid, sentence) + +class Shard: + def __init__(self, taskId, startPosition, endPosition): + self.taskId = taskId + self.startPosition = startPosition + self.endPosition = endPosition + +class Task: + def __init__(self, + taskId, # this starts at 0 so we can use it as an index in the array of tasks + taskName:str, + taskType:int, + shardsPerEpoch:int, + taskWeight:float, + trainFileName:str, + devFileName:str = None, + testFileName:str = None): + self.taskId = taskId + taskNumber = taskId + 1 + print (f"Reading task {taskNumber} ({taskName})...") + self.trainSentences = ColumnReader.readColumns(trainFileName) + self.devSentences = ColumnReader.readColumns(devFileName) if devFileName else None + self.testSentences = ColumnReader.readColumns(testFileName) if testFileName else None + + self.isBasic:Boolean = taskType == TYPE_BASIC + self.isDual:Boolean = taskType == TYPE_DUAL + + if taskType == TYPE_BASIC: + self.prettyType = "basic" + elif taskType == TYPE_DUAL: + self.prettyType = "dual" + else: + self.prettyType = "unknown" + + # The size of the training shard for this task + self.shardSize = math.ceil(len(self.trainSentences) / shardsPerEpoch) + + # Current position in the training sentences when we iterate during training + currentTrainingSentencePosition = 0 + + print (f"============ starting task {taskNumber} ============") + print (f"Read {len(self.trainSentences)} training sentences for task {taskNumber}, with shard size {self.shardSize}.") + if(self.devSentences is not None): + print (f"Read {len(self.devSentences)} development sentences for task {taskNumber}.") + if(self.testSentences is not None): + print (f"Read {len(self.testSentences)} testing sentences for task {taskNumber}.") + print (f"Using taskWeight = 
{taskWeight}") + print (f"Task type = {self.prettyType}.") + print (f"============ completed task {taskNumber} ============") + + # Construct the shards from all training sentences in this task + def mkShards(self): + shards = list() + crtPos = 0 + while(crtPos < len(self.trainSentences)): + endPos = min(crtPos + self.shardSize, len(self.trainSentences)) + shards += [Shard(self.taskId, crtPos, endPos)] + crtPos = endPos + return shards diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py new file mode 100644 index 000000000..5d6ea1586 --- /dev/null +++ b/main/src/main/python/run.py @@ -0,0 +1,25 @@ +from pyhocon import ConfigFactory +import argparse +from pytorch.taskManager import TaskManager + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--train', action='store_true', help='Set the code to training purpose.') + parser.add_argument('--test', action='store_true', help='Set the code to testing purpose.') + parser.add_argument('--shell', action='store_true', help='Set the code to shell mode.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) + args = parser.parse_args() + + if args.train: + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + # modelName = args.model_file + # mtl = Metal(taskManager, parameters, None) + # mtl.train(modelName) + elif args.test: + pass + elif args.shell: + pass \ No newline at end of file diff --git a/main/src/main/python/sequences/__init__.py b/main/src/main/python/sequences/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/sequences/columnReader.py b/main/src/main/python/sequences/columnReader.py new file mode 100644 index 000000000..c38aa66c7 --- /dev/null +++ 
b/main/src/main/python/sequences/columnReader.py @@ -0,0 +1,48 @@ +#----------------------------------------------------------- +# Reads the CoNLL-like column format +#----------------------------------------------------------- +class ColumnReader: + + def readColumns(source): + if type(source) is str: + source = open(source) + sentence = list() + sentences = list() + for line in source: + print (line) + l = line.strip() + if (l is ""): + # end of sentence + if (sentence): + sentences += [sentence] + sentence = list() + else: + # within the same sentence + bits = l.split("\\s") + if (len(bits) < 2): + raise RuntimeError(f"ERROR: invalid line {l}!") + sentence += Row(bits) + + if (sentence): + sentences += [sentence] + + source.close() + return sentences + +# ----------------------------------------------------------- +# Stores training data for sequence modeling +# Mandatory columns: 0 - word, 1 - label +# Optional columns: 2 - POS tag, 3+ SRL arguments +# @param tokens +# ----------------------------------------------------------- + +class Row: + + def __init__(self, tokens): + self.tokens = tokens + self.length = len(tokens) + + def get(self, idx): + if(idx >= self.length): + raise RuntimeError(f"ERROR: trying to read field #{idx}, which does not exist in this row: {tokens}!") + return tokens[idx] From 07c4142ebc8a804a09ceb207e4d1070eaee620c2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Sep 2021 19:12:17 -0700 Subject: [PATCH 002/134] Update columnReader.py --- main/src/main/python/sequences/columnReader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/sequences/columnReader.py b/main/src/main/python/sequences/columnReader.py index c38aa66c7..0f8c04610 100644 --- a/main/src/main/python/sequences/columnReader.py +++ b/main/src/main/python/sequences/columnReader.py @@ -9,7 +9,6 @@ def readColumns(source): sentence = list() sentences = list() for line in source: - print (line) l = line.strip() if (l is ""): # 
end of sentence @@ -18,10 +17,10 @@ def readColumns(source): sentence = list() else: # within the same sentence - bits = l.split("\\s") + bits = l.split("\t") if (len(bits) < 2): raise RuntimeError(f"ERROR: invalid line {l}!") - sentence += Row(bits) + sentence += [Row(bits)] if (sentence): sentences += [sentence] From c9ec5b8d398232c61cf552eb479b615a2e348239 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 16 Sep 2021 12:14:25 -0700 Subject: [PATCH 003/134] refined the code and fixed few bugs --- main/src/main/python/pytorch/taskManager.py | 65 +++------------------ 1 file changed, 7 insertions(+), 58 deletions(-) diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index f9fede902..069600d93 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -46,17 +46,16 @@ def mkShards(self): crtShard = shardsByTasks[j][i] interleavedShards += [crtShard] - - # print ("All shards:") - # for(i <- interleavedShards.indices) - # print (s"${interleavedShards(i)}") - - return interleavedShards # Iterator over all sentences coming from all interleaved shards def getSentences(self): - return SentenceIterator(self.tasks, self.shards, self.random) + random.seed(self.random) + randomizedShards = random.sample(self.shards, len(self.shards)) + for shard in randomizedShards: + sents = random.sample(range(shard.startPosition, shard.endPosition), shard.endPosition-shard.startPosition) + for sent in sents: + yield (shard.taskId, self.tasks[shard.taskId].trainSentences[sent]) # Reads all tasks from disk in memory def readTasks(self): @@ -92,7 +91,7 @@ def debugTraversal(self): sentCount = 0 taskId = 0 totalSents = 0 - for sentence in getSentences(): + for sentence in self.getSentences(): totalSents += 1 if(sentence[0] != taskId): print (f"Read {sentCount} sentences from task {taskId}") @@ -103,56 +102,6 @@ def debugTraversal(self): print (f"Read {sentCount} sentences from task 
{taskId}") print (f"Read {totalSents} sentences in epoch {epoch}.") -class SentenceIterator(object): - def __init__(tasks, shards, random): - - self.tasks = tasks - self.shards = shards - self.random = random #random seed - - # Offset in randomizedSentencePositions array - self.sentenceOffset = 0 - self.randomizedSentencePositions = randomizeSentences() - - class Sentence: - def __init__(self, taskId, sentencePosition): - self.taskId = taskId - self.sentencePosition = sentencePosition - - # Randomizes all sentences across all tasks - def randomizeSentences(): - # first, randomize the shards - random.seed(self.random) - randomizedShards = random.shuffle(self.shards) - randomizedSents = list() - for shard in randomizedShards: - # second, randomize the sentences inside each shard - sents = random.shuffle(list(range(shard.startPosition, shard.endPosition))) - for sent in sents: - # store the randomized sentences - randomizedSents += [Sentence(shard.taskId, sent)] - return randomizedSents - - def __len__(self): - return len(self.randomizedSentencePositions) - - def __iter__(self): - return self - - def hasNext(self): return self.sentenceOffset < len(self.randomizedSentencePositions) - - def __next__(self): - assert(self.sentenceOffset >= 0 and self.sentenceOffset < len(self.randomizedSentencePositions)) - - s = self.randomizedSentencePositions[sentenceOffset] - tid = s.taskId - sentence = self.tasks[tid].trainSentences[s.sentencePosition] - self.sentenceOffset += 1 - - #print ("shardPosition = $shardPosition, sentencePosition = $sentencePosition") - - return (tid, sentence) - class Shard: def __init__(self, taskId, startPosition, endPosition): self.taskId = taskId From 9311763f54504602b6813fe10e6c0ae9bf5ef19a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 20 Sep 2021 12:34:23 -0700 Subject: [PATCH 004/134] initial code for metal --- main/src/main/python/pytorch/metal.py | 58 ++++++++++++ main/src/main/python/pytorch/taskManager.py | 9 +- 
main/src/main/python/pytorch/utils.py | 22 +++++ main/src/main/python/sequences/rowReaders.py | 96 ++++++++++++++++++++ 4 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 main/src/main/python/pytorch/utils.py create mode 100644 main/src/main/python/sequences/rowReaders.py diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index e69de29bb..d9f13a02e 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -0,0 +1,58 @@ +from utils import Utils +from collections import Counter +from sequences.rowReader import * + +class Metal(): + """docstring for Metal""" + def __init__(self, taskManager, parameters, modelOpt): + # One Layers object per task; model(0) contains the Layers shared between all tasks (if any) + if modelOpt: + self.model = modelOpt + else: + self.model = self.initialize() + self.taskManager = taskManager + + def initialize(self): + + taskWords, taskLabels = mkVocabularies() + + layersPerTask = [None for _ in range(taskManager.taskCount + 1)] + + layersPerTask[0] = Layers(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) + + inputSize = layersPerTask[0].outDim + + for i in taskManager.indices: + layersPerTask[i+1] = Layers(taskManager, s"mtl.task${i + 1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) + + for i in range(len(layersPerTask)): + print (s"Summary of layersPerTask({i}):") + print (layersPerTask[i]) + + return layersPerTask + + def mkVocabularies(self): + # index 0 reserved for the shared Layers; tid + 1 corresponds to each task + labels = [Counter() for _ in range(taskManager.taskCount + 1)] + for i in range(1, len(labels)): # labels(0) not used, since only task-specific layers have a final layer + labels[i][Utils.START_TAG] += 1 + labels[i][Utils.STOP_TAG] += 1 + + words = [Counter() for _ in range(taskManager.taskCount + 1)] + + 
reader = MetalRowReader() + + for tid in taskManager.indices: + for sentence in taskManager.tasks[tid].trainSentences + annotatedSentences = reader.toAnnotatedSentences(sentence) + + for asent in annotatedSentences: + annotatedSentence = asent[0] + sentenceLabels = asent[1] + for i in annotatedSentence.indices: + words[tid + 1][annotatedSentence.words[i]] += 1 + words[0][annotatedSentence.words[i]] += 1 + labels[tid + 1][sentenceLabels[i]] += 1 + + return words, labels + diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index 069600d93..ef4c18bbf 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -1,6 +1,7 @@ import random import math from sequences.columnReader import ColumnReader +from dataclasses import dataclass TYPE_BASIC = 0 TYPE_DUAL = 1 @@ -102,11 +103,11 @@ def debugTraversal(self): print (f"Read {sentCount} sentences from task {taskId}") print (f"Read {totalSents} sentences in epoch {epoch}.") +@dataclass class Shard: - def __init__(self, taskId, startPosition, endPosition): - self.taskId = taskId - self.startPosition = startPosition - self.endPosition = endPosition + taskId: int + startPosition: int + endPosition: int class Task: def __init__(self, diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py new file mode 100644 index 000000000..bbe4bfb93 --- /dev/null +++ b/main/src/main/python/pytorch/utils.py @@ -0,0 +1,22 @@ + +class Utils: + def __init__(self): + self.concatenateCount = 0 + + self.UNK_WORD = "" + self.EOS_WORD = "" + + self.UNK_EMBEDDING = 0 + + self.START_TAG = "" + self.STOP_TAG = "" + + self.RANDOM_SEED = 2522620396L # used for both DyNet, and the JVM seed for shuffling data + self.WEIGHT_DECAY = 1e-5 + + self.LOG_MIN_VALUE = -10000.0 + + self.DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default + + self.IS_DYNET_INITIALIZED = False + diff --git 
a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py new file mode 100644 index 000000000..c11c39865 --- /dev/null +++ b/main/src/main/python/sequences/rowReaders.py @@ -0,0 +1,96 @@ + +class AnnotatedSentence: + + def __init__(self, words, posTags = None, neTags = None, headPositions = None): + self.words = words + self.posTags = posTags + self.neTags = neTags + self.headPositions = headPositions + self.size = len(words) + self.indicies = range(self.size) + +class RowReader(object): + + def __init__(self): + raise NotImplementedError + + def toAnnotatedSentences(self, rows): + raise NotImplementedError + +class MetalRowReader(RowReader): + + def __init__(self): + self.WORD_POSITION = 0 + self.POS_TAG_POSITION = 1 + self.NE_LABEL_POSITION = 2 + self.LABEL_START_OFFSET = 3 + + def toAnnotatedSentences(self, rows): + if (len(rows.head) == 2): + self.parseSimple(rows) + elif (len(rows.head) == 4): + self.parseSimpleExtended(rows) + elif (len(rows.head) >= 5): + self.parseFull(rows) + else: + raise RuntimeError("ERROR: the Metal format expects 2, 4, or 5+ columns!") + + # Parser for the simple format: word, label + def parseSimple(rows): + assert(len(rows.head) == 2) + words = list() + labels = list() + + for row in rows: + words += [row.get(self.WORD_POSITION)] + labels += [row.get(self.WORD_POSITION + 1)] + + return AnnotatedSentence(words), labels + + # Parser for the simple extended format: word, POS tag, NE label, label + def parseSimpleExtended(rows): + assert(len(rows.head) == 4) + words = list() + posTags = list() + neLabels = list() + labels = list() + + for row in rows: + words += [row.get(self.WORD_POSITION)] + posTags += [row.get(self.POS_TAG_POSITION)] + neLabels += [row.get(self.NE_LABEL_POSITION)] + labels += [row.get(self.LABEL_START_OFFSET)] + + return AnnotatedSentence(words), posTags, neLabels, labels + + # Parser for the full format: word, POS tag, NE label, (label head)+ + def parseFull(rows: 
IndexedSeq[Row]): + assert(len(rows.head) >= 5) + numSent = (len(rows.head) - 3) / 2 + assert(numSent >= 1) + + words = list() + posTags = list() + neLabels = list() + headPositions = [list() for i in range(numSent)] + labels = [list() for i in range(numSent)] + + for row in rows: + words += [row.get(self.WORD_POSITION)] + posTags += [row.get(self.POS_TAG_POSITION)] + neLabels += [row.get(self.NE_LABEL_POSITION)] + + for j in range(numSent): + labels[j]+= [row.get(self.LABEL_START_OFFSET + (j * 2))] + try: + headPositions[j] += [int(row.get(self.LABEL_START_OFFSET + (j * 2) + 1))] + except: + raise RuntimeError # not sure about this part + + sentences = list() + for i in range(numSent): + annotatedSent = AnnotatedSentence(words, posTags, neLabels, headPositions[i]) + sentLabels = labels[i] + sentences += [(annotatedSent, sentLabels)] + + return sentences From a33fe369f162409247393276bde85ce11469c592 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 20 Sep 2021 20:21:00 -0700 Subject: [PATCH 005/134] refine metal, added layers(partial) --- main/src/main/python/pytorch/layers.py | 151 +++++++++++++++++++++++++ main/src/main/python/pytorch/metal.py | 20 ++-- main/src/main/python/pytorch/utils.py | 33 +++--- 3 files changed, 181 insertions(+), 23 deletions(-) create mode 100644 main/src/main/python/pytorch/layers.py diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py new file mode 100644 index 000000000..c33f3b3e1 --- /dev/null +++ b/main/src/main/python/pytorch/layers.py @@ -0,0 +1,151 @@ +import torch.nn as nn +from utils import * + +class Layers(nn.Module): + def __init__(self, initialLayer, intermediateLayers, finalLayer): + super().__init__() + if finalLayer: + self.outDim = finalLayer.outDim + elif intermediateLayers: + self.outDim = intermediateLayers[-1].outDim + elif initialLayer: + self.outDim = initialLayer.outDim + else: + self.outDim = None + + if initialLayer and intermediateLayers and finalLayer: + 
self.nonEmpty = True + self.isEmpty = not self.nonEmpty + + self.initialLayer = initialLayer + self.intermediateLayers = intermediateLayers + self.finalLayer = finalLayer + + def __str__(self): + s = "" + started = False + if(initialLayer.nonEmpty): + s += "initial = " + initialLayer + started = True + for i in intermediateLayers.indices: + if(started) s += " " + s += s"intermediate ({i+1}) = " + intermediateLayers[i] + started = True + if(finalLayer.nonEmpty): + if(started) s += " " + s += "final = " + finalLayer + return s + + def forward(self, sentence, constEmnbeddings, doDropout): + if self.initialLayer.isEmpty: + raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") + states = self.initialLayer(sentence, constEmnbeddings, doDropout) + for intermediateLayer in self.intermediateLayers: + states = intermediateLayer(states, doDropout) + if self.finalLayer.nonEmpty: + states = self.finalLayer(states, sentence.headPositions, doDropout) + + return states + + def forwardFrom(self, inStates, headPositions, doDropout): + if self.initialLayer.nonEmpty: + raise RuntimeError(f"ERROR: you can't call forwardFrom() on a Layers object that has an initial layer: {self}") + states = inStates + for intermediateLayer in self.intermediateLayers: + states = intermediateLayer(states, doDropout) + if self.finalLayer.nonEmpty: + states = self.finalLayer(states, sentence.headPositions, doDropout) + + return states + + def saveX2i(self): + x2i = dict() + if self.initialLayer.nonEmpty: + x2i['hasInitial'] = 1 + x2i['initialLayer'] = self.initialLayer.saveX2i() + else: + x2i['hasInitial'] = 0 + x2i['intermediateCount'] = len(intermediateLayers) + for il in self.intermediateLayers: + il.saveX2i() + if self.finalLayer.nonEmpty: + x2i['hasFinal'] = 1 + x2i['finalLayer'] = self.finalLayer.saveX2i() + else: + x2i['finalLayer'] = 0 + + return x2i + + @classmethod + def apply(cls, config, paramPrefix, parameters, wordCounter, 
labelCounter, isDual, providedInputSize): + initialLayer = EmbeddingLayer.initialize(config, paramPrefix + ".initial", parameters, wordCounter) + + if(initialLayer): + inputSize = initialLayer.outDim + elif(providedInputSize): + inputSize = providedInputSize + else: + inputSize = None + + intermediateLayers = list() + done = False + MAX_INTERMEDIATE_LAYERS = 10 + + for i in range(1, MAX_INTERMEDIATE_LAYERS): + if done: + break + if inputSize is None: + raise RuntimeError("ERROR: trying to construct an intermediate layer without a known input size!") + + intermediateLayer = RnnLayer.initialize(config, paramPrefix + f".intermediate{i}", parameters, inputSize) + + if intermediateLayer: + intermediateLayers.append(intermediateLayer) + inputSize = intermediateLayer.outDim + else: + done = True + + if labelCounter: + if inputSize is None: + raise RuntimeError("ERROR: trying to construct a final layer without a known input size!") + else: + finalLayer = ForwardLayer.initialize(config, paramPrefix + ".final", parameters, labelCounter, isDual, inputSize) + else: + finalLayer = None + + return cls(initialLayer, intermediateLayers, finalLayer) + + @classmethod + def loadX2i(cls, models, x2i): + hasInitial = x2i['hasInitial'] + initialLayer = EmbeddingLayer.load(models, x2i) if hasInitial == 1 else None + + intermediateLayers = list() + intermediateCount = x2i['intermediateCount'] + for _ in range(intermediateCount): + il = RnnLayer.load(models, x2i) + intermediateLayers.append(il) + + hasFinal = x2i['hasFinal'] + finalLayer = ForwardLayer.load(models, x2i) if hasFinal == 1 else none + + return cls(initialLayer, intermediateLayers, finalLayer) + + def predictJointly(layers, sentence, constEmnbeddings): + TODO + def forwardForTask(layers, taskId, sentence, constEmnbeddings, doDropout): + TODO + def predict(layers, taskId, sentence, constEmnbeddings): + TODO + def predictWithScores(layers, taskId, sentence, constEmnbeddings): + TODO + def parse(layers, sentence, 
constEmnbeddings): + TODO + def loss(layers, taskId, sentence, goldLabels): + TODO + + + + + + diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index d9f13a02e..23f3f2b16 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -1,4 +1,4 @@ -from utils import Utils +from utils import * from collections import Counter from sequences.rowReader import * @@ -18,15 +18,15 @@ def initialize(self): layersPerTask = [None for _ in range(taskManager.taskCount + 1)] - layersPerTask[0] = Layers(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) + layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) inputSize = layersPerTask[0].outDim for i in taskManager.indices: - layersPerTask[i+1] = Layers(taskManager, s"mtl.task${i + 1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) + layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) for i in range(len(layersPerTask)): - print (s"Summary of layersPerTask({i}):") + print (f"Summary of layersPerTask({i}):") print (layersPerTask[i]) return layersPerTask @@ -35,23 +35,23 @@ def mkVocabularies(self): # index 0 reserved for the shared Layers; tid + 1 corresponds to each task labels = [Counter() for _ in range(taskManager.taskCount + 1)] for i in range(1, len(labels)): # labels(0) not used, since only task-specific layers have a final layer - labels[i][Utils.START_TAG] += 1 - labels[i][Utils.STOP_TAG] += 1 + labels[i][START_TAG] += 1 + labels[i][STOP_TAG] += 1 words = [Counter() for _ in range(taskManager.taskCount + 1)] reader = MetalRowReader() for tid in taskManager.indices: - for sentence in taskManager.tasks[tid].trainSentences + for sentence 
in taskManager.tasks[tid].trainSentences: annotatedSentences = reader.toAnnotatedSentences(sentence) for asent in annotatedSentences: annotatedSentence = asent[0] sentenceLabels = asent[1] - for i in annotatedSentence.indices: - words[tid + 1][annotatedSentence.words[i]] += 1 - words[0][annotatedSentence.words[i]] += 1 + for i, word in enumerate(annotatedSentence.words): + words[tid + 1][word] += 1 + words[0][word] += 1 labels[tid + 1][sentenceLabels[i]] += 1 return words, labels diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index bbe4bfb93..907ba8fd3 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -1,22 +1,29 @@ -class Utils: - def __init__(self): - self.concatenateCount = 0 +concatenateCount = 0 - self.UNK_WORD = "" - self.EOS_WORD = "" +UNK_WORD = "" +EOS_WORD = "" - self.UNK_EMBEDDING = 0 +UNK_EMBEDDING = 0 - self.START_TAG = "" - self.STOP_TAG = "" +START_TAG = "" +STOP_TAG = "" - self.RANDOM_SEED = 2522620396L # used for both DyNet, and the JVM seed for shuffling data - self.WEIGHT_DECAY = 1e-5 +RANDOM_SEED = 2522620396L # used for both DyNet, and the JVM seed for shuffling data +WEIGHT_DECAY = 1e-5 + +LOG_MIN_VALUE = -10000.0 + +DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default + +IS_DYNET_INITIALIZED = False + +def save(file, values, comment): + file.write("# " + comment + "\n") + for key, value in values.items(): + file.write(f"{key}\t{value}\n") + file.write("\n") - self.LOG_MIN_VALUE = -10000.0 - self.DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default - self.IS_DYNET_INITIALIZED = False From 822f6c2ef97895568ae0839ae7cb156d99bf070e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 21 Sep 2021 02:23:29 -0700 Subject: [PATCH 006/134] fixed some bugs, init code for embeddings --- .../python/pytorch/constEmbeddingsGlove.py | 7 + .../src/main/python/pytorch/embeddingLayer.py | 336 ++++++++++++++++++ main/src/main/python/pytorch/initialLayer.py | 11 
+ main/src/main/python/pytorch/layers.py | 22 +- main/src/main/python/pytorch/metal.py | 10 +- main/src/main/python/pytorch/taskManager.py | 4 +- main/src/main/python/pytorch/utils.py | 5 +- main/src/main/python/run.py | 7 +- main/src/main/python/sequences/rowReaders.py | 2 +- 9 files changed, 383 insertions(+), 21 deletions(-) create mode 100644 main/src/main/python/pytorch/constEmbeddingsGlove.py create mode 100644 main/src/main/python/pytorch/embeddingLayer.py create mode 100644 main/src/main/python/pytorch/initialLayer.py diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py new file mode 100644 index 000000000..d6129393d --- /dev/null +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass +import torch.nn as nn + +@dataclass +class ConstEmbeddingParameters: + emb: nn.Embedding.from_pretrianed("....") + w2i: dict \ No newline at end of file diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py new file mode 100644 index 000000000..386d581cf --- /dev/null +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -0,0 +1,336 @@ +from initialLayer import InitialLayer +import random +from utils import * + +DEFAULT_DROPOUT_PROB: float = DEFAULT_DROPOUT_PROBABILITY +DEFAULT_LEARNED_WORD_EMBEDDING_SIZE: int = 128 +DEFAULT_CHAR_EMBEDDING_SIZE: int = 32 +DEFAULT_CHAR_RNN_STATE_SIZE: int = 16 +DEFAULT_POS_TAG_EMBEDDING_SIZE: int = -1 # no POS tag embeddings by default +DEFAULT_NE_TAG_EMBEDDING_SIZE: int = -1 # no NE tag embeddings by default +DEFAULT_DISTANCE_EMBEDDING_SIZE: int = -1 # no distance embeddings by default +DEFAULT_POSITION_EMBEDDING_SIZE: int = -1 # no position embeddings by default +DEFAULT_DISTANCE_WINDOW_SIZE: int = -1 +DEFAULT_USE_IS_PREDICATE: int = -1 + +class EmbeddingLayer(InitialLayer): + def __init__(w2i, # word to index + w2f, # word to frequency + c2i, # character to index 
+ tag2i, # POS tag to index + ne2i, # NE tag to index + learnedWordEmbeddingSize, # size of the learned word embedding + charEmbeddingSize, # size of the character embedding + charRnnStateSize, # size of each one of the char-level RNNs + posTagEmbeddingSize, # size of the POS tag embedding + neTagEmbeddingSize, # size of the NE tag embedding + distanceEmbeddingSize, + distanceWindowSize, # window considered for distance values (relative to predicate) + positionEmbeddingSize, + useIsPredicate, # if true, add a Boolean bit to indicate if current word is the predicate + wordLookupParameters, + charLookupParameters, + charRnnBuilder, # RNNs for the character representation + posTagLookupParameters, + neTagLookupParameters, + distanceLookupParameters, + positionLookupParameters, + dropoutProb): + super().__init__() + self.w2i = w2i + self.w2f = w2f + self.c2i = c2i + self.tag2i = tag2i + self.ne2i = ne2i + self.learnedWordEmbeddingSize = learnedWordEmbeddingSize + self.charEmbeddingSize = charEmbeddingSize + self.charRnnStateSize = charRnnStateSize + self.posTagEmbeddingSize = posTagEmbeddingSize + self.neTagEmbeddingSize = neTagEmbeddingSize + self.distanceEmbeddingSize = distanceEmbeddingSize + self.distanceWindowSize = distanceWindowSize + self.positionEmbeddingSize = positionEmbeddingSize + self.useIsPredicate = useIsPredicate + self.wordLookupParameters = wordLookupParameters + self.charLookupParameters = charLookupParameters + self.charRnnBuilder = charRnnBuilder + self.posTagLookupParameters = posTagLookupParameters + self.neTagLookupParameters = neTagLookupParameters + self.distanceLookupParameters = distanceLookupParameters + self.positionLookupParameters = positionLookupParameters + self.dropoutProb = dropoutProb + + posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 + neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 + distanceDim = distanceWindowSize if distanceLookupParameters else 0 + positionDim = 1 if distanceLookupParameters 
and useIsPredicate else 0 + predicateDim = positionEmbeddingSize if positionLookupParameters else 0 + + self.outDim = TODO:ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + random.seed(RANDOM_SEED) + + def forward(self, sentence, constEmbeddings, doDropout): + + words = sentence.words + tags = sentence.posTags + nes = sentence.neTags + headPositions = sentence.headPositions + + # const word embeddings such as GloVe + constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) + assert(constEmbeddingsExpressions.size(0) == len(words)) + if(tags) assert(len(tags) == len(words)) + if(nes) assert(len(nes) == len(words)) + if(headPositions) assert(len(headPositions) == len(words)) + + # build the word embeddings one by one + embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, tags, nes, headPositions) + + return embeddings + + def mkConstEmbeddings(self, words, constEmbeddings): + idxs = [constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words] + embeddings = self.constEmbeddings.emb(idxs) + return embeddings + + def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPositions=None): + # + # Learned word embeddings + # These are initialized randomly, and updated during backprop + # + ids = [] + wordPositions = [] + for i, word in enumerate(words): + wordPositions.append(i) + id = self.w2i.get(word, 0) # 0 reserved for UNK in the vocab + # sample uniformly with prob 0.5 from singletons; move all other singletons to UNK + if(self.doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 + ids.append(id) + learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) + + # + # biLSTM over character embeddings + # + TODO: charEmbedding = mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder) + + # + # POS tag embedding + # + if tags: + posTagEmbed = 
self.posTagLookupParameters(torch.LongTensor([self.tag2i.get(tag, 0) for tag in tags])) + else: + posTagEmbed = None + # + # NE tag embedding + # + if nes: + neTagEmbed = self.neTagLookupParameters(torch.LongTensor([self.ne2i.get(ne, 0) for ne in nes])) + else: + neTagEmbed = None + # + # 1 if this word is the predicate + # + if headPositions and self.useIsPredicate: + predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) + else: + predEmbed = None + + # + # Distance embedding, relative to the distance to the predicate + # We cut the distance down to values inside the window [-distanceWindowSize, +distanceWindowSize] + # + if headPositions and self.distanceLookupParameters: + dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] + for i in range(dists): + if dists[i] < -self.distanceWindowSize: + dists[i] = self.distanceWindowSize-1 + if dists[i] > self.distanceWindowSize: + dist[i] = self.distanceWindowSize+1 + distanceEmbedding = self.distanceLookupParameters(torch.LongTensor(dists)) + else: + distanceEmbedding = None + + # + # Embedding that captures the absolute position of the token in the sentence + # + if self.positionLookupParameters: + values = [i if i<100 else 100 for i, word in enumerate(words)] + positionEmbedding = self.positionLookupParameters(torch.LongTensor(values)) + else: + positionEmbedding = None + + # The final word embedding is a concatenation of all these + embedParts = [constEmbeddings, learnedWordEmbeddings, charEmbedding, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + embed = torch.cat(embedParts, dim=1) + return embed + + def saveX2i(self): + x2i = dict() + x2i['w2i'] = self.w2i + x2i['w2f'] = self.w2f + x2i['c2i'] = self.c2i + if self.tag2i: + x2i['hasTag2i'] = 1 + x2i['tag2i'] = self.tag2i + else: + x2i['hasTag2i'] = 0 + if self.ne2i: + x2i['hasNe2i'] = 1 + 
x2i['ne2i'] = self.ne2i + else: + x2i['hasNe2i'] = 0 + x2i['learnedWordEmbeddingSize'] = self.learnedWordEmbeddingSize + x2i['charEmbeddingSize'] = self.charEmbeddingSize + x2i['charRnnStateSize'] = self.charRnnStateSize + x2i['posTagEmbeddingSize'] = self.posTagEmbeddingSize + x2i['neTagEmbeddingSize'] = self.neTagEmbeddingSize + x2i['distanceEmbeddingSize'] = self.distanceEmbeddingSize + x2i['distanceWindowSize'] = self.distanceWindowSize + x2i['useIsPredicate'] = 1 if self.useIsPredicate else 0 + x2i['positionEmbeddingSize'] = self.positionEmbeddingSize + x2i['dropoutProb'] = self.dropoutProb + + return x2i + + def __str__(self): + return f"EmbeddingLayer({self.outDim})" + + @classmethod + def load(cls, x2i): + w2i = x2i['w2i'] + w2f = x2i['w2f'] + c2i = x2i['c2i'] + tag2i = x2i['tag2i'] if x2i['hasTag2i'] == 1 else None + ne2i = x2i['ne2i'] if x2i['hasNe2i'] == 1 else None + + learnedWordEmbeddingSize = x2i.get('learnedWordEmbeddingSize', DEFAULT_LEARNED_WORD_EMBEDDING_SIZE) + charEmbeddingSize = x2i.get('charEmbeddingSize', DEFAULT_CHAR_EMBEDDING_SIZE) + charRnnStateSize = x2i.get('charRnnStateSize', DEFAULT_CHAR_RNN_STATE_SIZE) + posTagEmbeddingSize = x2i.get('posTagEmbeddingSize', DEFAULT_POS_TAG_EMBEDDING_SIZE) + neTagEmbeddingSize = x2i.get('neTagEmbeddingSize', DEFAULT_NE_TAG_EMBEDDING_SIZE) + distanceEmbeddingSize = x2i.get('distanceEmbeddingSize', DEFAULT_DISTANCE_EMBEDDING_SIZE) + distanceWindowSize = x2i.get('distanceWindowSize', DEFAULT_DISTANCE_WINDOW_SIZE) + useIsPredicate = x2i.get('useIsPredicate', DEFAULT_USE_IS_PREDICATE) == 1 + positionEmbeddingSize = x2i.get('positionEmbeddingSize', DEFAULT_POSITION_EMBEDDING_SIZE) + dropoutProb = x2i.get('dropoutProb', DEFAULT_DROPOUT_PROB) + + # make the loadable parameters + wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) + charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) + + #????? 
The following line would normally provoke construction of the initial ComputationGraph + #????? and do that outside of a synchronized area. This is avoided by ensuring that construction + #????? happens in Utils.initializeDyNet instead, just to be safe. + charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + + posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None + neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) if x2i['hasNe2i'] == 1 else None + distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None + positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None + + return cls(w2i, w2f, c2i, tag2i, ne2i, + learnedWordEmbeddingSize, + charEmbeddingSize, + charRnnStateSize, + posTagEmbeddingSize, + neTagEmbeddingSize, + distanceEmbeddingSize, + distanceWindowSize, + positionEmbeddingSize, + useIsPredicate, + wordLookupParameters, + charLookupParameters, + charRnnBuilder, + posTagLookupParameters, + neTagLookupParameters, + distanceLookupParameters, + positionLookupParameters, + dropoutProb) + + @classmethod + def initialize(cls, config, paramPrefix, wordCounter): + + if(not config.__contains__(paramPrefix)): + return None + + learnedWordEmbeddingSize = config.get_int(paramPrefix + ".learnedWordEmbeddingSize",DEFAULT_LEARNED_WORD_EMBEDDING_SIZE) + charEmbeddingSize = config.get_int(paramPrefix + ".charEmbeddingSize",DEFAULT_CHAR_EMBEDDING_SIZE) + charRnnStateSize = config.get_int(paramPrefix + ".charRnnStateSize",DEFAULT_CHAR_RNN_STATE_SIZE) + posTagEmbeddingSize = config.get_int(paramPrefix + ".posTagEmbeddingSize",DEFAULT_POS_TAG_EMBEDDING_SIZE) + neTagEmbeddingSize = config.get_int(paramPrefix + ".neTagEmbeddingSize",DEFAULT_NE_TAG_EMBEDDING_SIZE) + distanceEmbeddingSize = config.get_int(paramPrefix + 
".distanceEmbeddingSize",DEFAULT_DISTANCE_EMBEDDING_SIZE) + distanceWindowSize = config.get_int(paramPrefix + ".distanceWindowSize",DEFAULT_DISTANCE_WINDOW_SIZE) + useIsPredicate = config.getArgBoolean(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1) + positionEmbeddingSize = config.get_int(paramPrefix + ".positionEmbeddingSize",DEFAULT_POSITION_EMBEDDING_SIZE) + dropoutProb = config.get_float(paramPrefix + ".dropoutProb",EmbeddingLayer.DEFAULT_DROPOUT_PROB) + + wordList = [UNK_WORD] + sorted(wordCounter.keys()) + w2i = {w:i for i, w in enumerate(wordList)} + + wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) + + c2iFilename = config.get_string(paramPrefix + ".c2i", "org/clulab/c2i-en.txt") + c2i = TODO + + charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) + charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + + if(posTagEmbeddingSize > 0): + tag2i = TODO + posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) + else: + tag2i = None + posTagLookupParameters = None + + if(neTagEmbeddingSize > 0): + ne2i = TODO + neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) + else: + ne2i = None + neTagLookupParameters = None + + distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None + positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None + + return cls(w2i, w2f, c2i, tag2i, ne2i, + learnedWordEmbeddingSize, + charEmbeddingSize, + charRnnStateSize, + posTagEmbeddingSize, + neTagEmbeddingSize, + distanceEmbeddingSize, + distanceWindowSize, + positionEmbeddingSize, + useIsPredicate, + wordLookupParameters, + charLookupParameters, + charRnnBuilder, + posTagLookupParameters, + neTagLookupParameters, + distanceLookupParameters, + positionLookupParameters, + dropoutProb) + + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/main/src/main/python/pytorch/initialLayer.py b/main/src/main/python/pytorch/initialLayer.py new file mode 100644 index 000000000..39db90d28 --- /dev/null +++ b/main/src/main/python/pytorch/initialLayer.py @@ -0,0 +1,11 @@ +import torch +import torch.nn as nn + +class InitialLayer(nn.Module): + + def __init__(self): + super().__init__() + self.outDim = None + + def forward(self, sentence, constEmbeddings, doDropout): + raise NotImplementedError \ No newline at end of file diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index c33f3b3e1..d5a3000f4 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -1,5 +1,6 @@ import torch.nn as nn from utils import * +from embeddingLayer import EmbeddingLayer class Layers(nn.Module): def __init__(self, initialLayer, intermediateLayers, finalLayer): @@ -66,8 +67,9 @@ def saveX2i(self): else: x2i['hasInitial'] = 0 x2i['intermediateCount'] = len(intermediateLayers) + x2i['intermediateLayers'] = list() for il in self.intermediateLayers: - il.saveX2i() + x2i['intermediateLayers'].append(il.saveX2i()) if self.finalLayer.nonEmpty: x2i['hasFinal'] = 1 x2i['finalLayer'] = self.finalLayer.saveX2i() @@ -77,8 +79,8 @@ def saveX2i(self): return x2i @classmethod - def apply(cls, config, paramPrefix, parameters, wordCounter, labelCounter, isDual, providedInputSize): - initialLayer = EmbeddingLayer.initialize(config, paramPrefix + ".initial", parameters, wordCounter) + def apply(cls, config, paramPrefix, wordCounter, labelCounter, isDual, providedInputSize): + initialLayer = EmbeddingLayer.initialize(config, paramPrefix + ".initial", wordCounter) if(initialLayer): inputSize = initialLayer.outDim @@ -97,7 +99,7 @@ def apply(cls, config, paramPrefix, parameters, wordCounter, labelCounter, isDua if inputSize is None: raise RuntimeError("ERROR: trying to construct an intermediate layer without a known input size!") - intermediateLayer = 
RnnLayer.initialize(config, paramPrefix + f".intermediate{i}", parameters, inputSize) + intermediateLayer = RnnLayer.initialize(config, paramPrefix + f".intermediate{i}", inputSize) if intermediateLayer: intermediateLayers.append(intermediateLayer) @@ -109,25 +111,25 @@ def apply(cls, config, paramPrefix, parameters, wordCounter, labelCounter, isDua if inputSize is None: raise RuntimeError("ERROR: trying to construct a final layer without a known input size!") else: - finalLayer = ForwardLayer.initialize(config, paramPrefix + ".final", parameters, labelCounter, isDual, inputSize) + finalLayer = ForwardLayer.initialize(config, paramPrefix + ".final", labelCounter, isDual, inputSize) else: finalLayer = None return cls(initialLayer, intermediateLayers, finalLayer) @classmethod - def loadX2i(cls, models, x2i): + def loadX2i(cls, x2i): hasInitial = x2i['hasInitial'] - initialLayer = EmbeddingLayer.load(models, x2i) if hasInitial == 1 else None + initialLayer = EmbeddingLayer.load(x2i['initialLayer']) if hasInitial == 1 else None intermediateLayers = list() intermediateCount = x2i['intermediateCount'] - for _ in range(intermediateCount): - il = RnnLayer.load(models, x2i) + for i in range(intermediateCount): + il = RnnLayer.load(x2i['intermediateLayers'][i]) intermediateLayers.append(il) hasFinal = x2i['hasFinal'] - finalLayer = ForwardLayer.load(models, x2i) if hasFinal == 1 else none + finalLayer = ForwardLayer.load(x2i['finalLayer']) if hasFinal == 1 else none return cls(initialLayer, intermediateLayers, finalLayer) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 23f3f2b16..ace00e73e 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -1,10 +1,10 @@ -from utils import * +from pytorch.utils import * from collections import Counter -from sequences.rowReader import * +from sequences.rowReaders import * class Metal(): """docstring for Metal""" - def __init__(self, taskManager, 
parameters, modelOpt): + def __init__(self, taskManager, modelOpt): # One Layers object per task; model(0) contains the Layers shared between all tasks (if any) if modelOpt: self.model = modelOpt @@ -18,12 +18,12 @@ def initialize(self): layersPerTask = [None for _ in range(taskManager.taskCount + 1)] - layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) + layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", taskWords[0], None, False, None) inputSize = layersPerTask[0].outDim for i in taskManager.indices: - layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) + layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", taskWords[i + 1], taskLabels[i + 1], taskManager.tasks[i].isDual, inputSize) for i in range(len(layersPerTask)): print (f"Summary of layersPerTask({i}):") diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index ef4c18bbf..f5d1ae868 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -126,8 +126,8 @@ def __init__(self, self.devSentences = ColumnReader.readColumns(devFileName) if devFileName else None self.testSentences = ColumnReader.readColumns(testFileName) if testFileName else None - self.isBasic:Boolean = taskType == TYPE_BASIC - self.isDual:Boolean = taskType == TYPE_DUAL + self.isBasic:bool = taskType == TYPE_BASIC + self.isDual:bool = taskType == TYPE_DUAL if taskType == TYPE_BASIC: self.prettyType = "basic" diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 907ba8fd3..06a2902ff 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -9,7 +9,7 @@ START_TAG = "" STOP_TAG = "" -RANDOM_SEED = 2522620396L # used for both DyNet, and 
the JVM seed for shuffling data +RANDOM_SEED = 2522620396 # used for both DyNet, and the JVM seed for shuffling data WEIGHT_DECAY = 1e-5 LOG_MIN_VALUE = -10000.0 @@ -24,6 +24,9 @@ def save(file, values, comment): file.write(f"{key}\t{value}\n") file.write("\n") +def mkCharacterEmbedding(word, c2i, charLookupParameters, charFwRnnBuilder, charBwRnnBuilder): + TODO + diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 5d6ea1586..f07a30832 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -1,6 +1,7 @@ from pyhocon import ConfigFactory import argparse from pytorch.taskManager import TaskManager +from pytorch.metal import Metal if __name__ == '__main__': @@ -16,8 +17,10 @@ if args.train: config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) - # modelName = args.model_file - # mtl = Metal(taskManager, parameters, None) + modelName = args.model_file + print (taskManager.debugTraversal()) + + # mtl = Metal(taskManager, None, None) # mtl.train(modelName) elif args.test: pass diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index c11c39865..0aa409756 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ b/main/src/main/python/sequences/rowReaders.py @@ -64,7 +64,7 @@ def parseSimpleExtended(rows): return AnnotatedSentence(words), posTags, neLabels, labels # Parser for the full format: word, POS tag, NE label, (label head)+ - def parseFull(rows: IndexedSeq[Row]): + def parseFull(rows): assert(len(rows.head) >= 5) numSent = (len(rows.head) - 3) / 2 assert(numSent >= 1) From 8ef31d2941db04340889be3a3e719207df7fbfae Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Fri, 24 Sep 2021 23:33:02 -0700 Subject: [PATCH 007/134] more implementation for embedding layer --- .../python/pytorch/constEmbeddingsGlove.py | 24 +++++++++++++++-- .../src/main/python/pytorch/embeddingLayer.py | 16 +++++------ 
main/src/main/python/pytorch/utils.py | 27 ++++++++++++++++--- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index d6129393d..be32c2f39 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -1,7 +1,27 @@ from dataclasses import dataclass import torch.nn as nn +from embeddings.wordEmbeddingMap import * @dataclass class ConstEmbeddingParameters: - emb: nn.Embedding.from_pretrianed("....") - w2i: dict \ No newline at end of file + emb: nn.Embedding + w2i: dict + +def ConstEmbeddingsGlove: + def __init__(self): + self.SINGLETON_WORD_EMBEDDING_MAP = None + self.load('../resources/org/clulab/glove.conf') + self.dim = self.SINGLETON_WORD_EMBEDDING_MAP.dim + + def load(self, config): + if self.SINGLETON_WORD_EMBEDDING_MAP is None: + self.SINGLETON_WORD_EMBEDDING_MAP = WordEmbeddingMap(config) + + def mkConstLookupParams(self, words): + w2i = dict() + for i,w in enumerate(words): + weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict[0]) + w2i[w] = i + emd = nn.Embedding.from_pretrained(weight) + emd.weight.requires_grad=False + return ConstEmbeddingParameters(emb ,w2i) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 386d581cf..6a4de8217 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -1,6 +1,8 @@ from initialLayer import InitialLayer import random from utils import * +import torch.nn as nn +import torch DEFAULT_DROPOUT_PROB: float = DEFAULT_DROPOUT_PROBABILITY DEFAULT_LEARNED_WORD_EMBEDDING_SIZE: int = 128 @@ -66,7 +68,7 @@ def __init__(w2i, # word to index positionDim = 1 if distanceLookupParameters and useIsPredicate else 0 predicateDim = positionEmbeddingSize if positionLookupParameters 
else 0 - self.outDim = TODO:ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + self.outDim = ConstEmbeddingsGlove().dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim random.seed(RANDOM_SEED) def forward(self, sentence, constEmbeddings, doDropout): @@ -111,7 +113,7 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition # # biLSTM over character embeddings # - TODO: charEmbedding = mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder) + charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder, self.charRnnStateSize) for word in words]) # # POS tag embedding @@ -219,9 +221,6 @@ def load(cls, x2i): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) - #????? The following line would normally provoke construction of the initial ComputationGraph - #????? and do that outside of a synchronized area. This is avoided by ensuring that construction - #????? happens in Utils.initializeDyNet instead, just to be safe. 
charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None @@ -271,20 +270,21 @@ def initialize(cls, config, paramPrefix, wordCounter): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) c2iFilename = config.get_string(paramPrefix + ".c2i", "org/clulab/c2i-en.txt") - c2i = TODO + c2i = readChar2Ids(c2iFilename) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): - tag2i = TODO + + tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) else: tag2i = None posTagLookupParameters = None if(neTagEmbeddingSize > 0): - ne2i = TODO + ne2i = readString2Ids(config.get_string(paramPrefix + ".ne2i", "../resources/org/clulab/ne2i-en.txt")) neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) else: ne2i = None diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 06a2902ff..8c1c45d70 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -1,3 +1,5 @@ +import torch.nn as nn +import torch concatenateCount = 0 @@ -24,9 +26,26 @@ def save(file, values, comment): file.write(f"{key}\t{value}\n") file.write("\n") -def mkCharacterEmbedding(word, c2i, charLookupParameters, charFwRnnBuilder, charBwRnnBuilder): - TODO - - +def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder, hidden_dim): + charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) + (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) + output, (result, c) = charRnnBuilder(charEmbeddings.view(len(word), 1, -1), (h, c)) + return 
result.view(1, hidden_dim*2) + +def readString2Ids(s2iFilename): + s2i = dict() + with open(s2iFilename) as f: + for line in f: + if not line.startswith("#"): + k, v = line.strip().split('\t') + s2i[k] = int(v) + +def readChar2Ids(s2iFilename): + s2i = dict() + with open(s2iFilename) as f: + for line in f: + if not line.startswith("#"): + k, v = line.strip().split('\t') + s2i[char(int(k))] = int(v) From ddcf223af45904ac7876368bcd9f8c32cfb5ebc2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Sat, 25 Sep 2021 14:35:15 -0700 Subject: [PATCH 008/134] init code for rnnLayer also refined some functions in embedding layer --- main/src/main/python/embeddings/__init__.py | 0 .../python/embeddings/wordEmbeddingMap.py | 29 ++++++ .../src/main/python/pytorch/embeddingLayer.py | 2 +- .../main/python/pytorch/intermediateLayer.py | 12 +++ main/src/main/python/pytorch/rnnLayer.py | 91 +++++++++++++++++++ main/src/main/python/pytorch/utils.py | 30 +++++- main/src/main/python/run.py | 2 +- 7 files changed, 161 insertions(+), 5 deletions(-) create mode 100644 main/src/main/python/embeddings/__init__.py create mode 100644 main/src/main/python/embeddings/wordEmbeddingMap.py create mode 100644 main/src/main/python/pytorch/intermediateLayer.py create mode 100644 main/src/main/python/pytorch/rnnLayer.py diff --git a/main/src/main/python/embeddings/__init__.py b/main/src/main/python/embeddings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py new file mode 100644 index 000000000..a82c2108a --- /dev/null +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -0,0 +1,29 @@ +import numpy as np + +class WordEmbeddingMap: + def __init__(self, config): + self.emb_dict = self.load(config) + self.dim = self.emb_dict.shape[-1] + + def load(self): + emb_matrix = None + emb_dict = dict() + for line in open(config.get_string("glove.matrixResourceName")): + if not 
len(line.split()) == 2: + if "\t" in line: + delimiter = "\t" + else: + delimiter = " " + line_split = line.rstrip().split(delimiter) + # extract word and vector + word = line_split[0] + x = np.array([float(i) for i in line_split[1:]]) + vector = (x /np.linalg.norm(x)) + embedding_size = vector.shape[0] + emb_dict[word] = vector + base = math.sqrt(6/embedding_size) + emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) + return emb_dict + + def isOutOfVocabulary(self, word): + return word not in self.emb_dict \ No newline at end of file diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 6a4de8217..95b4ef894 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -113,7 +113,7 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition # # biLSTM over character embeddings # - charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder, self.charRnnStateSize) for word in words]) + charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) # # POS tag embedding diff --git a/main/src/main/python/pytorch/intermediateLayer.py b/main/src/main/python/pytorch/intermediateLayer.py new file mode 100644 index 000000000..48ea53377 --- /dev/null +++ b/main/src/main/python/pytorch/intermediateLayer.py @@ -0,0 +1,12 @@ +import torch +import torch.nn as nn + +class IntermediateLayer(nn.Module): + + def __init__(self): + super().__init__() + self.inDim = None + self.outDim = None + + def forward(self, inputExpressions, doDropout): + raise NotImplementedError \ No newline at end of file diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py new file mode 100644 index 000000000..c5aef820a --- /dev/null +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -0,0 +1,91 @@ +from 
intermediateLayer import IntermediateLayer +from utils import * +import torch +import torch.nn as nn + +class RnnLayer(IntermediateLayer): + def __init__(self, + inputSize, + numLayers, + rnnStateSize, + useHighwayConnections, + rnnType, + wordRnnBuilder, + dropoutProb): + + self.inDim = self.inputSize = inputSize + self.numLayers = numLayers + self.rnnStateSize = rnnStateSize + self.useHighwayConnections = useHighwayConnections + self.rnnType = rnnType + self.wordRnnBuilder = wordRnnBuilder + self.dropoutProb = dropoutProb + + highwaySize = inputSize if useHighwayConnections else 0 + self.outDim = 2 * rnnStateSize + highwaySize + + def forward(self, inputExpressions, dropout): + + assert(inputExpressions is not None) + + States, _ = transduce(inputExpressions, self.wordRnnBuilder) + + if self.useHighwayConnections: + States = torch.cat([States, inputExpressions], dim=1) + + return States + + def saveX2i(self): + x2i = dict() + x2i['inputSize'] = self.inputSize + x2i['numLayers'] = self.numLayers + x2i['rnnStateSize'] = self.rnnStateSize + x2i['useHighwayConnections'] = 1 if useHighwayConnections else 0 + x2i['rnnType'] = self.rnnType + x2i['dropoutProb'] = self.dropoutProb + return x2i + + def __str__(self): + return f"RnnLayer({self.rnnType}, {self.inDim}, {self.outDim})" + + @classmethod + def load(cls, x2i): + inputSize = x2i['inputSize'] + numLayers = x2i['numLayers'] + rnnType = x2i.get('rnnType', 'lstm') + rnnStateSize = x2i['rnnStateSize'] + useHighwayConnections = x2i['useHighwayConnections'] == 1 + dropoutProb = x2i['dropoutProb'] + + builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) + + return cls(inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) + + @classmethod + def initialize(cls, config, paramPrefix, inputSize): + + if(not config.__contains__(paramPrefix)): + return None + + numLayers = config.get_int(paramPrefix + ".numLayers", 1) + rnnStateSize = config.get_int(paramPrefix + 
".rnnStateSize", None) + useHighwayConnections = config.get_bool(paramPrefix + '.useHighwayConnections', False) + rnnType = config.get_string(paramPrefix + ".type", "lstm") + dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize) + + return (inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) + +def mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb): + if rnnType == 'gru': + return nn.GRU(inputSize, rnnStateSize, numLayers, bidirectional=True, dropout=dropoutProb) + elif rnnType == 'lstm': + return nn.LSTM(inputSize, rnnStateSize, numLayers, bidirectional=True, dropout=dropoutProb) + else: + raise RuntimeError(f'ERROR: unknown rnnType "{rnnType}"!') + + + + + diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 8c1c45d70..c808d5421 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -26,10 +26,10 @@ def save(file, values, comment): file.write(f"{key}\t{value}\n") file.write("\n") -def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder, hidden_dim): +def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): + hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) - (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (result, c) = charRnnBuilder(charEmbeddings.view(len(word), 1, -1), (h, c)) + _, result = transduce(charEmbeddings, charRnnBuilder, True) return result.view(1, hidden_dim*2) def readString2Ids(s2iFilename): @@ -48,4 +48,28 @@ def readChar2Ids(s2iFilename): k, v = line.strip().split('\t') s2i[char(int(k))] = int(v) +def transduce(embeddings, builder): + + hidden_dim = builder.hidden_size + bi_direct = builder.bidirectional + mode = build.mode + + if mode == 'LSTM': + if bi_direct: + 
(h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) + output, (result, c) = builder(embeddings.view(len(embeddings), 1, -1), (h, c)) + else: + (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) + output, (result, c) = builder(embeddings.view(len(embeddings), 1, -1), (h, c)) + elif mode == 'GRU': + if bi_direct: + h = torch.zeros(2, 1, hidden_dim) + output, result = builder(embeddings.view(len(embeddings), 1, -1), h) + else: + h = torch.zeros(1, 1, hidden_dim) + output, result = builder(embeddings.view(len(embeddings), 1, -1), h) + + return output, result + + diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index f07a30832..c75532f0e 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -20,7 +20,7 @@ modelName = args.model_file print (taskManager.debugTraversal()) - # mtl = Metal(taskManager, None, None) + mtl = Metal(taskManager, None, None) # mtl.train(modelName) elif args.test: pass From 03229a42bb781fb9800e15c3943c24b2c48a22ff Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 29 Sep 2021 13:00:00 -0700 Subject: [PATCH 009/134] forward layer implementation --- .../src/main/python/pytorch/embeddingLayer.py | 2 +- main/src/main/python/pytorch/finalLayer.py | 21 +++ main/src/main/python/pytorch/forwardLayer.py | 173 ++++++++++++++++++ .../main/python/pytorch/greedyForwardLayer.py | 0 main/src/main/python/pytorch/utils.py | 21 +++ .../python/pytorch/viterbiForwardLayer.py | 0 6 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 main/src/main/python/pytorch/finalLayer.py create mode 100644 main/src/main/python/pytorch/forwardLayer.py create mode 100644 main/src/main/python/pytorch/greedyForwardLayer.py create mode 100644 main/src/main/python/pytorch/viterbiForwardLayer.py diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 95b4ef894..a506bfa02 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++
b/main/src/main/python/pytorch/embeddingLayer.py @@ -113,7 +113,7 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition # # biLSTM over character embeddings # - charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) + charEmbedding = torch.stack([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) # # POS tag embedding diff --git a/main/src/main/python/pytorch/finalLayer.py b/main/src/main/python/pytorch/finalLayer.py new file mode 100644 index 000000000..0f2b63d87 --- /dev/null +++ b/main/src/main/python/pytorch/finalLayer.py @@ -0,0 +1,21 @@ +import torch +import torch.nn as nn + +class FinalLayer(nn.Module): + + def __init__(self): + super().__init__() + self.inDim = None + self.outDim = None + + def forward(self, inputExpressions, headPositionsOpt, doDropout): + raise NotImplementedError + + def loss(self, emissionScoresAsExpression, goldLabels): + raise NotImplementedError + + def inference(self, emissionScores): + raise NotImplementedError + + def inferenceWithScores(self, emissionScores): + raise NotImplementedError \ No newline at end of file diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py new file mode 100644 index 000000000..4b3ea489f --- /dev/null +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -0,0 +1,173 @@ +import torch +import torch.nn + +from finalLayer import FinalLayer +from greedyForwardLayer import GreedyForwardLayer +from viterbiForwardLayer import ViterbiForwardLayer + +from utils import * + +def ForwardLayer(FinalLayer): + def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): + self.inputSize = inputSize + self.isDual = isDual + self.t2i = t2i + self.i2t = i2t + self.spans = spans + self.nonlinearity = nonlinearity + + self.pH = nn.Linear(actualInputSize, len(t2i)) + self.pRoot = 
torch.rand(inputSize) #TODO: Not sure about the shape here + self.dropoutProb = dropoutProb + + self.inDim = spanLength(spans) if spans is not None else inputSize + self.outDim = len(t2i) + + + def pickSpan(self, v): + if self.spans is None: + return v + else: + # Zheng: Will spans overlap? + vs = list() + for span in self.spans: + e = torch.index_select(v, 0, torch.tensor([span[0], span[1]])) + vs.append(e) + return torch.cat(vs) + + def forward(inputExpressions, doDropout, headPositionsOpt = None): + emissionScores = list() + if not self.isDual: + # Zheng: Why the for loop here? Can we just use matrix manipulation? + for i, e in enumerate(inputExpressions): + argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + if nonlinearity == NONLIN_TANH: + l1 = torch.tanh(l1) + elif nonlinearity == NONLIN_RELU: + l1 = torch.relu(l1) + emissionScores.append(l1) + else: + if headPositionsOpt is None: + raise RuntimeError("ERROR: dual task without information about head positions!") + for i, e in enumerate(inputExpressions): + headPosition = headPositionsOpt[i] + argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + if headPosition >= 0: + # there is an explicit head in the sentence + predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition]), self.dropoutProb, doDropout) + else: + # the head is root. we used a dedicated Parameter for root + # Zheng: Why not add root node to the input sequence at the beginning? 
+ predExp = expressionDropout(self.pickSpan(self.pRoot), self.dropoutProb, doDropout) + ss = torch.cat([argExp, predExp]) + l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) + if nonlinearity == NONLIN_TANH: + l1 = torch.tanh(l1) + elif nonlinearity == NONLIN_RELU: + l1 = torch.relu(l1) + emissionScores.append(l1) + return torch.stack(emissionScores) + + @staticmethod + def load(x2i): + inferenceType = x2i["inferenceType"] + if inferenceType == TYPE_VITERBI: + pass + # TODO + # return ViterbiForwardLayer.load(x2i) + elif inferenceType == TYPE_GREEDY: + return GreedyForwardLayer.load(x2i) + else: + raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") + + @staticmethod + def initialize(config, paramPrefix, labelCounter, isDual, inputSize): + if(not config.__contains__(paramPrefix)): + return None + + inferenceType = config.get_string(paramPrefix + ".inference", "greedy") + dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + nonlinAsString = config.get_string(paramPrefix + ".nonlinearity", "") + if nonlinAsString in nonlin_map: + nonlin = nonlin_map[nonlinAsString] + else: + raise RuntimeError(f"ERROR: unknown non-linearity {nonlinAsString}!") + + t2i = {t:i for i, t in enumerate(labelCounter.keys())} + i2t = {i:t for t, i in t2i.items()} + + spanConfig = config.get_string(paramPrefix + ".span", "") + if spanConfig == "": + span = None + else: + span = parseSpan(spanConfig, inputSize) + + if span: + l = spanLength(span) + actualInputSize = 2*l if isDual else l + else: + actualInputSize = 2*inputSize if isDual else inputSize + + if inferenceType == TYPE_GREEDY_STRING: + return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + elif inferenceType == TYPE_VITERBI_STRING: + pass + # TODO + # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + # layer.initializeTransitions() + # return layer + else: + raise 
RuntimeError(f"ERROR: unknown inference type {inferenceType}!") + +def spanLength(spans): + s = 0 + for x in spans: + s += x[1] - x[0] + return s + +def parseSpan(spanParam, inputSize): + spans = list() + spanParamTokens = spanParam.split(",") + for spanParamToken in spanParamTokens: + spanTokens = spanParamToken.split('-') + assert(len(spanTokens) == 2) + spans.append((int(spanTokens[0]), int(spanTokens[1]))) + return spans + +def spanToString(spans): + s = "" + first = True + for span in spans: + if not first: + s += "," + s += f"{span[0]}-{span[1]}" + first = False + return s + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index c808d5421..e15fe2a3f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -20,6 +20,20 @@ IS_DYNET_INITIALIZED = False +TYPE_VITERBI = 1 +TYPE_GREEDY = 2 + +NONLIN_NONE = 0 +NONLIN_RELU = 1 +NONLIN_TANH = 2 + +nonlin_map = {"relu":NONLIN_RELU, "tanh":NONLIN_TANH, "":NONLIN_NONE} + +TYPE_GREEDY_STRING = "greedy" +TYPE_VITERBI_STRING = "viterbi" + +DEFAULT_IS_DUAL = 0 + def save(file, values, comment): file.write("# " + comment + "\n") for key, value in values.items(): @@ -71,5 +85,12 @@ def transduce(embeddings, builder): return output, result +def expressionDropout(expression, dropoutProb, doDropout): + if doDropout and dropoutProb > 0: + dropout = nn.Dropout(dropoutProb) + return dropout(expression) + else: + return expression + diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py new file mode 100644 index 000000000..e69de29bb From 5a6b1284aee4955fba582ee89afd2fbc131f4e2e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 29 Sep 2021 15:33:38 -0700 Subject: [PATCH 
010/134] greedy forward layer --- main/src/main/python/pytorch/forwardLayer.py | 41 ++++++-------- .../main/python/pytorch/greedyForwardLayer.py | 53 +++++++++++++++++++ main/src/main/python/pytorch/utils.py | 16 ++++++ 3 files changed, 84 insertions(+), 26 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 4b3ea489f..2f43be5a7 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -1,5 +1,7 @@ import torch import torch.nn +from torch.autograd import Variable +import torch.nn.functional as F from finalLayer import FinalLayer from greedyForwardLayer import GreedyForwardLayer @@ -7,7 +9,7 @@ from utils import * -def ForwardLayer(FinalLayer): +class ForwardLayer(FinalLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): self.inputSize = inputSize self.isDual = isDual @@ -17,7 +19,7 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.nonlinearity = nonlinearity self.pH = nn.Linear(actualInputSize, len(t2i)) - self.pRoot = torch.rand(inputSize) #TODO: Not sure about the shape here + self.pRoot = Variable(torch.rand(inputSize)) #TODO: Not sure about the shape here self.dropoutProb = dropoutProb self.inDim = spanLength(spans) if spans is not None else inputSize @@ -43,9 +45,9 @@ def forward(inputExpressions, doDropout, headPositionsOpt = None): argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) if nonlinearity == NONLIN_TANH: - l1 = torch.tanh(l1) + l1 = F.tanh(l1) elif nonlinearity == NONLIN_RELU: - l1 = torch.relu(l1) + l1 = F.relu(l1) emissionScores.append(l1) else: if headPositionsOpt is None: @@ -63,9 +65,9 @@ def forward(inputExpressions, doDropout, headPositionsOpt = None): ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), 
self.dropoutProb, doDropout) if nonlinearity == NONLIN_TANH: - l1 = torch.tanh(l1) + l1 = F.tanh(l1) elif nonlinearity == NONLIN_RELU: - l1 = torch.relu(l1) + l1 = F.relu(l1) emissionScores.append(l1) return torch.stack(emissionScores) @@ -111,40 +113,27 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): actualInputSize = 2*inputSize if isDual else inputSize if inferenceType == TYPE_GREEDY_STRING: - return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) elif inferenceType == TYPE_VITERBI_STRING: pass # TODO - # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) # layer.initializeTransitions() # return layer else: raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") def spanLength(spans): - s = 0 - for x in spans: - s += x[1] - x[0] - return s + return sum(end - start for start, end in spans) def parseSpan(spanParam, inputSize): - spans = list() - spanParamTokens = spanParam.split(",") - for spanParamToken in spanParamTokens: - spanTokens = spanParamToken.split('-') - assert(len(spanTokens) == 2) - spans.append((int(spanTokens[0]), int(spanTokens[1]))) + # Zheng: Why do we need inputSize here? 
+ spanParamTokens = spanParam.split(",") + spans = [tuple(map(int, tok.split('-'))) for tok in spanParamTokens] + return spans def spanToString(spans): - s = "" - first = True - for span in spans: - if not first: - s += "," - s += f"{span[0]}-{span[1]}" - first = False - return s + return ','.join(f'{start}-{end}' for start, end in spans) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index e69de29bb..be776a6b5 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -0,0 +1,53 @@ +from forwardLayer import * +from utils import * +import numpy as np + +class GreedyForwardLayer(ForwardLayer): + def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): + super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) + + def loss(self, finalStates, goldLabelStrings): + goldLabels = [self.t2i[gs] for gs in goldLabelStrings] + return sentenceLossGreedy(finalStates, goldLabels) + + def saveX2i(self): + x2i = dict() + x2i["inferenceType"] = TYPE_GREEDY + x2i["inputSize"] = self.inputSize + x2i["isDual"] = 1 if self.isDual else 0 + x2i["span"] = spanToString(self.spans) if self.spans else "" + x2i["nonlinearity"] = self.nonlinearity + x2i["t2i"] = self.t2i + x2i["dropoutProb"] = self.dropoutProb + + return x2i + + def __str__(self): + return f"GreedyForwardLayer({inDim}, {outDim})" + + def inference(self, emissionScores): + labelIds = np.argmax(emissionScores, axis=1).tolist() + return [self.i2t[i] for i in labelIds] + + def inferenceWithScores(self, emissionScores): + return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1], reverse=True) for scoresForPosition in emissionScores] + + @classmethod + def load(cls, x2i): + inputSize = x2i["inputSize"] + isDual = x2i.get("isDual", DEFAULT_IS_DUAL) == 1 + sapnValue = x2i.get("span", "") + spans = None if sapnValue == "" else 
parseSpan(sapnValue, inputSize) + nonlinearity = x2i.get("nonlinearity", NONLIN_NONE) + t2i = x2i["t2i"] + i2t = {i:t for t, i in t2i.items()} + dropoutProb = x2i.get("dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + if spans: + l = spanLength(spans) + actualInputSize = 2*l if isDual else l + else: + actualInputSize = 2*inputSize if isDual else inputSize + + return cls(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) + \ No newline at end of file diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index e15fe2a3f..c1b43a6f1 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -1,5 +1,8 @@ import torch.nn as nn import torch +from torch.autograd import Variable + +import numpy as np concatenateCount = 0 @@ -92,5 +95,18 @@ def expressionDropout(expression, dropoutProb, doDropout): else: return expression +def sentenceLossGreedy(emissionScoresForSeq, golds): + assert(emissionScoresForSeq.shape[0] == len(golds)) + criterion = nn.CrossEntropyLoss() + golds = Variable(torch.LongTensor(golds)) + return criterion(emissionScoresForSeq, golds) + + + + + + + + From 0aa3aaabb4b05952f43b19f8503fe769f5969b7e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 29 Sep 2021 17:35:08 -0700 Subject: [PATCH 011/134] add more functions to layers, init viterbi layer --- main/src/main/python/pytorch/layers.py | 115 +++++++++++++++--- main/src/main/python/pytorch/utils.py | 7 ++ .../python/pytorch/viterbiForwardLayer.py | 32 +++++ 3 files changed, 139 insertions(+), 15 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index d5a3000f4..363ff29a9 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -1,10 +1,10 @@ import torch.nn as nn from utils import * from embeddingLayer import EmbeddingLayer +from constEmbeddingsGlove import ConstEmbeddingsGlove -class Layers(nn.Module):
+class Layers(object): def __init__(self, initialLayer, intermediateLayers, finalLayer): - super().__init__() if finalLayer: self.outDim = finalLayer.outDim elif intermediateLayers: @@ -37,10 +37,10 @@ def __str__(self): s += "final = " + finalLayer return s - def forward(self, sentence, constEmnbeddings, doDropout): + def forward(self, sentence, constEmbeddings, doDropout): if self.initialLayer.isEmpty: raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") - states = self.initialLayer(sentence, constEmnbeddings, doDropout) + states = self.initialLayer(sentence, constEmbeddings, doDropout) for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer.nonEmpty: @@ -133,18 +133,103 @@ def loadX2i(cls, x2i): return cls(initialLayer, intermediateLayers, finalLayer) - def predictJointly(layers, sentence, constEmnbeddings): - TODO - def forwardForTask(layers, taskId, sentence, constEmnbeddings, doDropout): - TODO - def predict(layers, taskId, sentence, constEmnbeddings): - TODO - def predictWithScores(layers, taskId, sentence, constEmnbeddings): - TODO - def parse(layers, sentence, constEmnbeddings): - TODO + @staticmethod + def predictJointly(layers, sentence, constEmbeddings): + labelsPerTask = list() + # layers(0) contains the shared layers + if layers[0]: + sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout=False) + for i in range(1, len(layers)): + states = layers[i].forwardFrom(sharedStates, sentence.headPositions, doDropout=False) + emissionScores = emissionScoresToArrays(states) + labels = layers[i].finalLayer.inference(emissionScores) + labelsPerTask += [labels] + # no shared layer + else: + for i in range(1, len(layers)): + states = layers[i].forward(sentence, sentence.headPositions, doDropout=False) + emissionScores = emissionScoresToArrays(states) + labels = layers[i].finalLayer.inference(emissionScores) + labelsPerTask += 
[labels] + + return labelsPerTask + + @staticmethod + def forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout): + if layers[0]: + sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout) + states = layers[taskId+1].forwardFrom(sharedStates, sentence.headPositions, doDropout) + else: + states = layers[taskId+1].forward(sentence, constEmbeddings, doDropout) + return states + + @staticmethod + def predict(layers, taskId, sentence, constEmbeddings): + states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) + emissionScores = emissionScoresToArrays(states) + return layers[taskId+1].finalLayer.inference(emissionScores) + + @staticmethod + def predictWithScores(layers, taskId, sentence, constEmbeddings): + states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) + emissionScores = emissionScoresToArrays(states) + return layers[taskId+1].finalLayer.inferenceWithScores(emissionScores) + + @staticmethod + def parse(layers, sentence, constEmbeddings): + # + # first get the output of the layers that are shared between the two tasks + # + assert(layers[0].nonEmpty) + sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout=False) + + # + # now predict the heads (first task) + # + headStates = layers[1].forwardFrom(sharedStates, None, doDropout=False) + headEmissionScores = emissionScoresToArrays(headStates) + headScores = layers[1].finalLayer.inference(headEmissionScores) + + # store the head values here + heads = list() + for wi, predictionsForThisWord in enumerate(headScores): + # pick the prediction with the highest score, which is within the boundaries of the current sentence + done = False + for hi, relative in enumerate(predictionsForThisWord): + if done: + break + try: + relativeHead = int(relative[0]) + if relativeHead == 0: + heads.append(1) + done = True + else: + headPosition = wi + relativeHead + heads.append(headPosition) + done = True + except: + raise 
RuntimeError('''some valid predictions may not be integers, e.g., "" may be predicted by the sequence model''') + if not done: + # we should not be here, but let's be safe + # if nothing good was found, assume root + heads.append(1) + + # + # next, predict the labels using the predicted heads + # + labelStates = layers[2].forwardFrom(sharedStates, heads, doDropout=False) + emissionScores = emissionScoresToArrays(labelStates) + labels = layers[2].finalLayer.inference(emissionScores) + assert(len(labels)==len(heads)) + + return zip(heads, labels) + + @staticmethod def loss(layers, taskId, sentence, goldLabels): - TODO + # Zheng: I am not sure this is the suitable way to load embeddings or not, need help... + constEmbeddings = ConstEmbeddingsGlove().mkConstLookupParams(sentence.words) + states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=True) # use dropout during training! + return layers[taskId+1].finalLayer.loss(states, goldLabels) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index c1b43a6f1..dd1709a8f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -100,6 +100,13 @@ def sentenceLossGreedy(emissionScoresForSeq, golds): criterion = nn.CrossEntropyLoss() golds = Variable(torch.LongTensor(golds)) return criterion(emissionScoresForSeq, golds) + +def emissionScoresToArrays(expressions): + lattice = list() + for expr in expressions: + probs = expr.data.tolist() + lattice += [probs] + return lattice diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index e69de29bb..636b130c4 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -0,0 +1,32 @@ +from forwardLayer import * +from utils import * + +class GreedyForwardLayer(ForwardLayer): + def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, 
nonlinearity, dropoutProb, spans = None): + super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) + + def initializeTransitions(self): + #TODO + pass + def initTransitionsTo(self, dst, size, startTag, stopTag): + #TODO + pass + def loss(self, finalStates, goldLabelStrings): + #TODO + pass + def saveX2i(self): + #TODO + pass + def __str__(self): + #TODO + pass + def inference(emissionScores): + #TODO + pass + def inferenceWithScores(emissionScores): + #TODO + pass + @classmethod + def load(cls, x2i): + #TODO + pass \ No newline at end of file From e12161839b89bbdb26d8c1678b3c7bd523183be9 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 02:18:03 -0700 Subject: [PATCH 012/134] traverse the code, fixed bugs Now, the model initialization part is working --- .../python/embeddings/wordEmbeddingMap.py | 46 ++++++------- .../python/pytorch/constEmbeddingsGlove.py | 8 ++- .../src/main/python/pytorch/embeddingLayer.py | 25 ++++--- main/src/main/python/pytorch/forwardLayer.py | 13 ++-- .../main/python/pytorch/greedyForwardLayer.py | 6 +- main/src/main/python/pytorch/layers.py | 49 ++++++++----- main/src/main/python/pytorch/metal.py | 69 ++++++++++++++++--- main/src/main/python/pytorch/rnnLayer.py | 12 ++-- main/src/main/python/pytorch/taskManager.py | 23 ++++++- main/src/main/python/pytorch/utils.py | 10 +-- .../python/pytorch/viterbiForwardLayer.py | 6 +- main/src/main/python/run.py | 4 +- .../src/main/python/sequences/columnReader.py | 2 +- main/src/main/python/sequences/rowReaders.py | 30 ++++---- 14 files changed, 197 insertions(+), 106 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index a82c2108a..327b4befc 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -1,29 +1,29 @@ import numpy as np +import math class WordEmbeddingMap: def __init__(self, config): - 
self.emb_dict = self.load(config) - self.dim = self.emb_dict.shape[-1] - - def load(self): - emb_matrix = None - emb_dict = dict() - for line in open(config.get_string("glove.matrixResourceName")): - if not len(line.split()) == 2: - if "\t" in line: - delimiter = "\t" - else: - delimiter = " " - line_split = line.rstrip().split(delimiter) - # extract word and vector - word = line_split[0] - x = np.array([float(i) for i in line_split[1:]]) - vector = (x /np.linalg.norm(x)) - embedding_size = vector.shape[0] - emb_dict[word] = vector - base = math.sqrt(6/embedding_size) - emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) - return emb_dict + self.emb_dict, self.dim = load(config) def isOutOfVocabulary(self, word): - return word not in self.emb_dict \ No newline at end of file + return word not in self.emb_dict + +def load(config): + emb_matrix = None + emb_dict = dict() + for line in open(config.get_string("glove.matrixResourceName")): + if not len(line.split()) == 2: + if "\t" in line: + delimiter = "\t" + else: + delimiter = " " + line_split = line.rstrip().split(delimiter) + # extract word and vector + word = line_split[0] + x = np.array([float(i) for i in line_split[1:]]) + vector = (x /np.linalg.norm(x)) + embedding_size = vector.shape[0] + emb_dict[word] = vector + base = math.sqrt(6/embedding_size) + emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) + return emb_dict, embedding_size \ No newline at end of file diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index be32c2f39..52547b6a8 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -1,16 +1,18 @@ from dataclasses import dataclass import torch.nn as nn from embeddings.wordEmbeddingMap import * +from pyhocon import ConfigFactory @dataclass class ConstEmbeddingParameters: emb: nn.Embedding w2i: dict -def ConstEmbeddingsGlove: +class 
_ConstEmbeddingsGlove: def __init__(self): self.SINGLETON_WORD_EMBEDDING_MAP = None - self.load('../resources/org/clulab/glove.conf') + config = ConfigFactory.parse_file('../resources/org/clulab/glove.conf') + self.load(config) self.dim = self.SINGLETON_WORD_EMBEDDING_MAP.dim def load(self, config): @@ -25,3 +27,5 @@ def mkConstLookupParams(self, words): emd = nn.Embedding.from_pretrained(weight) emd.weight.requires_grad=False return ConstEmbeddingParameters(emb ,w2i) + +ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index a506bfa02..2ebea6f7b 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -1,8 +1,9 @@ -from initialLayer import InitialLayer +from pytorch.initialLayer import InitialLayer import random -from utils import * +from pytorch.utils import * import torch.nn as nn import torch +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove DEFAULT_DROPOUT_PROB: float = DEFAULT_DROPOUT_PROBABILITY DEFAULT_LEARNED_WORD_EMBEDDING_SIZE: int = 128 @@ -16,7 +17,7 @@ DEFAULT_USE_IS_PREDICATE: int = -1 class EmbeddingLayer(InitialLayer): - def __init__(w2i, # word to index + def __init__(self, w2i, # word to index w2f, # word to frequency c2i, # character to index tag2i, # POS tag to index @@ -68,7 +69,7 @@ def __init__(w2i, # word to index positionDim = 1 if distanceLookupParameters and useIsPredicate else 0 predicateDim = positionEmbeddingSize if positionLookupParameters else 0 - self.outDim = ConstEmbeddingsGlove().dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim random.seed(RANDOM_SEED) def forward(self, sentence, constEmbeddings, doDropout): @@ -81,9 +82,9 @@ def 
forward(self, sentence, constEmbeddings, doDropout): # const word embeddings such as GloVe constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) assert(constEmbeddingsExpressions.size(0) == len(words)) - if(tags) assert(len(tags) == len(words)) - if(nes) assert(len(nes) == len(words)) - if(headPositions) assert(len(headPositions) == len(words)) + if(tags): assert(len(tags) == len(words)) + if(nes): assert(len(nes) == len(words)) + if(headPositions): assert(len(headPositions) == len(words)) # build the word embeddings one by one embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, tags, nes, headPositions) @@ -250,7 +251,7 @@ def load(cls, x2i): @classmethod def initialize(cls, config, paramPrefix, wordCounter): - if(not config.__contains__(paramPrefix)): + if(not config.contains(paramPrefix)): return None learnedWordEmbeddingSize = config.get_int(paramPrefix + ".learnedWordEmbeddingSize",DEFAULT_LEARNED_WORD_EMBEDDING_SIZE) @@ -260,9 +261,9 @@ def initialize(cls, config, paramPrefix, wordCounter): neTagEmbeddingSize = config.get_int(paramPrefix + ".neTagEmbeddingSize",DEFAULT_NE_TAG_EMBEDDING_SIZE) distanceEmbeddingSize = config.get_int(paramPrefix + ".distanceEmbeddingSize",DEFAULT_DISTANCE_EMBEDDING_SIZE) distanceWindowSize = config.get_int(paramPrefix + ".distanceWindowSize",DEFAULT_DISTANCE_WINDOW_SIZE) - useIsPredicate = config.getArgBoolean(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1) + useIsPredicate = config.get_bool(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1) positionEmbeddingSize = config.get_int(paramPrefix + ".positionEmbeddingSize",DEFAULT_POSITION_EMBEDDING_SIZE) - dropoutProb = config.get_float(paramPrefix + ".dropoutProb",EmbeddingLayer.DEFAULT_DROPOUT_PROB) + dropoutProb = config.get_float(paramPrefix + ".dropoutProb",DEFAULT_DROPOUT_PROB) wordList = [UNK_WORD] + sorted(wordCounter.keys()) w2i = {w:i for i, w in enumerate(wordList)} @@ -293,7 +294,7 @@ def initialize(cls, 
config, paramPrefix, wordCounter): distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None - return cls(w2i, w2f, c2i, tag2i, ne2i, + return cls(w2i, wordCounter, c2i, tag2i, ne2i, learnedWordEmbeddingSize, charEmbeddingSize, charRnnStateSize, @@ -331,6 +332,8 @@ def initialize(cls, config, paramPrefix, wordCounter): + + diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 2f43be5a7..92b7a133d 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -3,14 +3,13 @@ from torch.autograd import Variable import torch.nn.functional as F -from finalLayer import FinalLayer -from greedyForwardLayer import GreedyForwardLayer -from viterbiForwardLayer import ViterbiForwardLayer +from pytorch.finalLayer import FinalLayer -from utils import * +from pytorch.utils import * class ForwardLayer(FinalLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): + super().__init__() self.inputSize = inputSize self.isDual = isDual self.t2i = t2i @@ -73,6 +72,8 @@ def forward(inputExpressions, doDropout, headPositionsOpt = None): @staticmethod def load(x2i): + from pytorch.greedyForwardLayer import GreedyForwardLayer + from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] if inferenceType == TYPE_VITERBI: pass @@ -85,7 +86,9 @@ def load(x2i): @staticmethod def initialize(config, paramPrefix, labelCounter, isDual, inputSize): - if(not config.__contains__(paramPrefix)): + from pytorch.greedyForwardLayer import GreedyForwardLayer + from pytorch.viterbiForwardLayer import ViterbiForwardLayer + if(not config.contains(paramPrefix)): return None inferenceType = config.get_string(paramPrefix + ".inference", 
"greedy") diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index be776a6b5..2d9ddeeae 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -1,5 +1,5 @@ -from forwardLayer import * -from utils import * +from pytorch.forwardLayer import * +from pytorch.utils import * import numpy as np class GreedyForwardLayer(ForwardLayer): @@ -23,7 +23,7 @@ def saveX2i(self): return x2i def __str__(self): - return f"GreedyForwardLayer({inDim}, {outDim})" + return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): labelIds = np.argmax(lattice.data.numpy(), axis=1).tolist() diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 363ff29a9..4c1a1889d 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -1,7 +1,9 @@ import torch.nn as nn -from utils import * -from embeddingLayer import EmbeddingLayer -from constEmbeddingsGlove import ConstEmbeddingsGlove +from pytorch.utils import * +from pytorch.embeddingLayer import EmbeddingLayer +from pytorch.rnnLayer import RnnLayer +from pytorch.forwardLayer import ForwardLayer +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove class Layers(object): def __init__(self, initialLayer, intermediateLayers, finalLayer): @@ -14,8 +16,7 @@ def __init__(self, initialLayer, intermediateLayers, finalLayer): else: self.outDim = None - if initialLayer and intermediateLayers and finalLayer: - self.nonEmpty = True + self.nonEmpty = initialLayer is not None and intermediateLayers is not None and finalLayer is not None self.isEmpty = not self.nonEmpty self.initialLayer = initialLayer @@ -25,43 +26,53 @@ def __init__(self, initialLayer, intermediateLayers, finalLayer): def __str__(self): s = "" started = False - if(initialLayer.nonEmpty): - s += "initial = " + initialLayer + 
if(self.initialLayer is not None): + s += "initial = " + str(self.initialLayer) started = True - for i in intermediateLayers.indices: - if(started) s += " " - s += s"intermediate ({i+1}) = " + intermediateLayers[i] + for i in range(len(self.intermediateLayers)): + if(started): s += " " + s += f"intermediate ({i+1}) = " + str(self.intermediateLayers[i]) started = True - if(finalLayer.nonEmpty): - if(started) s += " " - s += "final = " + finalLayer + if(self.finalLayer is not None): + if(started): s += " " + s += "final = " + str(self.finalLayer) return s + def get_parameters(self): + parameters = list() + if self.initialLayer is not None: + parameters += [p for p in self.initialLayer.parameters() if p.requires_grad] + for il in self.intermediateLayers: + parameters += [p for p in il.parameters() if p.requires_grad] + if self.finalLayer is not None: + parameters += [p for p in self.finalLayer.parameters() if p.requires_grad] + return parameters + def forward(self, sentence, constEmbeddings, doDropout): if self.initialLayer.isEmpty: raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") states = self.initialLayer(sentence, constEmbeddings, doDropout) for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) - if self.finalLayer.nonEmpty: + if self.finalLayer is not None: states = self.finalLayer(states, sentence.headPositions, doDropout) return states def forwardFrom(self, inStates, headPositions, doDropout): - if self.initialLayer.nonEmpty: + if self.initialLayer is not None: raise RuntimeError(f"ERROR: you can't call forwardFrom() on a Layers object that has an initial layer: {self}") states = inStates for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) - if self.finalLayer.nonEmpty: + if self.finalLayer is not None: states = self.finalLayer(states, sentence.headPositions, doDropout) return states def saveX2i(self): x2i = 
dict() - if self.initialLayer.nonEmpty: + if self.initialLayer is not None: x2i['hasInitial'] = 1 x2i['initialLayer'] = self.initialLayer.saveX2i() else: @@ -70,7 +81,7 @@ def saveX2i(self): x2i['intermediateLayers'] = list() for il in self.intermediateLayers: x2i['intermediateLayers'].append(il.saveX2i()) - if self.finalLayer.nonEmpty: + if self.finalLayer is not None: x2i['hasFinal'] = 1 x2i['finalLayer'] = self.finalLayer.saveX2i() else: @@ -227,7 +238,7 @@ def parse(layers, sentence, constEmbeddings): @staticmethod def loss(layers, taskId, sentence, goldLabels): # Zheng: I am not sure this is the suitable way to load embeddings or not, need help... - constEmbeddings = ConstEmbeddingsGlove().mkConstLookupParams(sentence.words) + constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=True) # use dropout during training! return layers[taskId+1].finalLayer.loss(states, goldLabels) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index ace00e73e..c95e7747f 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -1,29 +1,33 @@ from pytorch.utils import * from collections import Counter from sequences.rowReaders import * +from pytorch.layers import Layers -class Metal(): +from torch.optim import SGD, Adam + +class Metal(object): """docstring for Metal""" def __init__(self, taskManager, modelOpt): + self.taskManager = taskManager + # One Layers object per task; model(0) contains the Layers shared between all tasks (if any) if modelOpt: self.model = modelOpt else: self.model = self.initialize() - self.taskManager = taskManager def initialize(self): - taskWords, taskLabels = mkVocabularies() + taskWords, taskLabels = self.mkVocabularies() - layersPerTask = [None for _ in range(taskManager.taskCount + 1)] + layersPerTask = [None for _ in range(self.taskManager.taskCount + 1)] - 
layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", taskWords[0], None, False, None) + layersPerTask[0] = Layers.apply(self.taskManager, "mtl.layers", taskWords[0], None, False, None) inputSize = layersPerTask[0].outDim - for i in taskManager.indices: - layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", taskWords[i + 1], taskLabels[i + 1], taskManager.tasks[i].isDual, inputSize) + for i in self.taskManager.indices: + layersPerTask[i+1] = Layers.apply(self.taskManager, f"mtl.task{i+1}.layers", taskWords[i + 1], taskLabels[i + 1], self.taskManager.tasks[i].isDual, inputSize) for i in range(len(layersPerTask)): print (f"Summary of layersPerTask({i}):") @@ -33,17 +37,17 @@ def initialize(self): def mkVocabularies(self): # index 0 reserved for the shared Layers; tid + 1 corresponds to each task - labels = [Counter() for _ in range(taskManager.taskCount + 1)] + labels = [Counter() for _ in range(self.taskManager.taskCount + 1)] for i in range(1, len(labels)): # labels(0) not used, since only task-specific layers have a final layer labels[i][START_TAG] += 1 labels[i][STOP_TAG] += 1 - words = [Counter() for _ in range(taskManager.taskCount + 1)] + words = [Counter() for _ in range(self.taskManager.taskCount + 1)] reader = MetalRowReader() - for tid in taskManager.indices: - for sentence in taskManager.tasks[tid].trainSentences: + for tid in self.taskManager.indices: + for sentence in self.taskManager.tasks[tid].trainSentences: annotatedSentences = reader.toAnnotatedSentences(sentence) for asent in annotatedSentences: @@ -56,3 +60,46 @@ def mkVocabularies(self): return words, labels + def train(self, modelNamePrefix): + learningRate = self.taskManager.get_float("mtl.learningRate", 0.001) + trainerType = self.taskManager.get_string("mtl.trainer", "adam") + batchSize = self.taskManager.get_int("mtl.batchSize", 1) + assert(batchSize>0) + + parameters = list() + for layers in self.model: + parameters += layers.get_parameters() + + if trainerType == 
"adam": + trainer = Adam(parameters, lr=learningRate) + elif trainerType == "rmsprop": + trainer = RMSprop(parameters, lr=learningRate) + elif trainerType == "sgd": + trainer = SDG(parameters, lr=learningRate) + else: + raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") + + reader = MetalRowReader() + + cummulativeLoss = 0.0 + numTagged = 0 + + maxAvgAcc = 0.0 + maxAvgF1 = 0.0 + bestEpoch = 0 + + allEpochScores = list() + epochPatience = self.taskManager.epochPatience + + for epoch in range(0, self.taskManager.maxEpochs): + if epochPatience <= 0: + break + + + + + + + + + diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index c5aef820a..ee1896f8e 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -1,5 +1,5 @@ -from intermediateLayer import IntermediateLayer -from utils import * +from pytorch.intermediateLayer import IntermediateLayer +from pytorch.utils import * import torch import torch.nn as nn @@ -12,7 +12,7 @@ def __init__(self, rnnType, wordRnnBuilder, dropoutProb): - + super().__init__() self.inDim = self.inputSize = inputSize self.numLayers = numLayers self.rnnStateSize = rnnStateSize @@ -64,7 +64,7 @@ def load(cls, x2i): @classmethod def initialize(cls, config, paramPrefix, inputSize): - if(not config.__contains__(paramPrefix)): + if(not config.contains(paramPrefix)): return None numLayers = config.get_int(paramPrefix + ".numLayers", 1) @@ -73,9 +73,9 @@ def initialize(cls, config, paramPrefix, inputSize): rnnType = config.get_string(paramPrefix + ".type", "lstm") dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) - builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize) + builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) - return (inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) + return cls(inputSize, numLayers, rnnStateSize, 
useHighwayConnections, rnnType, builder, dropoutProb) def mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb): if rnnType == 'gru': diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index f5d1ae868..25e669eb0 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -6,7 +6,7 @@ TYPE_BASIC = 0 TYPE_DUAL = 1 -class TaskManager: +class TaskManager(): def __init__(self, config, seed): @@ -31,6 +31,27 @@ def __init__(self, config, seed): # Training shards from all tasks self.shards = self.mkShards() + def contains(self, paramPrefix): + return self.config.__contains__(paramPrefix) + + def get_int(self, x, defualt=None): + return self.config.get_int(x, defualt) + + def get_string(self, x, defualt=None): + return self.config.get_string(x, defualt) + + def get_float(self, x, defualt=None): + return self.config.get_float(x, defualt) + + def get_bool(self, x, defualt=None): + return self.config.get_bool(x, defualt) + + def get_list(self, x, defualt=None): + return self.config.get_list(x, defualt) + + def get_config(self, x, defualt=None): + return self.config.get_config(x, defualt) + # Construct training shards by interleaving shards from all tasks def mkShards(self): shardsByTasks = list() diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index dd1709a8f..049ca8845 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -56,14 +56,16 @@ def readString2Ids(s2iFilename): if not line.startswith("#"): k, v = line.strip().split('\t') s2i[k] = int(v) + return s2i def readChar2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: for line in f: - if not line.startswith("#"): + if not line.startswith("#") and line.rstrip(): k, v = line.strip().split('\t') - s2i[char(int(k))] = int(v) + s2i[chr(int(k))] = int(v) + return s2i def transduce(embeddings, builder): @@ -75,14 +77,14 
@@ def transduce(embeddings, builder): if bi_direct: (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) - else; + else: (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.zeros(2, 1, hidden_dim) output, result = builder(embeddings.view(len(word), 1, -1), h) - else; + else: h = torch.zeros(1, 1, hidden_dim) output, result = builder(embeddings.view(len(word), 1, -1), h) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 636b130c4..4b025293b 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -1,7 +1,7 @@ -from forwardLayer import * -from utils import * +from pytorch.forwardLayer import * +from pytorch.utils import * -class GreedyForwardLayer(ForwardLayer): +class ViterbiForwardLayer(ForwardLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index c75532f0e..fc4e1385a 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -20,8 +20,8 @@ modelName = args.model_file print (taskManager.debugTraversal()) - mtl = Metal(taskManager, None, None) - # mtl.train(modelName) + mtl = Metal(taskManager, None) + mtl.train(modelName) elif args.test: pass elif args.shell: diff --git a/main/src/main/python/sequences/columnReader.py b/main/src/main/python/sequences/columnReader.py index 0f8c04610..e162316f7 100644 --- a/main/src/main/python/sequences/columnReader.py +++ b/main/src/main/python/sequences/columnReader.py @@ -44,4 +44,4 @@ def __init__(self, tokens): 
def get(self, idx): if(idx >= self.length): raise RuntimeError(f"ERROR: trying to read field #{idx}, which does not exist in this row: {tokens}!") - return tokens[idx] + return self.tokens[idx] diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index 0aa409756..58a15cb71 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ b/main/src/main/python/sequences/rowReaders.py @@ -26,18 +26,18 @@ def __init__(self): self.LABEL_START_OFFSET = 3 def toAnnotatedSentences(self, rows): - if (len(rows.head) == 2): - self.parseSimple(rows) - elif (len(rows.head) == 4): - self.parseSimpleExtended(rows) - elif (len(rows.head) >= 5): - self.parseFull(rows) + if (rows[0].length == 2): + return self.parseSimple(rows) + elif (rows[0].length == 4): + return self.parseSimpleExtended(rows) + elif (rows[0].length >= 5): + return self.parseFull(rows) else: raise RuntimeError("ERROR: the Metal format expects 2, 4, or 5+ columns!") # Parser for the simple format: word, label - def parseSimple(rows): - assert(len(rows.head) == 2) + def parseSimple(self, rows): + assert(rows[0].length == 2) words = list() labels = list() @@ -45,11 +45,11 @@ def parseSimple(rows): words += [row.get(self.WORD_POSITION)] labels += [row.get(self.WORD_POSITION + 1)] - return AnnotatedSentence(words), labels + return [(AnnotatedSentence(words), labels)] # Parser for the simple extended format: word, POS tag, NE label, label - def parseSimpleExtended(rows): - assert(len(rows.head) == 4) + def parseSimpleExtended(self, rows): + assert(rows[0].length == 4) words = list() posTags = list() neLabels = list() @@ -61,12 +61,12 @@ def parseSimpleExtended(rows): neLabels += [row.get(self.NE_LABEL_POSITION)] labels += [row.get(self.LABEL_START_OFFSET)] - return AnnotatedSentence(words), posTags, neLabels, labels + return [(AnnotatedSentence(words), posTags, neLabels, labels)] # Parser for the full format: word, POS tag, NE label, (label head)+ - def parseFull(rows): 
- assert(len(rows.head) >= 5) - numSent = (len(rows.head) - 3) / 2 + def parseFull(self, rows): + assert(rows[0].length >= 5) + numSent = (rows[0].length - 3) / 2 assert(numSent >= 1) words = list() From 4cbeb68cd8789d334c19d7c2174c1a8de78cab5d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 04:56:39 -0700 Subject: [PATCH 013/134] finished the whole model except the viterbi part --- .../python/embeddings/wordEmbeddingMap.py | 1 - .../python/pytorch/constEmbeddingsGlove.py | 8 +- .../src/main/python/pytorch/embeddingLayer.py | 12 +- main/src/main/python/pytorch/layers.py | 39 +++- main/src/main/python/pytorch/metal.py | 210 +++++++++++++++++- main/src/main/python/pytorch/seqScorer.py | 96 ++++++++ main/src/main/python/pytorch/utils.py | 23 +- 7 files changed, 370 insertions(+), 19 deletions(-) create mode 100644 main/src/main/python/pytorch/seqScorer.py diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 327b4befc..49ab541e9 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -9,7 +9,6 @@ def isOutOfVocabulary(self, word): return word not in self.emb_dict def load(config): - emb_matrix = None emb_dict = dict() for line in open(config.get_string("glove.matrixResourceName")): if not len(line.split()) == 2: diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index 52547b6a8..d2589f9d3 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -2,6 +2,8 @@ import torch.nn as nn from embeddings.wordEmbeddingMap import * from pyhocon import ConfigFactory +import numpy as np +import torch @dataclass class ConstEmbeddingParameters: @@ -21,11 +23,11 @@ def load(self, config): def mkConstLookupParams(self, words): w2i = dict() + weights = np.zeros((len(words), self.dim)) for i,w 
in enumerate(words): - weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict[0]) + weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict[""]) w2i[w] = i - emd = nn.Embedding.from_pretrained(weight) - emd.weight.requires_grad=False + emb = nn.Embedding.from_pretrained(torch.tensor(weights), freeze=True) return ConstEmbeddingParameters(emb ,w2i) ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 2ebea6f7b..15242ed7a 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -87,16 +87,16 @@ def forward(self, sentence, constEmbeddings, doDropout): if(headPositions): assert(len(headPositions) == len(words)) # build the word embeddings one by one - embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, tags, nes, headPositions) + embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, doDropout, tags, nes, headPositions) return embeddings def mkConstEmbeddings(self, words, constEmbeddings): - idxs = [constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words] - embeddings = self.constEmbeddings.emb(idxs) + idxs = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(idxs) return embeddings - def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPositions=None): + def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, headPositions=None): # # Learned word embeddings # These are initialized randomly, and updated during backprop @@ -107,14 +107,14 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition wordPositions.append(i) id = self.w2i.get(word, 0) # 0 reserved for UNK in the vocab # sample uniformly with 
prob 0.5 from singletons; move all other singletons to UNK - if(self.doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 + if(doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 ids.append(id) learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) # # biLSTM over character embeddings # - charEmbedding = torch.stack([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) + charEmbedding = torch.stack([mkCharacterEmbedding(word, self.c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) # # POS tag embedding diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 4c1a1889d..7e01c2594 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -48,8 +48,45 @@ def get_parameters(self): parameters += [p for p in self.finalLayer.parameters() if p.requires_grad] return parameters + def start_train(self): + if self.initialLayer is not None: + self.initialLayer.train() + for il in self.intermediateLayers: + il.train() + if self.finalLayer is not None: + self.finalLayer.train() + + def start_eval(self): + if self.initialLayer is not None: + self.initialLayer.eval() + for il in self.intermediateLayers: + il.eval() + if self.finalLayer is not None: + self.finalLayer.eval() + + def get_state_dict(self): + params = dict() + if self.initialLayer is not None: + params['initialLayer'] = self.initialLayer.state_dict() + if self.intermediateLayers: + params['intermediateLayers'] = list() + for il in self.intermediateLayers: + params['intermediateLayers'].append(il.state_dict()) + if self.finalLayer is not None: + params['finalLayer'] = self.finalLayer.state_dict() + return params + + def load_state_dict(self, params): + if self.initialLayer is not None: + self.initialLayer.load_state_dict(params['initialLayer']) + for i, il in enumerate(self.intermediateLayers): + 
il.load_state_dict(params['intermediateLayers'][i]) + if self.finalLayer is not None: + self.finalLayer.load_state_dict(params['finalLayer']) + + def forward(self, sentence, constEmbeddings, doDropout): - if self.initialLayer.isEmpty: + if self.initialLayer is None: raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") states = self.initialLayer(sentence, constEmbeddings, doDropout) for intermediateLayer in self.intermediateLayers: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index c95e7747f..a2136529b 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -2,9 +2,12 @@ from collections import Counter from sequences.rowReaders import * from pytorch.layers import Layers +from pytorch.seqScorer import * from torch.optim import SGD, Adam +import json + class Metal(object): """docstring for Metal""" def __init__(self, taskManager, modelOpt): @@ -94,7 +97,212 @@ def train(self, modelNamePrefix): for epoch in range(0, self.taskManager.maxEpochs): if epochPatience <= 0: break - + # this fetches randomized training sentences from all tasks + sentenceIterator = self.taskManager.getSentences() + sentCount = 0 + + for layers in self.model: + layers.start_train() + trainer.zero_grad() + + batchLosses = list() + + # traverse all training sentences + for metaSentence in sentenceIterator: + taskId = metaSentence[0] + sentence = metaSentence[1] + + sentCount += 1 + + annotatedSentences = reader.toAnnotatedSentences(sentence) + assert(annotatedSentences is not None) + + unweightedLoss = sum([Layers.loss(self.model, taskId, a_sent[0], a_sent[1]) for a_sent in annotatedSentences]) + + loss = unweightedLoss * self.taskManager.tasks[taskId].taskWeight # Zheng: I don't think this is necessary: if self.taskManager.tasks[taskId].taskWeight!=1.0 else unweightedLoss + + batchLosses.append(loss) + + if len(batchLosses) >= batchSize: + 
batchLoss = sum(batchLosses) + cummulativeLoss = batchLoss.item() + batchLoss.backward() + trainer.step() + batchLosses = list() + + numTagged += len(sentence) + + if(sentCount % 1000 == 0): + print (f"Cumulative loss: {cummulativeLoss/numTagged} ({sentCount} {sentences})") + cummulativeLoss = 0.0 + numTagged = 0 + # we may have an incomplete batch here + if batchLosses: + batchLoss = sum(batchLosses) + cummulativeLoss = batchLoss.item() + batchLoss.backward() + trainer.step() + batchLosses = list() + + # check dev performance in this epoch, for all tasks + totalAcc = 0.0 + totalPrec = 0.0 + totalRec = 0.0 + totalF1 = 0.0 + for taskId in range(0, self.taskManager.taskCount): + taskName = self.taskManager.tasks[taskId].taskName + devSentences = self.taskManager.tasks[taskId].devSentences + + if devSentences: + acc, prec, rec, f1 = self.evaluate(taskId, taskName, devSentences, "development", epoch) + totalAcc += acc + totalPrec += prec + totalRec += rec + totalF1 += f1 + + avgAcc = totalAcc / taskManager.taskCount + avgPrec = totalPrec / taskManager.taskCount + avgRec = totalRec / taskManager.taskCount + avgF1 = totalF1 / taskManager.taskCount + + print (f"Average accuracy across {taskManager.taskCount} tasks in epoch {epoch}: {avgAcc}") + print (f"Average P/R/F1 across {taskManager.taskCount} tasks in epoch $epoch: {avgPrec} / {avgRec} / {avgF1}") + + allEpochScores.append((epoch, avgF1)) + + if avgF1 > maxAvgF1: + maxAvgF1 = avgF1 + maxAvgAcc = avgAcc + bestEpoch = epoch + epochPatience = self.taskManager.epochPatience + else: + epochPatience -= 1 + + self.save(f"{modelNamePrefix}-epoch{epoch}") + + allEpochScores.sort(key=lambda x: x[1]) + print ("Epochs in descending order of scores:") + for t in allEpochScores: + print (f"Epoch #{t[0]}: {t[1]}") + + def evaluate(self, taskId, taskName, sentences, name, epoch=-1): + scoreCountsByLabel = ScoreCountsByLabel() + taskNumber = taskId + 1 + sentCount = 0 + + print (f"Started evaluation on the {name} dataset for task 
{taskNumber} ({taskName})...") + + if epoch >= 0: + pw = open(f"task{taskNumber}.dev.output.{epoch}", "w") + else: + pw = open(f"task{taskNumber}.test.output", "w") + + reader = MetalRowReader() + + for sent in sentences: + sentCount += 1 + + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) + preds = Layers.predict(self.model, taskId, sentence, constEmbeddings) + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + printCoNLLOutput(pw, sentence.words, goldLabels, preds) + + pw.close() + + print (f"Accuracy on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.accuracy()}") + print (f"Precision on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.precision()}") + print (f"Recall on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.recall()}") + print (f"Micro F1 on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels: + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map(label).gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + + return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) + + def predictJointly(self, sentence, constEmbeddings): + return Layers.predictJointly(self.model, sentence, constEmbeddings) + + def predict(self, taskId, sentence, constEmbeddings): + return Layers.predict(self.model, taskId, sentence, constEmbeddings) + + def predictWithScores(self, taskId, sentence, constEmbeddings): + return Layers.predictWithScores(self.model, taskId, sentence, constEmbeddings) + + # Custom method for the parsing 
algorithm + # @param sentence Input sentence + # @param constEmbeddings Constant embeddings for this sentence + # @return Tuple of (head, label) for each word in the sentence + def parse(self, sentence, constEmbeddings): + Layers.parse(self.model, sentence, constEmbeddings) + + def test(self): + taskName = taskManager.tasks[taskId].taskName + testSentences = taskManager.tasks[taskId].testSentences + if testSentences: + self.evaluate(taskId, taskName, devSentences, "testing") + + def save(self, baseFilename): + + params = list() + for layers in self.model: + sd = layers.get_state_dict() + x2i = layers.saveX2i() + params.append({"model": sd, "x2i": x2i}) + + # torch pickle save + try: + torch.save(params, baseFilename) + print("model saved to {}".format(baseFilename+".torch")) + except BaseException: + print("[Warning: Saving failed... continuing anyway.]") + + # We can also save as text json file: + with open(baseFilename+".json") as f: + f.write(json.dumps(params)) + + + @classmethod + def load(cls, modelFilenamePrefix): + print (f"Loading MTL model from {modelFilenamePrefix}...") + layersSeq = list() + checkpoint = torch.load(modelFilenamePrefix+".torch") + for param in checkpoint: + layers = loadX2i(param['x2i']) + layers.load_state_dict(param['model']) + layersSeq.append(layers) + + print (f"Loading MTL model from {modelFilenamePrefix} complete.") + + return layersSeq + + @classmethod + def apply(cls, modelFilenamePrefix, taskManager=None): + model = Metal.load(modelFilenamePrefix) + return cls(taskManager, model) + + + + + + + + + + + + + + + + diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py new file mode 100644 index 000000000..144e5bfe9 --- /dev/null +++ b/main/src/main/python/pytorch/seqScorer.py @@ -0,0 +1,96 @@ +from dataclasses import dataclass +from collections import defaultdict + +OUTSIDE_LABEL = "O" + +@dataclass +class ScoreCounts: + correct: int + gold: int + predicted: int + +class SeqScorer: + + 
@staticmethod + def f1(golds, preds): + scoreCountsByLabel = scoreCountsByLabel() + + for e1, e2 in zip(preds, golds): + scoreCountsByLabel.total += 1 + if e1 == e2: + scoreCountsByLabel.correct += 1 + if e2 != OUTSIDE_LABEL: + scoreCountsByLabel.incGold() + scoreCountsByLabel.incGold(e2) + if e1 != OUTSIDE_LABEL: + scoreCountsByLabel.incPredicted() + scoreCountsByLabel.incPredicted(e1) + if e1 == e2: + scoreCountsByLabel.incCorrect() + scoreCountsByLabel.incCorrect(e1) + return scoreCountsByLabel + +class ScoreCountsByLabel: + + def __init__(self): + self.map = defaultdict(ScoreCounts) + self.total = 0 + self.correct = 0 + + def labels(self): + return self.map.keys() + + def incGold(self, label="*", value=1): + counts = self.map[label] + counts.gold += value + + def incPredicted(self, label="*", value=1): + counts = self.map[label] + counts.predicted += value + + def incCorrect(self, label="*", value=1): + counts = self.map[label] + counts.correct += value + + def incAll(self, counts): + correct += counts.correct + total += counts.total + + for label in counts.labels(): + c = counts.map[label] + incGold(label, c.gold) + incPredicted(label, c.predicted) + incCorrect(label, c.correct) + + def precision(self, label="*", decimals=2): + c = self.map[label].correct + p = self.map[label].predicted + + prec = c/p if p!=0 else 0 + + return round(prec, decimals) if decimals>0 else prec + + def recall(self, label="*", decimals=2): + c = self.map[label].correct + g = self.map[label].gold + + reca = c/g if p!=0 else 0 + + return round(reca, decimals) if decimals>0 else reca + + def f1(self, label="*", decimals=2): + p = self.precision(label, decimals=-1) + r = self.recall(label, decimals=-1) + + f1 = 2.0 * p * r / (p + r), decimals if (p!=0 and r!=0) else 0 + + return round(f1, decimals) if decimals>0 else f1 + + def accuracy(self, decimals=2): + a = self.correct / self.total + + return round(a, decimals) if decimals>0 else a + + + + diff --git 
a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 049ca8845..6d80e4169 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -46,7 +46,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) - _, result = transduce(charEmbeddings, charRnnBuilder, True) + _, result = transduce(charEmbeddings, charRnnBuilder, len(word)) return result.view(1, hidden_dim*2) def readString2Ids(s2iFilename): @@ -67,26 +67,26 @@ def readChar2Ids(s2iFilename): s2i[chr(int(k))] = int(v) return s2i -def transduce(embeddings, builder): +def transduce(embeddings, builder, l): hidden_dim = builder.hidden_size bi_direct = builder.bidirectional - mode = build.mode + mode = builder.mode if mode == 'LSTM': if bi_direct: (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) + output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) else: (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) + output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.zeros(2, 1, hidden_dim) - output, result = builder(embeddings.view(len(word), 1, -1), h) + output, result = builder(embeddings.view(l, 1, -1), h) else: h = torch.zeros(1, 1, hidden_dim) - output, result = builder(embeddings.view(len(word), 1, -1), h) + output, result = builder(embeddings.view(l, 1, -1), h) return output, result @@ -109,6 +109,15 @@ def emissionScoresToArrays(expressions): probs = expr.data.tolist() lattice += [probs] return lattice + +def printCoNLLOutput(pw, words, golds, preds): + + assert(len(words) == len(golds)) + assert(len(words) == 
len(preds)) + + for i in range(len(words)): + pw.write(f"{words[i]} {golds[i]} {preds[i]}\n") + pw.write("\n") From 470eca9594a3be5f50134207a19cea6767aa57ff Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 12:26:21 -0700 Subject: [PATCH 014/134] finally training... --- .../python/pytorch/constEmbeddingsGlove.py | 2 +- main/src/main/python/pytorch/forwardLayer.py | 6 +++--- main/src/main/python/pytorch/layers.py | 4 ++-- main/src/main/python/pytorch/metal.py | 2 +- main/src/main/python/pytorch/rnnLayer.py | 3 +-- main/src/main/python/pytorch/taskManager.py | 2 ++ main/src/main/python/pytorch/utils.py | 21 +++++++++++-------- 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index d2589f9d3..cf887909d 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -27,7 +27,7 @@ def mkConstLookupParams(self, words): for i,w in enumerate(words): weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict[""]) w2i[w] = i - emb = nn.Embedding.from_pretrained(torch.tensor(weights), freeze=True) + emb = nn.Embedding.from_pretrained(torch.FloatTensor(weights), freeze=True) return ConstEmbeddingParameters(emb ,w2i) ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 92b7a133d..492cb3cca 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -36,16 +36,16 @@ def pickSpan(self, v): vs.append(e) return torch.cat(vs) - def forward(inputExpressions, doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, doDropout, headPositionsOpt = None): emissionScores = list() if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
for i, e in enumerate(inputExpressions): argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) - if nonlinearity == NONLIN_TANH: + if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) - elif nonlinearity == NONLIN_RELU: + elif self.nonlinearity == NONLIN_RELU: l1 = F.relu(l1) emissionScores.append(l1) else: diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 7e01c2594..569addce3 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -92,7 +92,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, sentence.headPositions, doDropout) + states = self.finalLayer(states, doDropout, sentence.headPositions) return states @@ -103,7 +103,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, sentence.headPositions, doDropout) + states = self.finalLayer(states, doDropout, headPositions) return states diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index a2136529b..2ee0ac3e9 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -133,7 +133,7 @@ def train(self, modelNamePrefix): numTagged += len(sentence) if(sentCount % 1000 == 0): - print (f"Cumulative loss: {cummulativeLoss/numTagged} ({sentCount} {sentences})") + print (f"Cumulative loss: {cummulativeLoss/numTagged} ({sentCount} sentences)") cummulativeLoss = 0.0 numTagged = 0 # we may have an incomplete batch here diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 
ee1896f8e..633d6b65b 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,9 +29,8 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States, _ = transduce(inputExpressions, self.wordRnnBuilder) - if self.useHighwayConnections: - States = torch.cat([States, inputExpressions], dim=1) + States = torch.cat([States.squeeze(1), inputExpressions], dim=1) return States diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index 25e669eb0..5f6ecd76c 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -163,6 +163,8 @@ def __init__(self, # Current position in the training sentences when we iterate during training currentTrainingSentencePosition = 0 + self.taskWeight = taskWeight + print (f"============ starting task {taskNumber} ============") print (f"Read {len(self.trainSentences)} training sentences for task {taskNumber}, with shard size {self.shardSize}.") if(self.devSentences is not None): diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 6d80e4169..38a9b6186 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -46,8 +46,9 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) - _, result = transduce(charEmbeddings, charRnnBuilder, len(word)) - return result.view(1, hidden_dim*2) + _, result = transduce(charEmbeddings, charRnnBuilder) + # Zheng: Not sure if this is the right way to concatenate the two direction hidden states + return result.view(hidden_dim*2) def readString2Ids(s2iFilename): s2i = dict() @@ -67,26 +68,28 @@ def readChar2Ids(s2iFilename): s2i[chr(int(k))] = int(v) return s2i -def 
transduce(embeddings, builder, l): +def transduce(embeddings, builder): + + builder = builder.float() hidden_dim = builder.hidden_size bi_direct = builder.bidirectional mode = builder.mode - + if mode == 'LSTM': if bi_direct: (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) + output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) else: (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) + output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.zeros(2, 1, hidden_dim) - output, result = builder(embeddings.view(l, 1, -1), h) + output, result = builder(embeddings.unsqueeze(1), h) else: h = torch.zeros(1, 1, hidden_dim) - output, result = builder(embeddings.view(l, 1, -1), h) + output, result = builder(embeddings.unsqueeze(1), h) return output, result @@ -98,7 +101,7 @@ def expressionDropout(expression, dropoutProb, doDropout): return expression def sentenceLossGreedy(emissionScoresForSeq, golds): - assert(emissionScoresForSeq.shape(0) == len(golds)) + assert(emissionScoresForSeq.size(0) == len(golds)) criterion = nn.CrossEntropyLoss() golds = Variable(torch.LongTensor(golds)) return criterion(emissionScoresForSeq, golds) From 2b91e036f47bedcb80796c4817931b170cffd589 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 23:03:55 -0700 Subject: [PATCH 015/134] the training pipeline is working now --- .../main/python/pytorch/greedyForwardLayer.py | 3 +- main/src/main/python/pytorch/layers.py | 9 +++-- main/src/main/python/pytorch/metal.py | 35 ++++++++++--------- main/src/main/python/pytorch/rnnLayer.py | 2 +- main/src/main/python/pytorch/seqScorer.py | 22 ++++++------ main/src/main/python/pytorch/taskManager.py | 1 + main/src/main/python/pytorch/utils.py | 4 +-- 7 files changed, 42 insertions(+), 34 deletions(-) diff --git 
a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 2d9ddeeae..86ad8cbf1 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -26,8 +26,7 @@ def __str__(self): return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): - labelIds = np.argmax(lattice.data.numpy(), axis=1).tolist() - return [self.i2t[i] for i in labelIds] + return [self.i2t[np.argmax(es)] for es in emissionScores] def inferenceWithScores(self, emissionScores): return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 569addce3..f8f9ecfb4 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -66,15 +66,20 @@ def start_eval(self): def get_state_dict(self): params = dict() + j_params = dict() if self.initialLayer is not None: params['initialLayer'] = self.initialLayer.state_dict() + j_params['initialLayer'] = {k:v.data.tolist() for k, v in params['initialLayer'].items()} if self.intermediateLayers: params['intermediateLayers'] = list() + j_params['intermediateLayers'] = list() for il in self.intermediateLayers: params['intermediateLayers'].append(il.state_dict()) + j_params['intermediateLayers'].append({k:v.data.tolist() for k, v in params['intermediateLayers'][-1].items()}) if self.finalLayer is not None: params['finalLayer'] = self.finalLayer.state_dict() - return params + j_params['finalLayer'] = {k:v.data.tolist() for k, v in params['finalLayer'].items()} + return params, j_params def load_state_dict(self, params): if self.initialLayer is not None: @@ -114,7 +119,7 @@ def saveX2i(self): x2i['initialLayer'] = self.initialLayer.saveX2i() else: x2i['hasInitial'] = 0 - x2i['intermediateCount'] = len(intermediateLayers) + 
x2i['intermediateCount'] = len(self.intermediateLayers) x2i['intermediateLayers'] = list() for il in self.intermediateLayers: x2i['intermediateLayers'].append(il.saveX2i()) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 2ee0ac3e9..ccfdad2c6 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -3,6 +3,7 @@ from sequences.rowReaders import * from pytorch.layers import Layers from pytorch.seqScorer import * +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from torch.optim import SGD, Adam @@ -160,13 +161,13 @@ def train(self, modelNamePrefix): totalRec += rec totalF1 += f1 - avgAcc = totalAcc / taskManager.taskCount - avgPrec = totalPrec / taskManager.taskCount - avgRec = totalRec / taskManager.taskCount - avgF1 = totalF1 / taskManager.taskCount + avgAcc = totalAcc / self.taskManager.taskCount + avgPrec = totalPrec / self.taskManager.taskCount + avgRec = totalRec / self.taskManager.taskCount + avgF1 = totalF1 / self.taskManager.taskCount - print (f"Average accuracy across {taskManager.taskCount} tasks in epoch {epoch}: {avgAcc}") - print (f"Average P/R/F1 across {taskManager.taskCount} tasks in epoch $epoch: {avgPrec} / {avgRec} / {avgF1}") + print (f"Average accuracy across {self.taskManager.taskCount} tasks in epoch {epoch}: {avgAcc}") + print (f"Average P/R/F1 across {self.taskManager.taskCount} tasks in epoch $epoch: {avgPrec} / {avgRec} / {avgF1}") allEpochScores.append((epoch, avgF1)) @@ -218,12 +219,12 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): pw.close() - print (f"Accuracy on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.accuracy()}") - print (f"Precision on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.precision()}") - print (f"Recall on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): 
{scoreCountsByLabel.recall()}") - print (f"Micro F1 on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.f1()}") - for label in scoreCountsByLabel.labels: - print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map(label).gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + print (f"Accuracy on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.accuracy()}") + print (f"Precision on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.precision()}") + print (f"Recall on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.recall()}") + print (f"Micro F1 on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) @@ -252,21 +253,23 @@ def test(self): def save(self, baseFilename): params = list() + j_params = list() for layers in self.model: - sd = layers.get_state_dict() + sd, j_sd = layers.get_state_dict() x2i = layers.saveX2i() params.append({"model": sd, "x2i": x2i}) + j_params.append({"model": j_sd, "x2i": x2i}) # torch pickle save try: - torch.save(params, baseFilename) + torch.save(params, baseFilename+".torch") print("model saved to {}".format(baseFilename+".torch")) except BaseException: print("[Warning: Saving failed... 
continuing anyway.]") # We can also save as text json file: - with open(baseFilename+".json") as f: - f.write(json.dumps(params)) + with open(baseFilename+".json", "w") as f: + f.write(json.dumps(j_params)) @classmethod diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 633d6b65b..3384e89ce 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -39,7 +39,7 @@ def saveX2i(self): x2i['inputSize'] = self.inputSize x2i['numLayers'] = self.numLayers x2i['rnnStateSize'] = self.rnnStateSize - x2i['useHighwayConnections'] = 1 if useHighwayConnections else 0 + x2i['useHighwayConnections'] = 1 if self.useHighwayConnections else 0 x2i['rnnType'] = self.rnnType x2i['dropoutProb'] = self.dropoutProb return x2i diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 144e5bfe9..0855ff120 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -5,15 +5,15 @@ @dataclass class ScoreCounts: - correct: int - gold: int - predicted: int + correct: int = 0 + gold: int = 0 + predicted: int = 0 class SeqScorer: @staticmethod def f1(golds, preds): - scoreCountsByLabel = scoreCountsByLabel() + scoreCountsByLabel = ScoreCountsByLabel() for e1, e2 in zip(preds, golds): scoreCountsByLabel.total += 1 @@ -53,14 +53,14 @@ def incCorrect(self, label="*", value=1): counts.correct += value def incAll(self, counts): - correct += counts.correct - total += counts.total + self.correct += counts.correct + self.total += counts.total for label in counts.labels(): c = counts.map[label] - incGold(label, c.gold) - incPredicted(label, c.predicted) - incCorrect(label, c.correct) + self.incGold(label, c.gold) + self.incPredicted(label, c.predicted) + self.incCorrect(label, c.correct) def precision(self, label="*", decimals=2): c = self.map[label].correct @@ -74,7 +74,7 @@ def recall(self, label="*", decimals=2): c = 
self.map[label].correct g = self.map[label].gold - reca = c/g if p!=0 else 0 + reca = c/g if g!=0 else 0 return round(reca, decimals) if decimals>0 else reca @@ -82,7 +82,7 @@ def f1(self, label="*", decimals=2): p = self.precision(label, decimals=-1) r = self.recall(label, decimals=-1) - f1 = 2.0 * p * r / (p + r), decimals if (p!=0 and r!=0) else 0 + f1 = 2.0 * p * r / (p + r) if (p!=0 and r!=0) else 0 return round(f1, decimals) if decimals>0 else f1 diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index 5f6ecd76c..ba8eab3cd 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -164,6 +164,7 @@ def __init__(self, currentTrainingSentencePosition = 0 self.taskWeight = taskWeight + self.taskName = taskName print (f"============ starting task {taskNumber} ============") print (f"Read {len(self.trainSentences)} training sentences for task {taskNumber}, with shard size {self.shardSize}.") diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 38a9b6186..053339d1e 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -45,7 +45,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size - charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) + charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) _, result = transduce(charEmbeddings, charRnnBuilder) # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result.view(hidden_dim*2) @@ -109,7 +109,7 @@ def sentenceLossGreedy(emissionScoresForSeq, golds): def emissionScoresToArrays(expressions): lattice = list() for expr in expressions: - probs = expr.data.tolist() + probs = expr.data.numpy() lattice += [probs] return lattice From 
1892713b9a3ab93b9268982db682060c3acf23e5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Fri, 1 Oct 2021 00:19:09 -0700 Subject: [PATCH 016/134] fix some minor issues --- main/src/main/python/embeddings/wordEmbeddingMap.py | 6 ++---- main/src/main/python/pytorch/metal.py | 2 +- main/src/main/python/pytorch/utils.py | 6 +----- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 49ab541e9..95cac8d04 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -16,10 +16,8 @@ def load(config): delimiter = "\t" else: delimiter = " " - line_split = line.rstrip().split(delimiter) - # extract word and vector - word = line_split[0] - x = np.array([float(i) for i in line_split[1:]]) + word, *rest = line.rstrip().split(delimiter) + x = np.array(list(map(float, rest))) vector = (x /np.linalg.norm(x)) embedding_size = vector.shape[0] emb_dict[word] = vector diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index ccfdad2c6..74376cf7f 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -5,7 +5,7 @@ from pytorch.seqScorer import * from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove -from torch.optim import SGD, Adam +from torch.optim import SGD, Adam, RMSprop import json diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 053339d1e..cce14eb5f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -107,11 +107,7 @@ def sentenceLossGreedy(emissionScoresForSeq, golds): return criterion(emissionScoresForSeq, golds) def emissionScoresToArrays(expressions): - lattice = list() - for expr in expressions: - probs = expr.data.numpy() - lattice += [probs] - return lattice + return [expr.data.tolist() for expr in expressions] 
def printCoNLLOutput(pw, words, golds, preds): From 850e9d202797a601abe4b9f588d2e147a9a10986 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 02:22:26 -0700 Subject: [PATCH 017/134] make minor changes, implemented Viterbi decoder --- .../python/embeddings/wordEmbeddingMap.py | 17 +- .../python/pytorch/constEmbeddingsGlove.py | 13 +- main/src/main/python/pytorch/forwardLayer.py | 29 ++-- main/src/main/python/pytorch/layers.py | 5 +- main/src/main/python/pytorch/metal.py | 32 ++-- main/src/main/python/pytorch/utils.py | 6 + .../python/pytorch/viterbiForwardLayer.py | 153 +++++++++++++++--- 7 files changed, 201 insertions(+), 54 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 95cac8d04..f2766c9c8 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -1,15 +1,19 @@ import numpy as np import math +import torch.nn as nn +import torch class WordEmbeddingMap: def __init__(self, config): - self.emb_dict, self.dim = load(config) + self.emb_dict, self.dim, self.w2i, self.emb = load(config) def isOutOfVocabulary(self, word): - return word not in self.emb_dict + return word not in self.w2i def load(config): emb_dict = dict() + w2i = {"":0} + i = 1 for line in open(config.get_string("glove.matrixResourceName")): if not len(line.split()) == 2: if "\t" in line: @@ -17,10 +21,17 @@ def load(config): else: delimiter = " " word, *rest = line.rstrip().split(delimiter) + w2i[word] = i + i += 1 x = np.array(list(map(float, rest))) vector = (x /np.linalg.norm(x)) embedding_size = vector.shape[0] emb_dict[word] = vector base = math.sqrt(6/embedding_size) emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) - return emb_dict, embedding_size \ No newline at end of file + + weights = np.zeros((len(emb_dict), embedding_size)) + for w, i in w2i.items(): + weights[i] = emb_dict[w] + emb = 
nn.Embedding.from_pretrained(torch.FloatTensor(weights), freeze=True) + return emb_dict, embedding_size, w2i, emb \ No newline at end of file diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index cf887909d..24d298f0b 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -2,7 +2,6 @@ import torch.nn as nn from embeddings.wordEmbeddingMap import * from pyhocon import ConfigFactory -import numpy as np import torch @dataclass @@ -13,6 +12,7 @@ class ConstEmbeddingParameters: class _ConstEmbeddingsGlove: def __init__(self): self.SINGLETON_WORD_EMBEDDING_MAP = None + self.cep = None config = ConfigFactory.parse_file('../resources/org/clulab/glove.conf') self.load(config) self.dim = self.SINGLETON_WORD_EMBEDDING_MAP.dim @@ -20,14 +20,9 @@ def __init__(self): def load(self, config): if self.SINGLETON_WORD_EMBEDDING_MAP is None: self.SINGLETON_WORD_EMBEDDING_MAP = WordEmbeddingMap(config) + self.cep = ConstEmbeddingParameters(self.SINGLETON_WORD_EMBEDDING_MAP.emb, self.SINGLETON_WORD_EMBEDDING_MAP.w2i) - def mkConstLookupParams(self, words): - w2i = dict() - weights = np.zeros((len(words), self.dim)) - for i,w in enumerate(words): - weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict[""]) - w2i[w] = i - emb = nn.Embedding.from_pretrained(torch.FloatTensor(weights), freeze=True) - return ConstEmbeddingParameters(emb ,w2i) + def get_ConstLookupParams(self): + return self.cep ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 492cb3cca..3a162c525 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -32,23 +32,29 @@ def pickSpan(self, v): # Zheng: Will spans overlap? 
vs = list() for span in self.spans: - e = torch.index_select(v, 0, torch.tensor([span[0], span[1]])) + e = torch.index_select(v, 1, torch.tensor([span[0], span[1]])) vs.append(e) return torch.cat(vs) def forward(self, inputExpressions, doDropout, headPositionsOpt = None): - emissionScores = list() if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - for i, e in enumerate(inputExpressions): - argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) - l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) - if self.nonlinearity == NONLIN_TANH: - l1 = F.tanh(l1) - elif self.nonlinearity == NONLIN_RELU: - l1 = F.relu(l1) - emissionScores.append(l1) + argExp = expressionDropout(self.pickSpan(inputExpressions), self.dropoutProb, doDropout) + emissionScores = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + if self.nonlinearity == NONLIN_TANH: + emissionScores = F.tanh(emissionScores) + elif self.nonlinearity == NONLIN_RELU: + emissionScores = F.relu(emissionScores) + # for i, e in enumerate(inputExpressions): + # argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + # l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + # if self.nonlinearity == NONLIN_TANH: + # l1 = F.tanh(l1) + # elif self.nonlinearity == NONLIN_RELU: + # l1 = F.relu(l1) + # emissionScores.append(l1) else: + emissionScores = list() if headPositionsOpt is None: raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): @@ -68,7 +74,8 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): elif nonlinearity == NONLIN_RELU: l1 = F.relu(l1) emissionScores.append(l1) - return torch.stack(emissionScores) + emissionScores = torch.stack(emissionScores) + return emissionScores @staticmethod def load(x2i): diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 
f8f9ecfb4..178a76693 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -280,9 +280,10 @@ def parse(layers, sentence, constEmbeddings): @staticmethod def loss(layers, taskId, sentence, goldLabels): # Zheng: I am not sure this is the suitable way to load embeddings or not, need help... - constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=True) # use dropout during training! - return layers[taskId+1].finalLayer.loss(states, goldLabels) + loss = layers[taskId+1].finalLayer.loss(states, goldLabels) + return loss diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 74376cf7f..887e31e2c 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -106,7 +106,8 @@ def train(self, modelNamePrefix): layers.start_train() trainer.zero_grad() - batchLosses = list() + batchLoss = 0 + i = 0 # traverse all training sentences for metaSentence in sentenceIterator: @@ -118,18 +119,21 @@ def train(self, modelNamePrefix): annotatedSentences = reader.toAnnotatedSentences(sentence) assert(annotatedSentences is not None) - unweightedLoss = sum([Layers.loss(self.model, taskId, a_sent[0], a_sent[1]) for a_sent in annotatedSentences]) + unweightedLoss = 0 + for a_sent in annotatedSentences: + unweightedLoss += Layers.loss(self.model, taskId, a_sent[0], a_sent[1]) loss = unweightedLoss * self.taskManager.tasks[taskId].taskWeight # Zheng: I don't think this is necessary: if self.taskManager.tasks[taskId].taskWeight!=1.0 else unweightedLoss - batchLosses.append(loss) + batchLoss += loss + i += 1 - if len(batchLosses) >= batchSize: - batchLoss = sum(batchLosses) - cummulativeLoss = batchLoss.item() + if i >= batchSize: + cummulativeLoss += batchLoss.item() batchLoss.backward() trainer.step() - 
batchLosses = list() + batchLoss = 0 + i = 0 numTagged += len(sentence) @@ -138,12 +142,12 @@ def train(self, modelNamePrefix): cummulativeLoss = 0.0 numTagged = 0 # we may have an incomplete batch here - if batchLosses: - batchLoss = sum(batchLosses) + if batchLoss: cummulativeLoss = batchLoss.item() batchLoss.backward() trainer.step() - batchLosses = list() + batchLoss = 0 + i = 0 # check dev performance in this epoch, for all tasks totalAcc = 0.0 @@ -209,7 +213,7 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): sentence = asent[0] goldLabels = asent[1] - constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() preds = Layers.predict(self.model, taskId, sentence, constEmbeddings) sc = SeqScorer.f1(goldLabels, preds) @@ -229,12 +233,18 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) def predictJointly(self, sentence, constEmbeddings): + for layers in self.model: + layers.start_eval() return Layers.predictJointly(self.model, sentence, constEmbeddings) def predict(self, taskId, sentence, constEmbeddings): + for layers in self.model: + layers.start_eval() return Layers.predict(self.model, taskId, sentence, constEmbeddings) def predictWithScores(self, taskId, sentence, constEmbeddings): + for layers in self.model: + layers.start_eval() return Layers.predictWithScores(self.model, taskId, sentence, constEmbeddings) # Custom method for the parsing algorithm diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index cce14eb5f..2c6500352 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -117,6 +117,12 @@ def printCoNLLOutput(pw, words, golds, preds): for i in range(len(words)): pw.write(f"{words[i]} {golds[i]} {preds[i]}\n") pw.write("\n") + 
+def log_sum_exp(vec): + max_score = vec[0, argmax(vec)] + max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1]) + return max_score + \ + torch.log(torch.sum(torch.exp(vec - max_score_broadcast))) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 4b025293b..2d8911c98 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -5,28 +5,145 @@ class ViterbiForwardLayer(ForwardLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) - def initializeTransitions(self): - #TODO - pass - def initTransitionsTo(self, dst, size, startTag, stopTag): - #TODO - pass + # Matrix of transition parameters. Entry i,j is the score of + # transitioning *to* i *from* j. + self.transitions = nn.Parameter( + torch.randn(self.outDim, self.outDim)) + + # These two statements enforce the constraint that we never transfer + # to the start tag and we never transfer from the stop tag + self.transitions.data[t2i[START_TAG], :] = -10000 + self.transitions.data[:, t2i[STOP_TAG]] = -10000 + + def _forward_alg(self, feats): + # Do the forward algorithm to compute the partition function + init_alphas = torch.full((1, self.tagset_size), -10000.) + # START_TAG has all of the score. + init_alphas[0][self.tag_to_ix[START_TAG]] = 0. 
+ + # Wrap in a variable so that we will get automatic backprop + forward_var = init_alphas + + # Iterate through the sentence + for feat in feats: + alphas_t = [] # The forward tensors at this timestep + for next_tag in range(self.tagset_size): + # broadcast the emission score: it is the same regardless of + # the previous tag + emit_score = feat[next_tag].view( + 1, -1).expand(1, self.tagset_size) + # the ith entry of trans_score is the score of transitioning to + # next_tag from i + trans_score = self.transitions[next_tag].view(1, -1) + # The ith entry of next_tag_var is the value for the + # edge (i -> next_tag) before we do log-sum-exp + next_tag_var = forward_var + trans_score + emit_score + # The forward variable for this tag is log-sum-exp of all the + # scores. + alphas_t.append(log_sum_exp(next_tag_var).view(1)) + forward_var = torch.cat(alphas_t).view(1, -1) + terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + alpha = log_sum_exp(terminal_var) + return alpha + + def _score_sentence(self, feats, tags): + # Gives the score of a provided tag sequence + score = torch.zeros(1) + tags = torch.cat([torch.tensor([self.tag_to_ix[START_TAG]], dtype=torch.long), tags]) + for i, feat in enumerate(feats): + score = score + \ + self.transitions[tags[i + 1], tags[i]] + feat[tags[i + 1]] + score = score + self.transitions[self.tag_to_ix[STOP_TAG], tags[-1]] + return score + + def _viterbi_decode(self, feats): + backpointers = [] + + # Initialize the viterbi variables in log space + init_vvars = torch.full((1, self.tagset_size), -10000.) 
+ init_vvars[0][self.tag_to_ix[START_TAG]] = 0 + + # forward_var at step i holds the viterbi variables for step i-1 + forward_var = init_vvars + for feat in feats: + bptrs_t = [] # holds the backpointers for this step + viterbivars_t = [] # holds the viterbi variables for this step + + for next_tag in range(self.tagset_size): + # next_tag_var[i] holds the viterbi variable for tag i at the + # previous step, plus the score of transitioning + # from tag i to next_tag. + # We don't include the emission scores here because the max + # does not depend on them (we add them in below) + next_tag_var = forward_var + self.transitions[next_tag] + best_tag_id = argmax(next_tag_var) + bptrs_t.append(best_tag_id) + viterbivars_t.append(next_tag_var[0][best_tag_id].view(1)) + # Now add in the emission scores, and assign forward_var to the set + # of viterbi variables we just computed + forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1) + backpointers.append(bptrs_t) + + # Transition to STOP_TAG + terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + best_tag_id = argmax(terminal_var) + path_score = terminal_var[0][best_tag_id] + + # Follow the back pointers to decode the best path. 
+ best_path = [best_tag_id] + for bptrs_t in reversed(backpointers): + best_tag_id = bptrs_t[best_tag_id] + best_path.append(best_tag_id) + # Pop off the start tag (we dont want to return that to the caller) + start = best_path.pop() + assert start == self.tag_to_ix[START_TAG] # Sanity check + best_path.reverse() + return path_score, best_path + def loss(self, finalStates, goldLabelStrings): - #TODO - pass + goldLabels = [self.t2i[gs] for gs in goldLabelStrings] + forward_score = self._forward_alg(finalStates) + gold_score = self._score_sentence(feats, goldLabels) + return forward_score - gold_score + def saveX2i(self): - #TODO - pass + x2i = dict() + x2i["inferenceType"] = TYPE_GREEDY + x2i["inputSize"] = self.inputSize + x2i["isDual"] = 1 if self.isDual else 0 + x2i["span"] = spanToString(span) if self.spans else "" + x2i["nonlinearity"] = self.nonlinearity + x2i["t2i"] = self.t2i + x2i["dropoutProb"] = self.dropoutProb + + return x2i + def __str__(self): - #TODO - pass + return f"ViterbiForwardLayer({self.inDim}, {self.outDim})" + def inference(emissionScores): - #TODO - pass + score, labelsIds = self._viterbi_decode(emissionScores) + return [self.i2t[i] for i in labelsIds] + def inferenceWithScores(emissionScores): - #TODO - pass + raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") + @classmethod def load(cls, x2i): - #TODO - pass \ No newline at end of file + inputSize = x2i["inputSize"] + isDual = x2i.get("isDual", DEFAULT_IS_DUAL) == 1 + sapnValue = x2i.get("span", "") + spans = None if sapnValue == "" else parseSpan(sapnValue, inputSize) + nonlinearity = x2i.get("nonlinearity", NONLIN_NONE) + t2i = x2i["t2i"] + i2t = {i:t for t, i in t2i.items()} + dropoutProb = x2i.get("dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + if spans: + l = spanLength(spans) + actualInputSize = 2*l if isDual else l + else: + actualInputSize = 2*inputSize if isDual else inputSize + + return cls(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, 
dropoutProb, spans) + From b8e2d3c32c7613bc010719cf0cb0ce8916428f2b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 02:27:49 -0700 Subject: [PATCH 018/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 3a162c525..f066d1011 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -83,9 +83,7 @@ def load(x2i): from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] if inferenceType == TYPE_VITERBI: - pass - # TODO - # return ViterbiForwardLayer.load(x2i) + return ViterbiForwardLayer.load(x2i) elif inferenceType == TYPE_GREEDY: return GreedyForwardLayer.load(x2i) else: @@ -125,11 +123,9 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): if inferenceType == TYPE_GREEDY_STRING: return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) elif inferenceType == TYPE_VITERBI_STRING: - pass - # TODO - # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) - # layer.initializeTransitions() - # return layer + layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) + layer.initializeTransitions() + return layer else: raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") From c5476bc227988ce674a35b12de7f944da26e983b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 02:30:47 -0700 Subject: [PATCH 019/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index f066d1011..c1910a119 100644 --- 
a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -124,7 +124,6 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) elif inferenceType == TYPE_VITERBI_STRING: layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) - layer.initializeTransitions() return layer else: raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") From 9199eed65694acab90cca9bad8572998eb590fbb Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 13:57:48 -0700 Subject: [PATCH 020/134] fixed bugs in viterbi decoder --- .../main/python/pytorch/greedyForwardLayer.py | 2 ++ main/src/main/python/pytorch/layers.py | 18 ++++------- main/src/main/python/pytorch/utils.py | 6 +++- .../python/pytorch/viterbiForwardLayer.py | 30 +++++++++---------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 86ad8cbf1..e45116a67 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -26,9 +26,11 @@ def __str__(self): return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): + emissionScores = emissionScoresToArrays(states) return [self.i2t[np.argmax(es)] for es in emissionScores] def inferenceWithScores(self, emissionScores): + emissionScores = emissionScoresToArrays(states) return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] @classmethod diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 178a76693..6bca50dfc 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -194,15 +194,13 @@ def 
predictJointly(layers, sentence, constEmbeddings): sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout=False) for i in range(1, len(layers)): states = layers[i].forwardFrom(sharedStates, sentence.headPositions, doDropout=False) - emissionScores = emissionScoresToArrays(states) - labels = layers[i].finalLayer.inference(emissionScores) + labels = layers[i].finalLayer.inference(states) labelsPerTask += [labels] # no shared layer else: for i in range(1, len(layers)): states = layers[i].forward(sentence, sentence.headPositions, doDropout=False) - emissionScores = emissionScoresToArrays(states) - labels = layers[i].finalLayer.inference(emissionScores) + labels = layers[i].finalLayer.inference(states) labelsPerTask += [labels] return labelsPerTask @@ -219,14 +217,12 @@ def forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout): @staticmethod def predict(layers, taskId, sentence, constEmbeddings): states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) - emissionScores = emissionScoresToArrays(states) - return layers[taskId+1].finalLayer.inference(emissionScores) + return layers[taskId+1].finalLayer.inference(states) @staticmethod def predictWithScores(layers, taskId, sentence, constEmbeddings): states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) - emissionScores = emissionScoresToArrays(states) - return layers[taskId+1].finalLayer.inferenceWithScores(emissionScores) + return layers[taskId+1].finalLayer.inferenceWithScores(states) @staticmethod def parse(layers, sentence, constEmbeddings): @@ -240,8 +236,7 @@ def parse(layers, sentence, constEmbeddings): # now predict the heads (first task) # headStates = layers[1].forwardFrom(sharedStates, None, doDropout=False) - headEmissionScores = emissionScoresToArrays(headStates) - headScores = layers[1].finalLayer.inference(headEmissionScores) + headScores = layers[1].finalLayer.inference(headStates) # store the head values 
here heads = list() @@ -271,8 +266,7 @@ def parse(layers, sentence, constEmbeddings): # next, predict the labels using the predicted heads # labelStates = layers[2].forwardFrom(sharedStates, heads, doDropout=False) - emissionScores = emissionScoresToArrays(labelStates) - labels = layers[2].finalLayer.inference(emissionScores) + labels = layers[2].finalLayer.inference(labelStates) assert(len(labels)==len(heads)) return zip(heads, labels) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 2c6500352..26ef279cd 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -117,7 +117,11 @@ def printCoNLLOutput(pw, words, golds, preds): for i in range(len(words)): pw.write(f"{words[i]} {golds[i]} {preds[i]}\n") pw.write("\n") - +def argmax(vec): + # return the argmax as a python int + _, idx = torch.max(vec, 1) + return idx.item() + def log_sum_exp(vec): max_score = vec[0, argmax(vec)] max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1]) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 2d8911c98..1952666b0 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -17,9 +17,9 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d def _forward_alg(self, feats): # Do the forward algorithm to compute the partition function - init_alphas = torch.full((1, self.tagset_size), -10000.) + init_alphas = torch.full((1, self.outDim), -10000.) # START_TAG has all of the score. - init_alphas[0][self.tag_to_ix[START_TAG]] = 0. + init_alphas[0][self.t2i[START_TAG]] = 0. 
# Wrap in a variable so that we will get automatic backprop forward_var = init_alphas @@ -27,11 +27,11 @@ def _forward_alg(self, feats): # Iterate through the sentence for feat in feats: alphas_t = [] # The forward tensors at this timestep - for next_tag in range(self.tagset_size): + for next_tag in range(self.outDim): # broadcast the emission score: it is the same regardless of # the previous tag emit_score = feat[next_tag].view( - 1, -1).expand(1, self.tagset_size) + 1, -1).expand(1, self.outDim) # the ith entry of trans_score is the score of transitioning to # next_tag from i trans_score = self.transitions[next_tag].view(1, -1) @@ -42,26 +42,26 @@ def _forward_alg(self, feats): # scores. alphas_t.append(log_sum_exp(next_tag_var).view(1)) forward_var = torch.cat(alphas_t).view(1, -1) - terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + terminal_var = forward_var + self.transitions[self.t2i[STOP_TAG]] alpha = log_sum_exp(terminal_var) return alpha def _score_sentence(self, feats, tags): # Gives the score of a provided tag sequence score = torch.zeros(1) - tags = torch.cat([torch.tensor([self.tag_to_ix[START_TAG]], dtype=torch.long), tags]) + tags = torch.cat([torch.tensor([self.t2i[START_TAG]], dtype=torch.long), tags]) for i, feat in enumerate(feats): score = score + \ self.transitions[tags[i + 1], tags[i]] + feat[tags[i + 1]] - score = score + self.transitions[self.tag_to_ix[STOP_TAG], tags[-1]] + score = score + self.transitions[self.t2i[STOP_TAG], tags[-1]] return score def _viterbi_decode(self, feats): backpointers = [] # Initialize the viterbi variables in log space - init_vvars = torch.full((1, self.tagset_size), -10000.) - init_vvars[0][self.tag_to_ix[START_TAG]] = 0 + init_vvars = torch.full((1, self.outDim), -10000.) 
+ init_vvars[0][self.t2i[START_TAG]] = 0 # forward_var at step i holds the viterbi variables for step i-1 forward_var = init_vvars @@ -69,7 +69,7 @@ def _viterbi_decode(self, feats): bptrs_t = [] # holds the backpointers for this step viterbivars_t = [] # holds the viterbi variables for this step - for next_tag in range(self.tagset_size): + for next_tag in range(self.outDim): # next_tag_var[i] holds the viterbi variable for tag i at the # previous step, plus the score of transitioning # from tag i to next_tag. @@ -85,7 +85,7 @@ def _viterbi_decode(self, feats): backpointers.append(bptrs_t) # Transition to STOP_TAG - terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + terminal_var = forward_var + self.transitions[self.t2i[STOP_TAG]] best_tag_id = argmax(terminal_var) path_score = terminal_var[0][best_tag_id] @@ -96,14 +96,14 @@ def _viterbi_decode(self, feats): best_path.append(best_tag_id) # Pop off the start tag (we dont want to return that to the caller) start = best_path.pop() - assert start == self.tag_to_ix[START_TAG] # Sanity check + assert start == self.t2i[START_TAG] # Sanity check best_path.reverse() return path_score, best_path def loss(self, finalStates, goldLabelStrings): - goldLabels = [self.t2i[gs] for gs in goldLabelStrings] + goldLabels = torch.tensor([self.t2i[gs] for gs in goldLabelStrings], dtype=torch.long) forward_score = self._forward_alg(finalStates) - gold_score = self._score_sentence(feats, goldLabels) + gold_score = self._score_sentence(finalStates, goldLabels) return forward_score - gold_score def saveX2i(self): @@ -121,7 +121,7 @@ def saveX2i(self): def __str__(self): return f"ViterbiForwardLayer({self.inDim}, {self.outDim})" - def inference(emissionScores): + def inference(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) return [self.i2t[i] for i in labelsIds] From fdaf8e48986d98448f236913a2694fb1c0c5a995 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Fri, 8 Oct 2021 17:39:20 -0700 
Subject: [PATCH 021/134] fixed some bugs, changed default learning rate --- .../src/main/python/pytorch/greedyForwardLayer.py | 4 ++-- main/src/main/python/pytorch/metal.py | 8 ++++---- main/src/main/python/pytorch/utils.py | 15 ++++++++------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index e45116a67..556559cf4 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -26,11 +26,11 @@ def __str__(self): return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): - emissionScores = emissionScoresToArrays(states) + emissionScores = emissionScoresToArrays(emissionScores) return [self.i2t[np.argmax(es)] for es in emissionScores] def inferenceWithScores(self, emissionScores): - emissionScores = emissionScoresToArrays(states) + emissionScores = emissionScoresToArrays(emissionScores) return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] @classmethod diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 887e31e2c..e31f464a2 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -65,7 +65,7 @@ def mkVocabularies(self): return words, labels def train(self, modelNamePrefix): - learningRate = self.taskManager.get_float("mtl.learningRate", 0.001) + learningRate = self.taskManager.get_float("mtl.learningRate", 1e-5) trainerType = self.taskManager.get_string("mtl.trainer", "adam") batchSize = self.taskManager.get_int("mtl.batchSize", 1) assert(batchSize>0) @@ -75,11 +75,11 @@ def train(self, modelNamePrefix): parameters += layers.get_parameters() if trainerType == "adam": - trainer = Adam(parameters, lr=learningRate) + trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) elif 
trainerType == "rmsprop": - trainer = RMSprop(parameters, lr=learningRate) + trainer = RMSprop(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) elif trainerType == "sgd": - trainer = SDG(parameters, lr=learningRate) + trainer = SDG(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) else: raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 26ef279cd..472eab18a 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -46,9 +46,10 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - _, result = transduce(charEmbeddings, charRnnBuilder) + output, _ = transduce(charEmbeddings, charRnnBuilder) + result = output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states - return result.view(hidden_dim*2) + return result def readString2Ids(s2iFilename): s2i = dict() @@ -78,17 +79,17 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: - (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) + (h, c) = (torch.rand(2, 1, hidden_dim), torch.rand(2, 1, hidden_dim)) output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) else: - (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) + (h, c) = (torch.rand(1, 1, hidden_dim), torch.rand(1, 1, hidden_dim)) output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: - h = torch.zeros(2, 1, hidden_dim) + h = torch.rand(2, 1, hidden_dim) output, result = builder(embeddings.unsqueeze(1), h) else: - h = torch.zeros(1, 1, hidden_dim) + h = torch.rand(1, 1, hidden_dim) output, result = builder(embeddings.unsqueeze(1), h) return output, result @@ 
-121,7 +122,7 @@ def argmax(vec): # return the argmax as a python int _, idx = torch.max(vec, 1) return idx.item() - + def log_sum_exp(vec): max_score = vec[0, argmax(vec)] max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1]) From c2b7193372944340e7fd8e8e05d62f1a09834323 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 14:08:33 -0700 Subject: [PATCH 022/134] add features and fixed bugs fixed bugs on UNK word embedding set dropout prob to 0.1 add clipping --- .../python/embeddings/wordEmbeddingMap.py | 9 ++++---- main/src/main/python/pytorch/metal.py | 2 ++ main/src/main/python/pytorch/rnnLayer.py | 2 +- main/src/main/python/pytorch/seqScorer.py | 13 ++++++++++++ main/src/main/python/pytorch/utils.py | 21 +++++++++---------- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index f2766c9c8..f2b820402 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -24,11 +24,12 @@ def load(config): w2i[word] = i i += 1 x = np.array(list(map(float, rest))) - vector = (x /np.linalg.norm(x)) + vector = x #(x /np.linalg.norm(x)) #normalized embedding_size = vector.shape[0] - emb_dict[word] = vector - base = math.sqrt(6/embedding_size) - emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) + if word == "": + emb_dict[""] = vector + else: + emb_dict[word] = vector weights = np.zeros((len(emb_dict), embedding_size)) for w, i in w2i.items(): diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index e31f464a2..609292c64 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -74,6 +74,8 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() + torch.nn.utils.clip_grad_norm_(parameters, 1e-2) + if trainerType == "adam": trainer = 
Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) elif trainerType == "rmsprop": diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 3384e89ce..525f5747a 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -28,7 +28,7 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) - States, _ = transduce(inputExpressions, self.wordRnnBuilder) + States = transduce(inputExpressions, self.wordRnnBuilder) if self.useHighwayConnections: States = torch.cat([States.squeeze(1), inputExpressions], dim=1) diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 0855ff120..335339e05 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -91,6 +91,19 @@ def accuracy(self, decimals=2): return round(a, decimals) if decimals>0 else a +def round(d, decimals): + if(decimals < 0): + return d # do not round when decimals is set to a negative value + + zeros = 1 + i = 0 + while (i < decimals + 2): + zeros *= 10 + i += 1 + + v = (d * zeros) / 100 + return v + diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 472eab18a..f3e6571a3 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -14,14 +14,12 @@ START_TAG = "" STOP_TAG = "" -RANDOM_SEED = 2522620396 # used for both DyNet, and the JVM seed for shuffling data -WEIGHT_DECAY = 1e-5 +RANDOM_SEED = 2522620396 +WEIGHT_DECAY = 0.01 LOG_MIN_VALUE = -10000.0 -DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default - -IS_DYNET_INITIALIZED = False +DEFAULT_DROPOUT_PROBABILITY = 0.1 # no dropout by default TYPE_VITERBI = 1 TYPE_GREEDY = 2 @@ -46,7 +44,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = 
charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - output, _ = transduce(charEmbeddings, charRnnBuilder) + output = transduce(charEmbeddings, charRnnBuilder) result = output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result @@ -79,20 +77,21 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: + # change 1 to the layers we need (h, c) = (torch.rand(2, 1, hidden_dim), torch.rand(2, 1, hidden_dim)) - output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) else: (h, c) = (torch.rand(1, 1, hidden_dim), torch.rand(1, 1, hidden_dim)) - output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.rand(2, 1, hidden_dim) - output, result = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1), h) else: h = torch.rand(1, 1, hidden_dim) - output, result = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1), h) - return output, result + return output def expressionDropout(expression, dropoutProb, doDropout): if doDropout and dropoutProb > 0: From 66e64003496870a8a7b25618ad2fa3ee127aa16e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 14:10:17 -0700 Subject: [PATCH 023/134] Update wordEmbeddingMap.py --- main/src/main/python/embeddings/wordEmbeddingMap.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index f2b820402..7412db665 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -21,15 +21,13 @@ def load(config): else: delimiter = " " word, *rest = line.rstrip().split(delimiter) + word = "" if word == "" else word 
w2i[word] = i i += 1 x = np.array(list(map(float, rest))) vector = x #(x /np.linalg.norm(x)) #normalized embedding_size = vector.shape[0] - if word == "": - emb_dict[""] = vector - else: - emb_dict[word] = vector + emb_dict[word] = vector weights = np.zeros((len(emb_dict), embedding_size)) for w, i in w2i.items(): From 46bc27e9669d0ba90fe92e4afc0e7b3f68447f1e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 14:18:53 -0700 Subject: [PATCH 024/134] Update wordEmbeddingMap.py --- main/src/main/python/embeddings/wordEmbeddingMap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 7412db665..7ea6328cd 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -12,8 +12,8 @@ def isOutOfVocabulary(self, word): def load(config): emb_dict = dict() - w2i = {"":0} - i = 1 + w2i = {} + i = 0 for line in open(config.get_string("glove.matrixResourceName")): if not len(line.split()) == 2: if "\t" in line: From 21d861ff4c2324ea746feca881650418726aab9b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 22:24:54 -0700 Subject: [PATCH 025/134] Update seqScorer.py --- main/src/main/python/pytorch/seqScorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 335339e05..1c626566d 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -101,7 +101,7 @@ def round(d, decimals): zeros *= 10 i += 1 - v = (d * zeros) / 100 + v = int(d * zeros) / 100.0 return v From a1a44653ceb32ad42f5add2d521217bb80c22df8 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 20 Oct 2021 14:45:23 -0700 Subject: [PATCH 026/134] Update seqScorer.py --- main/src/main/python/pytorch/seqScorer.py | 19 ++++--------------- 1 file 
changed, 4 insertions(+), 15 deletions(-) diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 1c626566d..068815cff 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -68,7 +68,7 @@ def precision(self, label="*", decimals=2): prec = c/p if p!=0 else 0 - return round(prec, decimals) if decimals>0 else prec + return round(prec*100, decimals) if decimals>0 else prec def recall(self, label="*", decimals=2): c = self.map[label].correct @@ -76,7 +76,7 @@ def recall(self, label="*", decimals=2): reca = c/g if g!=0 else 0 - return round(reca, decimals) if decimals>0 else reca + return round(reca*100, decimals) if decimals>0 else reca def f1(self, label="*", decimals=2): p = self.precision(label, decimals=-1) @@ -84,25 +84,14 @@ def f1(self, label="*", decimals=2): f1 = 2.0 * p * r / (p + r) if (p!=0 and r!=0) else 0 - return round(f1, decimals) if decimals>0 else f1 + return round(f1*100, decimals) if decimals>0 else f1 def accuracy(self, decimals=2): a = self.correct / self.total - return round(a, decimals) if decimals>0 else a + return round(a*100, decimals) if decimals>0 else a -def round(d, decimals): - if(decimals < 0): - return d # do not round when decimals is set to a negative value - zeros = 1 - i = 0 - while (i < decimals + 2): - zeros *= 10 - i += 1 - - v = int(d * zeros) / 100.0 - return v From cfb54792294499f6179079661bf96eaa363b0e9a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 21 Oct 2021 14:34:02 -0700 Subject: [PATCH 027/134] fixed the eval() bug --- main/src/main/python/pytorch/layers.py | 4 ++-- main/src/main/python/pytorch/metal.py | 17 +++++++++-------- main/src/main/python/run.py | 8 ++++++-- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 6bca50dfc..7447c79e3 100644 --- a/main/src/main/python/pytorch/layers.py +++ 
b/main/src/main/python/pytorch/layers.py @@ -127,7 +127,7 @@ def saveX2i(self): x2i['hasFinal'] = 1 x2i['finalLayer'] = self.finalLayer.saveX2i() else: - x2i['finalLayer'] = 0 + x2i['hasFinal'] = 0 return x2i @@ -182,7 +182,7 @@ def loadX2i(cls, x2i): intermediateLayers.append(il) hasFinal = x2i['hasFinal'] - finalLayer = ForwardLayer.load(x2i['finalLayer']) if hasFinal == 1 else none + finalLayer = ForwardLayer.load(x2i['finalLayer']) if hasFinal == 1 else None return cls(initialLayer, intermediateLayers, finalLayer) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 609292c64..57ec114d2 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -74,7 +74,7 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() - torch.nn.utils.clip_grad_norm_(parameters, 1e-2) + torch.nn.utils.clip_grad_norm_(parameters, 5) if trainerType == "adam": trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) @@ -216,7 +216,7 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): goldLabels = asent[1] constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() - preds = Layers.predict(self.model, taskId, sentence, constEmbeddings) + preds = self.predict(taskId, sentence, constEmbeddings) sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) @@ -257,10 +257,11 @@ def parse(self, sentence, constEmbeddings): Layers.parse(self.model, sentence, constEmbeddings) def test(self): - taskName = taskManager.tasks[taskId].taskName - testSentences = taskManager.tasks[taskId].testSentences - if testSentences: - self.evaluate(taskId, taskName, devSentences, "testing") + for taskId in range(0, self.taskManager.taskCount): + taskName = self.taskManager.tasks[taskId].taskName + testSentences = self.taskManager.tasks[taskId].devSentences + if testSentences: + self.evaluate(taskId, taskName, testSentences, "testing") def save(self, 
baseFilename): @@ -270,7 +271,7 @@ def save(self, baseFilename): sd, j_sd = layers.get_state_dict() x2i = layers.saveX2i() params.append({"model": sd, "x2i": x2i}) - j_params.append({"model": j_sd, "x2i": x2i}) + j_params.append({"x2i": x2i}) # torch pickle save try: @@ -290,7 +291,7 @@ def load(cls, modelFilenamePrefix): layersSeq = list() checkpoint = torch.load(modelFilenamePrefix+".torch") for param in checkpoint: - layers = loadX2i(param['x2i']) + layers = Layers.loadX2i(param['x2i']) layers.load_state_dict(param['model']) layersSeq.append(layers) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index fc4e1385a..01c37404b 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -18,11 +18,15 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - print (taskManager.debugTraversal()) mtl = Metal(taskManager, None) mtl.train(modelName) elif args.test: - pass + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + modelName = args.model_file + model = Metal.load(modelName) + mtl = Metal(taskManager, model) + mtl.test() elif args.shell: pass \ No newline at end of file From 3ee92feb42d346941e6dd4402a5a3650e2de8523 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 26 Oct 2021 21:55:54 -0700 Subject: [PATCH 028/134] Controlling sources of randomness --- main/src/main/python/pytorch/metal.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 57ec114d2..3ee8e605b 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -65,16 +65,21 @@ def mkVocabularies(self): return words, labels def train(self, modelNamePrefix): + learningRate = self.taskManager.get_float("mtl.learningRate", 1e-5) trainerType = 
self.taskManager.get_string("mtl.trainer", "adam") batchSize = self.taskManager.get_int("mtl.batchSize", 1) + + torch.manual_seed(self.taskManager.random) + random.seed(self.taskManager.random) + assert(batchSize>0) parameters = list() for layers in self.model: parameters += layers.get_parameters() - torch.nn.utils.clip_grad_norm_(parameters, 5) + # torch.nn.utils.clip_grad_norm_(parameters, 5) if trainerType == "adam": trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) @@ -259,7 +264,7 @@ def parse(self, sentence, constEmbeddings): def test(self): for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName - testSentences = self.taskManager.tasks[taskId].devSentences + testSentences = self.taskManager.tasks[taskId].testSentences if testSentences: self.evaluate(taskId, taskName, testSentences, "testing") From a37ef474d48a8cb41ffe9f711628b1b5a8688167 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 00:34:35 -0700 Subject: [PATCH 029/134] missed import... 
--- main/src/main/python/pytorch/metal.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 3ee8e605b..340664028 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -8,6 +8,7 @@ from torch.optim import SGD, Adam, RMSprop import json +import random class Metal(object): """docstring for Metal""" From 5e9434bc0fd5fa3521d090a20f6f5370e1c56c05 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 16:44:33 -0700 Subject: [PATCH 030/134] debugged for parsing --- .../src/main/python/pytorch/embeddingLayer.py | 9 ++++----- main/src/main/python/pytorch/forwardLayer.py | 19 ++++++++++++------- main/src/main/python/pytorch/rnnLayer.py | 3 ++- main/src/main/python/pytorch/utils.py | 4 ++-- main/src/main/python/sequences/rowReaders.py | 6 +++++- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 15242ed7a..d348e3052 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -66,9 +66,8 @@ def __init__(self, w2i, # word to index posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 distanceDim = distanceWindowSize if distanceLookupParameters else 0 - positionDim = 1 if distanceLookupParameters and useIsPredicate else 0 - predicateDim = positionEmbeddingSize if positionLookupParameters else 0 - + positionDim = 1 if distanceLookupParameters else 0 + predicateDim = positionEmbeddingSize if positionLookupParameters and useIsPredicate else 0 self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim random.seed(RANDOM_SEED) @@ -119,14 +118,14 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, 
tags=None, nes=None, h # # POS tag embedding # - if tags: + if tags and self.posTagLookupParameters: posTagEmbed = self.posTagLookupParameters(torch.LongTensor([self.tag2i.get(tag, 0) for tag in tags])) else: posTagEmbed = None # # NE tag embedding # - if nes: + if nes and self.neTagLookupParameters: neTagEmbed = self.neTagLookupParameters(torch.LongTensor([self.ne2i.get(ne, 0) for ne in nes])) else: neTagEmbed = None diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index c1910a119..06c29d8ba 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -32,9 +32,9 @@ def pickSpan(self, v): # Zheng: Will spans overlap? vs = list() for span in self.spans: - e = torch.index_select(v, 1, torch.tensor([span[0], span[1]])) + e = torch.index_select(v, 1, torch.tensor(range(span[0], span[1]))) vs.append(e) - return torch.cat(vs) + return torch.cat(vs, dim=1) def forward(self, inputExpressions, doDropout, headPositionsOpt = None): if not self.isDual: @@ -65,7 +65,6 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): predExp = expressionDropout(pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) else: # the head is root. we used a dedicated Parameter for root - # Zheng: Why not add root node to the input sequence at the beginning? predExp = expressionDropout(pickSpan(self.pRoot), self.dropout, doDropout) ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) @@ -129,12 +128,18 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") def spanLength(spans): - sum(end - start for start, end in spans) + return sum(end - start for start, end in spans) -def parseSpan(spanParam, inputSize): +def parseSpan(spanParam, inputSize=None): # Zheng: Why do we need inputSize here? 
- token1, token2 = map(int, spanParamToken.split('-')) - spans.append((token1, token2)) + spans = list() + spanParamTokens = spanParam.split(",") + for spanParamToken in spanParamTokens: + # spanTokens = spanParamToken.split('-') + # assert(len(spanTokens) == 2) + # spans.append((int(spanTokens[0]), int(spanTokens[1]))) + token1, token2 = map(int, spanParamToken.split('-')) + spans.append((token1, token2)) return spans def spanToString(spans): diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 525f5747a..5c8681269 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,8 +29,9 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States = transduce(inputExpressions, self.wordRnnBuilder) + States = States.squeeze(1) if self.useHighwayConnections: - States = torch.cat([States.squeeze(1), inputExpressions], dim=1) + States = torch.cat([States, inputExpressions], dim=1) return States diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index f3e6571a3..25457c88c 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -53,7 +53,7 @@ def readString2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: for line in f: - if not line.startswith("#"): + if not line.startswith("# ") and line.rstrip(): k, v = line.strip().split('\t') s2i[k] = int(v) return s2i @@ -62,7 +62,7 @@ def readChar2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: for line in f: - if not line.startswith("#") and line.rstrip(): + if not line.startswith("# ") and line.rstrip(): k, v = line.strip().split('\t') s2i[chr(int(k))] = int(v) return s2i diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index 58a15cb71..0bd68210f 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ 
b/main/src/main/python/sequences/rowReaders.py @@ -68,6 +68,10 @@ def parseFull(self, rows): assert(rows[0].length >= 5) numSent = (rows[0].length - 3) / 2 assert(numSent >= 1) + assert(numSent==int(numSent)) + numSent = int(numSent) + + words = list() posTags = list() @@ -85,7 +89,7 @@ def parseFull(self, rows): try: headPositions[j] += [int(row.get(self.LABEL_START_OFFSET + (j * 2) + 1))] except: - raise RuntimeError # not sure about this part + raise RuntimeError sentences = list() for i in range(numSent): From ce00bd7e3648cc7118c0eabb5e8867870e75695c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 20:12:33 -0700 Subject: [PATCH 031/134] fixed bugs for parsing --- main/src/main/python/pytorch/embeddingLayer.py | 10 +++++----- main/src/main/python/pytorch/forwardLayer.py | 6 +++--- main/src/main/python/sequences/rowReaders.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index d348e3052..14cca2d52 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -15,6 +15,7 @@ DEFAULT_POSITION_EMBEDDING_SIZE: int = -1 # no position embeddings by default DEFAULT_DISTANCE_WINDOW_SIZE: int = -1 DEFAULT_USE_IS_PREDICATE: int = -1 +random.seed(RANDOM_SEED) class EmbeddingLayer(InitialLayer): def __init__(self, w2i, # word to index @@ -66,10 +67,10 @@ def __init__(self, w2i, # word to index posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 distanceDim = distanceWindowSize if distanceLookupParameters else 0 - positionDim = 1 if distanceLookupParameters else 0 - predicateDim = positionEmbeddingSize if positionLookupParameters and useIsPredicate else 0 - self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim - 
random.seed(RANDOM_SEED) + predicateDim = 1 if distanceLookupParameters and useIsPredicate else 0 + positionDim = positionEmbeddingSize if positionLookupParameters else 0 + self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + def forward(self, sentence, constEmbeddings, doDropout): @@ -276,7 +277,6 @@ def initialize(cls, config, paramPrefix, wordCounter): charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): - tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) else: diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 06c29d8ba..95cd36c5c 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -59,13 +59,13 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = expressionDropout(pickSpan(e), self.dropoutProb, doDropout) + argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) if headPosition >= 0: # there is an explicit head in the sentence - predExp = expressionDropout(pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) else: # the head is root. 
we used a dedicated Parameter for root - predExp = expressionDropout(pickSpan(self.pRoot), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(self.pRoot), self.dropout, doDropout) ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) if nonlinearity == NONLIN_TANH: diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index 0bd68210f..434469e92 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ b/main/src/main/python/sequences/rowReaders.py @@ -61,7 +61,7 @@ def parseSimpleExtended(self, rows): neLabels += [row.get(self.NE_LABEL_POSITION)] labels += [row.get(self.LABEL_START_OFFSET)] - return [(AnnotatedSentence(words), posTags, neLabels, labels)] + return [(AnnotatedSentence(words, posTags, neLabels), labels)] # Parser for the full format: word, POS tag, NE label, (label head)+ def parseFull(self, rows): From d00c087fe90307dae77bce87bda88b8f3b063ed5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 21:44:47 -0700 Subject: [PATCH 032/134] export model to onnx --- main/src/main/python/pytorch/pytorch2onnx.py | 29 ++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 main/src/main/python/pytorch/pytorch2onnx.py diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py new file mode 100644 index 000000000..a1b620c03 --- /dev/null +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -0,0 +1,29 @@ +import torch + +from pytorch.metal import Metal + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + args = parser.parse_args() + modelName = args.model_file + model = Metal.load(modelName) + + input_names = [ "input" ] + output_names = [ "output" ] + + #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
+ for i, layers in enumerate(model): + if layers.initialLayer is not None: + #export the initial layer + dummy_input = (sentence, embeddings)# we need some toy sentence and embeddings here, not sure if onnx is happy with this input though... + torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) + dummy_input = layers.initialLayer(sentence, embeddings) + for j, il in enumerate(layers.intermediateLayers): + #export the intermediate layer layer + torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names, output_names=output_names) + dummy_input = il(dummy_input) + if layers.finalLayer is not None: + #export the final layer + torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) \ No newline at end of file From ee1c6ddbfe3382868bbcb76780879258cded9dfe Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 21:53:01 -0700 Subject: [PATCH 033/134] specified input and output names --- main/src/main/python/pytorch/pytorch2onnx.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py index a1b620c03..6b8a1a8fd 100644 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -10,20 +10,21 @@ modelName = args.model_file model = Metal.load(modelName) - input_names = [ "input" ] - output_names = [ "output" ] - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
for i, layers in enumerate(model): if layers.initialLayer is not None: #export the initial layer + input_names_1 = ["sentence", "const embeddings"] + output_names_1 = [ "embeddings" ] dummy_input = (sentence, embeddings)# we need some toy sentence and embeddings here, not sure if onnx is happy with this input though... - torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) + torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_1, output_names=output_names_1) dummy_input = layers.initialLayer(sentence, embeddings) for j, il in enumerate(layers.intermediateLayers): #export the intermediate layer layer - torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names, output_names=output_names) + input_names_2 = ["input", "dropout"] + output_names_2 = [ "output" ] + torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names_2, output_names=output_names_2) dummy_input = il(dummy_input) if layers.finalLayer is not None: #export the final layer - torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) \ No newline at end of file + torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_2, output_names=output_names_2) \ No newline at end of file From 3f89fa76ce2cf32d5b828081846926f978eedd0c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 28 Oct 2021 09:44:43 -0700 Subject: [PATCH 034/134] fixed bug in saving x2i --- main/src/main/python/pytorch/greedyForwardLayer.py | 2 +- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 556559cf4..b913c10d5 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -15,7 +15,7 @@ def saveX2i(self): x2i["inferenceType"] = TYPE_GREEDY x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 - x2i["span"] = spanToString(span) if self.spans else "" + x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i x2i["dropoutProb"] = self.dropoutProb diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 1952666b0..5aa9e6669 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -111,7 +111,7 @@ def saveX2i(self): x2i["inferenceType"] = TYPE_GREEDY x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 - x2i["span"] = spanToString(span) if self.spans else "" + x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i x2i["dropoutProb"] = self.dropoutProb From 96d7dcfa6e9b61fa09b53a95bbc816c75a9d3cd0 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 28 Oct 2021 21:03:26 -0700 Subject: [PATCH 035/134] fixed some bugs also trying the new weight decay strategy: (rule out biases and norms) --- main/src/main/python/pytorch/forwardLayer.py | 18 +++++++++--------- main/src/main/python/pytorch/layers.py | 16 ++++++++++++---- main/src/main/python/pytorch/metal.py | 8 ++++---- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 95cd36c5c..145a76d1a 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -25,21 +25,21 @@ def 
__init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.outDim = len(t2i) - def pickSpan(self, v): + def pickSpan(self, v, i): if self.spans is None: return v else: # Zheng: Will spans overlap? vs = list() for span in self.spans: - e = torch.index_select(v, 1, torch.tensor(range(span[0], span[1]))) + e = torch.index_select(v, i, torch.tensor(range(span[0], span[1]))) vs.append(e) - return torch.cat(vs, dim=1) + return torch.cat(vs, dim=i) def forward(self, inputExpressions, doDropout, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - argExp = expressionDropout(self.pickSpan(inputExpressions), self.dropoutProb, doDropout) + argExp = expressionDropout(self.pickSpan(inputExpressions, 1), self.dropoutProb, doDropout) emissionScores = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) @@ -59,18 +59,18 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + argExp = expressionDropout(self.pickSpan(e, 0), self.dropoutProb, doDropout) if headPosition >= 0: # there is an explicit head in the sentence - predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition], 0), self.dropoutProb, doDropout) else: # the head is root. 
we used a dedicated Parameter for root - predExp = expressionDropout(self.pickSpan(self.pRoot), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(self.pRoot, 0), self.dropoutProb, doDropout) ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) - if nonlinearity == NONLIN_TANH: + if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) - elif nonlinearity == NONLIN_RELU: + elif self.nonlinearity == NONLIN_RELU: l1 = F.relu(l1) emissionScores.append(l1) emissionScores = torch.stack(emissionScores) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 7447c79e3..ec79b7d02 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -41,12 +41,20 @@ def __str__(self): def get_parameters(self): parameters = list() if self.initialLayer is not None: - parameters += [p for p in self.initialLayer.parameters() if p.requires_grad] + parameters += [p for p in self.initialLayer.named_parameters() if p.requires_grad] for il in self.intermediateLayers: - parameters += [p for p in il.parameters() if p.requires_grad] + parameters += [p for p in il.named_parameters() if p.requires_grad] if self.finalLayer is not None: - parameters += [p for p in self.finalLayer.parameters() if p.requires_grad] - return parameters + parameters += [p for p in self.finalLayer.named_parameters() if p.requires_grad] + + no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] + optimizer_grouped_parameters = [ + {'params': [p for n, p in parameters + if not any(nd in n for nd in no_decay)], 'weight_decay': WEIGHT_DECAY}, + {'params': [p for n, p in parameters + if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} + ] + return optimizer_grouped_parameters def start_train(self): if self.initialLayer is not None: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 340664028..07350ba30 100644 --- 
a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -80,14 +80,14 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() - # torch.nn.utils.clip_grad_norm_(parameters, 5) + torch.nn.utils.clip_grad_norm_(parameters, 0.01) if trainerType == "adam": - trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) + trainer = Adam(parameters, lr=learningRate) elif trainerType == "rmsprop": - trainer = RMSprop(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) + trainer = RMSprop(parameters, lr=learningRate) elif trainerType == "sgd": - trainer = SDG(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) + trainer = SDG(parameters, lr=learningRate) else: raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") From 6a37769c5ad76c547f0dc1a5c2796090a10dbd11 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 28 Oct 2021 21:38:18 -0700 Subject: [PATCH 036/134] remove clipping --- main/src/main/python/pytorch/layers.py | 10 +++++----- main/src/main/python/pytorch/metal.py | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index ec79b7d02..4acf2f2cf 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -41,18 +41,18 @@ def __str__(self): def get_parameters(self): parameters = list() if self.initialLayer is not None: - parameters += [p for p in self.initialLayer.named_parameters() if p.requires_grad] + parameters += [p for p in self.initialLayer.named_parameters()] for il in self.intermediateLayers: - parameters += [p for p in il.named_parameters() if p.requires_grad] + parameters += [p for p in il.named_parameters()] if self.finalLayer is not None: - parameters += [p for p in self.finalLayer.named_parameters() if p.requires_grad] + parameters += [p for p in self.finalLayer.named_parameters()] no_decay = ['bias', 'LayerNorm.bias', 
'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in parameters - if not any(nd in n for nd in no_decay)], 'weight_decay': WEIGHT_DECAY}, + if not any(nd in n for nd in no_decay) and p.requires_grad], 'weight_decay': WEIGHT_DECAY}, {'params': [p for n, p in parameters - if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} + if any(nd in n for nd in no_decay) and p.requires_grad], 'weight_decay': 0.0} ] return optimizer_grouped_parameters diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 07350ba30..309b13554 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -80,8 +80,6 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() - torch.nn.utils.clip_grad_norm_(parameters, 0.01) - if trainerType == "adam": trainer = Adam(parameters, lr=learningRate) elif trainerType == "rmsprop": From e577f3c8eba652906b921de9371024a13238a6cf Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 3 Nov 2021 15:27:48 -0700 Subject: [PATCH 037/134] add scheduler --- main/src/main/python/pytorch/metal.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 309b13554..127e94207 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -6,6 +6,7 @@ from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from torch.optim import SGD, Adam, RMSprop +from torch.optim.lr_scheduler import * import json import random @@ -89,6 +90,8 @@ def train(self, modelNamePrefix): else: raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") + scheduler = ExponentialLR(trainer, gamma=0.9) + reader = MetalRowReader() cummulativeLoss = 0.0 @@ -154,6 +157,7 @@ def train(self, modelNamePrefix): trainer.step() batchLoss = 0 i = 0 + scheduler.step() # check dev performance in this epoch, for all tasks totalAcc = 0.0 
From c8ec489c602cc1b9188d85b9741e570c270263bf Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 16 Nov 2021 00:49:03 -0700 Subject: [PATCH 038/134] use xavier uniform to initialize weights --- main/src/main/python/pytorch/embeddingLayer.py | 16 ++++++++++++++-- main/src/main/python/pytorch/forwardLayer.py | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 14cca2d52..502bb21bb 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -269,16 +269,19 @@ def initialize(cls, config, paramPrefix, wordCounter): w2i = {w:i for i, w in enumerate(wordList)} wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) + nn.init.xavier_uniform_(wordLookupParameters.weight) c2iFilename = config.get_string(paramPrefix + ".c2i", "org/clulab/c2i-en.txt") c2i = readChar2Ids(c2iFilename) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) + nn.init.xavier_uniform_(charLookupParameters.weight) charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) + nn.init.xavier_uniform_(posTagLookupParameters.weight) else: tag2i = None posTagLookupParameters = None @@ -290,8 +293,17 @@ def initialize(cls, config, paramPrefix, wordCounter): ne2i = None neTagLookupParameters = None - distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None - positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None + if distanceEmbeddingSize > 0: + distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) + 
nn.init.xavier_uniform_(distanceLookupParameters.weight) + else: + distanceLookupParameters = None + + if positionEmbeddingSize > 0: + positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) + nn.init.xavier_uniform_(positionLookupParameters.weight) + else: + positionLookupParameters = None return cls(w2i, wordCounter, c2i, tag2i, ne2i, learnedWordEmbeddingSize, diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 145a76d1a..2241db86a 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -1,5 +1,5 @@ import torch -import torch.nn +import torch.nn as nn from torch.autograd import Variable import torch.nn.functional as F @@ -18,6 +18,7 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.nonlinearity = nonlinearity self.pH = nn.Linear(actualInputSize, len(t2i)) + nn.init.xavier_uniform_(self.pH.weight) self.pRoot = Variable(torch.rand(inputSize)) #TODO: Not sure about the shape here self.dropoutProb = dropoutProb From 1a9d4bdab0276485f9ff64ac68e9af98961626b5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 16 Nov 2021 10:57:04 -0700 Subject: [PATCH 039/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 127e94207..298320d08 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -265,6 +265,10 @@ def parse(self, sentence, constEmbeddings): Layers.parse(self.model, sentence, constEmbeddings) def test(self): + + torch.manual_seed(self.taskManager.random) + random.seed(self.taskManager.random) + for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName testSentences = self.taskManager.tasks[taskId].testSentences From dc4e77b84595d2323838aceb2c43ae55f973bd4b Mon Sep 17 
00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 20:38:12 -0700 Subject: [PATCH 040/134] convert layers to a single NN module to save it to onnx --- main/src/main/python/pytorch/pytorch2onnx.py | 56 +++++++++++++------- main/src/main/python/pytorch/utils.py | 8 +++ 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py index 6b8a1a8fd..5b749ca7a 100644 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -1,6 +1,39 @@ import torch from pytorch.metal import Metal +from pytorch.utils import mkCharacterEmbedding2 + +class Saving_Model(nn.Module): + """docstring for Saving_Model""" + def __init__(self, model, constEmbeddings): + super().__init__() + self.model = model + for layers in model: + layers.start_eval() + self.constEmbeddings = constEmbeddings + self.initialLayers = [None for _ in range(len(model))] + for i, layers in enumerate(model): + if layers.initialLayer is not None: + self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, + "charLookupParameters":layers.initialLayer.charLookupParameters, + "charRnnBuilder":layers.initialLayer.charRnnBuilder} + + def forward(self, word_ids, char_ids_list): + + #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
+ for i, layers in enumerate(self.model): + if self.initialLayers[i]: + embeddings = constEmbeddings.emb(idxs) + learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + embed = torch.cat(embedParts, dim=1) + for j, il in enumerate(layers.intermediateLayers): + dummy_input = il(dummy_input) + if layers.finalLayer is not None: + output = layers.finalLayer(dummy_input) + return output if __name__ == '__main__': @@ -10,21 +43,8 @@ modelName = args.model_file model = Metal.load(modelName) - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... - for i, layers in enumerate(model): - if layers.initialLayer is not None: - #export the initial layer - input_names_1 = ["sentence", "const embeddings"] - output_names_1 = [ "embeddings" ] - dummy_input = (sentence, embeddings)# we need some toy sentence and embeddings here, not sure if onnx is happy with this input though... 
- torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_1, output_names=output_names_1) - dummy_input = layers.initialLayer(sentence, embeddings) - for j, il in enumerate(layers.intermediateLayers): - #export the intermediate layer layer - input_names_2 = ["input", "dropout"] - output_names_2 = [ "output" ] - torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names_2, output_names=output_names_2) - dummy_input = il(dummy_input) - if layers.finalLayer is not None: - #export the final layer - torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_2, output_names=output_names_2) \ No newline at end of file + export_model = Saving_Model(model) + + torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names_2, output_names=output_names_2) + + \ No newline at end of file diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 25457c88c..b6218ba2f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -49,6 +49,14 @@ def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result +def mkCharacterEmbedding2(char_ids, charLookupParameters, charRnnBuilder): + hidden_dim = charRnnBuilder.hidden_size + charEmbeddings = charLookupParameters(char_ids) + output = transduce(charEmbeddings, charRnnBuilder) + result = output.squeeze(1)[-1] + # Zheng: Not sure if this is the right way to concatenate the two direction hidden states + return result + def readString2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: From 86f12505c049dcbaa183d3ec8120750912738d4f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 21:58:02 -0700 Subject: [PATCH 
041/134] get dummy input --- main/src/main/python/pytorch/pytorch2onnx.py | 46 +++++++++++++++++--- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py index 5b749ca7a..5a9cae82e 100644 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -1,15 +1,15 @@ import torch from pytorch.metal import Metal -from pytorch.utils import mkCharacterEmbedding2 +from pytorch.utils import * +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove +from sequences.rowReaders import * class Saving_Model(nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): super().__init__() self.model = model - for layers in model: - layers.start_eval() self.constEmbeddings = constEmbeddings self.initialLayers = [None for _ in range(len(model))] for i, layers in enumerate(model): @@ -39,12 +39,48 @@ def forward(self, word_ids, char_ids_list): parser = argparse.ArgumentParser() parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) args = parser.parse_args() + + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) modelName = args.model_file model = Metal.load(modelName) + for layers in model: + layers.start_eval() + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_model = Saving_Model(model, constEmbeddings) + export_model.eval() + + torch.manual_seed(taskManager.random) + random.seed(taskManager.random) + + for i, layers in enumerate(model): + if layers.initialLayer is not None: + c2i = layers.initialLayer.c2i + + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + testSentences = 
taskManager.tasks[taskId].testSentences + if testSentences: + reader = MetalRowReader() + annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) + + asent = annotatedSentences[0] + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] + + dummy_input = [word_ids, char_ids_list] - export_model = Saving_Model(model) + input_names = [ "input1", "input2" ] + output_names = [ "output" ] - torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names_2, output_names=output_names_2) + torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) \ No newline at end of file From 3e1fd11779b55d4c055c625424a9e32f6ee91631 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 23:28:25 -0700 Subject: [PATCH 042/134] Create pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 90 ++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 main/src/main/python/pytorch2onnx.py diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py new file mode 100644 index 000000000..fe3426678 --- /dev/null +++ b/main/src/main/python/pytorch2onnx.py @@ -0,0 +1,90 @@ +import torch +import argparse +from pyhocon import ConfigFactory +import random + +from pytorch.taskManager import TaskManager +from pytorch.metal import Metal +from pytorch.utils import * +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove +from sequences.rowReaders import * + +class Saving_Model(nn.Module): + """docstring for Saving_Model""" + def __init__(self, model, constEmbeddings): + super().__init__() + self.model = model + self.constEmbeddings = constEmbeddings + self.initialLayers = [None for _ in 
range(len(model))] + for i, layers in enumerate(model): + if layers.initialLayer is not None: + self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, + "charLookupParameters":layers.initialLayer.charLookupParameters, + "charRnnBuilder":layers.initialLayer.charRnnBuilder} + + def forward(self, word_ids, char_ids_list): + + #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... + for i, layers in enumerate(self.model): + if self.initialLayers[i]: + embeddings = constEmbeddings.emb(idxs) + learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + embed = torch.cat(embedParts, dim=1) + for j, il in enumerate(layers.intermediateLayers): + dummy_input = il(dummy_input) + if layers.finalLayer is not None: + output = layers.finalLayer(dummy_input) + return output + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) + args = parser.parse_args() + + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + modelName = args.model_file + model = Metal.load(modelName) + for layers in model: + layers.start_eval() + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_model = Saving_Model(model, constEmbeddings) + export_model.eval() + + torch.manual_seed(taskManager.random) + 
random.seed(taskManager.random) + + for i, layers in enumerate(model): + if layers.initialLayer is not None: + c2i = layers.initialLayer.c2i + + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + testSentences = taskManager.tasks[taskId].testSentences + if testSentences: + reader = MetalRowReader() + annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) + + asent = annotatedSentences[0] + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] + + dummy_input = [word_ids, char_ids_list] + + input_names = [ "input1", "input2" ] + output_names = [ "output" ] + + torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) + + From d9ed82c99d89a917d49e9adf5af7e11e42809589 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 23:28:54 -0700 Subject: [PATCH 043/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index fe3426678..880fafd82 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -78,7 +78,7 @@ def forward(self, word_ids, char_ids_list): words = sentence.words word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] + char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) for word in words] dummy_input = [word_ids, char_ids_list] From f29403e2ce6ab9ca805f4800447e03ef0bca5de6 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 
23:59:31 -0700 Subject: [PATCH 044/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 30 +++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 880fafd82..49bb86aac 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -22,21 +22,21 @@ def __init__(self, model, constEmbeddings): "charLookupParameters":layers.initialLayer.charLookupParameters, "charRnnBuilder":layers.initialLayer.charRnnBuilder} - def forward(self, word_ids, char_ids_list): - + def forward(self, input_list): + word_ids, char_ids_list = input_list #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... for i, layers in enumerate(self.model): if self.initialLayers[i]: - embeddings = constEmbeddings.emb(idxs) - learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) + embeddings = constEmbeddings.emb(word_ids) + learnedWordEmbeddings = self.initialLayers[i]["wordLookupParameters"](word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i]["charLookupParameters"], self.initialLayers[i]["charRnnBuilder"]) for char_ids in char_ids_list]) embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] embedParts = [ep for ep in embedParts if ep is not None] embed = torch.cat(embedParts, dim=1) for j, il in enumerate(layers.intermediateLayers): - dummy_input = il(dummy_input) + output = il(embed, False) if layers.finalLayer is not None: - output = layers.finalLayer(dummy_input) + output = layers.finalLayer(output, False, None)#headPositions set to be None for now, we can 
add it in input list later return output if __name__ == '__main__': @@ -54,6 +54,7 @@ def forward(self, word_ids, char_ids_list): for layers in model: layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_model = Saving_Model(model, constEmbeddings) export_model.eval() @@ -82,9 +83,20 @@ def forward(self, word_ids, char_ids_list): dummy_input = [word_ids, char_ids_list] - input_names = [ "input1", "input2" ] + output = export_model(dummy_input) + + input_names = [ "input_list"] output_names = [ "output" ] - torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) + torch.onnx.export(export_model, # model being run + dummy_input, # model input (or a tuple for multiple inputs) + "model.onnx", # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=10, # the ONNX version to export the model to + do_constant_folding=True, # whether to execute constant folding for optimization + input_names = ['input'], # the model's input names + output_names = ['output'], # the model's output names + dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes + 'output' : {0 : 'batch_size'}}) From b36cbabaab153cbb7ba45157b7e3357526219079 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 2 Dec 2021 09:53:13 -0700 Subject: [PATCH 045/134] converted the list in the model to nnModuleList --- main/src/main/python/pytorch2onnx.py | 44 ++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 49bb86aac..ad8260449 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -9,34 +9,40 @@ from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from sequences.rowReaders import * -class Saving_Model(nn.Module): +class 
Saving_Model(torch.nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): super().__init__() - self.model = model + self.model_length = len(model) self.constEmbeddings = constEmbeddings - self.initialLayers = [None for _ in range(len(model))] + self.initialLayers = [None for _ in range(self.model_length)] + self.intermediateLayerss = [None for _ in range(self.model_length)] + self.finalLayers = [None for _ in range(self.model_length)] for i, layers in enumerate(model): if layers.initialLayer is not None: - self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, - "charLookupParameters":layers.initialLayer.charLookupParameters, - "charRnnBuilder":layers.initialLayer.charRnnBuilder} - + self.initialLayers[i] = nn.ModuleList([layers.initialLayer.wordLookupParameters, + layers.initialLayer.charLookupParameters, + layers.initialLayer.charRnnBuilder]) + self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) + self.finalLayers[i] = layers.finalLayer + self.initialLayers = nn.ModuleList(self.initialLayers) + self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) + self.finalLayers = nn.ModuleList(self.finalLayers) def forward(self, input_list): word_ids, char_ids_list = input_list #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
- for i, layers in enumerate(self.model): + for i in range(self.model_length): if self.initialLayers[i]: embeddings = constEmbeddings.emb(word_ids) - learnedWordEmbeddings = self.initialLayers[i]["wordLookupParameters"](word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i]["charLookupParameters"], self.initialLayers[i]["charRnnBuilder"]) for char_ids in char_ids_list]) + learnedWordEmbeddings = self.initialLayers[i][0](word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i][1], self.initialLayers[i][2]) for char_ids in char_ids_list]) embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] embedParts = [ep for ep in embedParts if ep is not None] embed = torch.cat(embedParts, dim=1) - for j, il in enumerate(layers.intermediateLayers): + for il in self.intermediateLayerss[i]: output = il(embed, False) - if layers.finalLayer is not None: - output = layers.finalLayer(output, False, None)#headPositions set to be None for now, we can add it in input list later + if self.finalLayers[i]: + output = self.finalLayers[i](output, False, None)#headPositions set to be None for now, we can add it in input list later return output if __name__ == '__main__': @@ -57,6 +63,8 @@ def forward(self, input_list): export_model = Saving_Model(model, constEmbeddings) export_model.eval() + for param in export_model.parameters(): + param.requires_grad = False torch.manual_seed(taskManager.random) random.seed(taskManager.random) @@ -78,14 +86,14 @@ def forward(self, input_list): words = sentence.words - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) for word in words] + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]).detach() + 
char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]).detach() for word in words] dummy_input = [word_ids, char_ids_list] output = export_model(dummy_input) - input_names = [ "input_list"] + input_names = ["input_list"] output_names = [ "output" ] torch.onnx.export(export_model, # model being run @@ -94,9 +102,9 @@ def forward(self, input_list): export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['input'], # the model's input names + input_names = ['input'], # the model's input names output_names = ['output'], # the model's output names - dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes + dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes 'output' : {0 : 'batch_size'}}) From 045c581a6db55f5c25d85ab1e3ed24d682ff6cfa Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 2 Dec 2021 09:54:34 -0700 Subject: [PATCH 046/134] remove the redundant --- main/src/main/python/pytorch/pytorch2onnx.py | 86 -------------------- 1 file changed, 86 deletions(-) delete mode 100644 main/src/main/python/pytorch/pytorch2onnx.py diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py deleted file mode 100644 index 5a9cae82e..000000000 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ /dev/null @@ -1,86 +0,0 @@ -import torch - -from pytorch.metal import Metal -from pytorch.utils import * -from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove -from sequences.rowReaders import * - -class Saving_Model(nn.Module): - """docstring for Saving_Model""" - def __init__(self, model, constEmbeddings): - super().__init__() - self.model = model - self.constEmbeddings = constEmbeddings - self.initialLayers = [None for _ in range(len(model))] - for i, layers in enumerate(model): - if layers.initialLayer 
is not None: - self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, - "charLookupParameters":layers.initialLayer.charLookupParameters, - "charRnnBuilder":layers.initialLayer.charRnnBuilder} - - def forward(self, word_ids, char_ids_list): - - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... - for i, layers in enumerate(self.model): - if self.initialLayers[i]: - embeddings = constEmbeddings.emb(idxs) - learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] - embedParts = [ep for ep in embedParts if ep is not None] - embed = torch.cat(embedParts, dim=1) - for j, il in enumerate(layers.intermediateLayers): - dummy_input = il(dummy_input) - if layers.finalLayer is not None: - output = layers.finalLayer(dummy_input) - return output - -if __name__ == '__main__': - - parser = argparse.ArgumentParser() - parser.add_argument('--model_file', type=str, help='Filename of the model.') - parser.add_argument('--config', type=str, help='Filename of the configuration.') - parser.add_argument('--seed', type=int, default=1234) - args = parser.parse_args() - - config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') - taskManager = TaskManager(config, args.seed) - modelName = args.model_file - model = Metal.load(modelName) - for layers in model: - layers.start_eval() - constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() - export_model = Saving_Model(model, constEmbeddings) - export_model.eval() - - torch.manual_seed(taskManager.random) - random.seed(taskManager.random) - - for i, layers in enumerate(model): - if 
layers.initialLayer is not None: - c2i = layers.initialLayer.c2i - - for taskId in range(0, taskManager.taskCount): - taskName = taskManager.tasks[taskId].taskName - testSentences = taskManager.tasks[taskId].testSentences - if testSentences: - reader = MetalRowReader() - annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) - - asent = annotatedSentences[0] - sentence = asent[0] - goldLabels = asent[1] - - words = sentence.words - - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] - - dummy_input = [word_ids, char_ids_list] - - input_names = [ "input1", "input2" ] - output_names = [ "output" ] - - torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) - - \ No newline at end of file From de010c3bfb39215c9c40000e0ce757d8b395664c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 12:32:58 -0700 Subject: [PATCH 047/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index ad8260449..d73bb19e9 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -9,6 +9,9 @@ from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from sequences.rowReaders import * +import onnx +import onnxruntime + class Saving_Model(torch.nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): @@ -107,4 +110,21 @@ def forward(self, input_list): dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes 'output' : {0 : 'batch_size'}}) + onnx_model = onnx.load("model.onnx") + onnx.checker.check_model(onnx_model) + + ort_session = onnxruntime.InferenceSession("model.onnx") + + def to_numpy(tensor): + 
return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + + # compute ONNX Runtime output prediction + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} + ort_outs = ort_session.run(None, ort_inputs) + + # compare ONNX Runtime and PyTorch results + np.testing.assert_allclose(to_numpy(output), ort_outs[0], rtol=1e-03, atol=1e-05) + + print("Exported model has been tested with ONNXRuntime, and the result looks good!") + From 78c2f2743016b61aea5bda25c29982f6f9d10b28 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 14:20:52 -0700 Subject: [PATCH 048/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index d73bb19e9..122555729 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -119,7 +119,7 @@ def to_numpy(tensor): return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() # compute ONNX Runtime output prediction - ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) # compare ONNX Runtime and PyTorch results From 2347df5a11cde43623855767bf494048a8a15b13 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 15:28:40 -0700 Subject: [PATCH 049/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 122555729..fa47c190b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -31,8 +31,7 @@ def __init__(self, model, constEmbeddings): self.initialLayers = nn.ModuleList(self.initialLayers) self.intermediateLayerss = 
nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, input_list): - word_ids, char_ids_list = input_list + def forward(self, word_ids, char_ids_list): #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... for i in range(self.model_length): if self.initialLayers[i]: @@ -90,14 +89,11 @@ def forward(self, input_list): words = sentence.words word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]).detach() - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]).detach() for word in words] + char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]).detach() - dummy_input = [word_ids, char_ids_list] + dummy_input = (word_ids, char_ids_list) - output = export_model(dummy_input) - - input_names = ["input_list"] - output_names = [ "output" ] + output = export_model(word_ids, char_ids_list) torch.onnx.export(export_model, # model being run dummy_input, # model input (or a tuple for multiple inputs) @@ -105,7 +101,7 @@ def forward(self, input_list): export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['input'], # the model's input names + input_names = ['words', 'chars'], # the model's input names output_names = ['output'], # the model's output names dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes 'output' : {0 : 'batch_size'}}) @@ -116,10 +112,11 @@ def forward(self, input_list): ort_session = onnxruntime.InferenceSession("model.onnx") def to_numpy(tensor): - return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + return tensor.cpu().numpy() # compute ONNX Runtime output prediction - ort_inputs = 
{ort_session.get_inputs()[0].name: to_numpy(dummy_input)} + print ([i.name for i in ort_session.get_inputs()]) + ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) # compare ONNX Runtime and PyTorch results From 70a9370245b57e4c69c244e06734566c756d857f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 19:20:43 -0700 Subject: [PATCH 050/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index fa47c190b..0e3a1f08d 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -88,8 +88,8 @@ def forward(self, word_ids, char_ids_list): words = sentence.words - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]).detach() - char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]).detach() + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]) dummy_input = (word_ids, char_ids_list) @@ -112,7 +112,7 @@ def forward(self, word_ids, char_ids_list): ort_session = onnxruntime.InferenceSession("model.onnx") def to_numpy(tensor): - return tensor.cpu().numpy() + return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() # compute ONNX Runtime output prediction print ([i.name for i in ort_session.get_inputs()]) From d22925996409c2e049011dcb97fccb2c029f6c43 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 9 Dec 2021 00:47:24 -0700 Subject: [PATCH 051/134] test the onnx model I have to save the character RNN in separate model, since the sentence length and word length are all varied and onnx is not 
happy with these kind of input... --- main/src/main/python/pytorch2onnx.py | 114 ++++++++++++++++++--------- main/src/main/python/test_onnx.py | 81 +++++++++++++++++++ 2 files changed, 157 insertions(+), 38 deletions(-) create mode 100644 main/src/main/python/test_onnx.py diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 0e3a1f08d..5fc059a31 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -12,40 +12,52 @@ import onnx import onnxruntime +import json + +def to_numpy(tensor): + return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + +class Char_RNN(torch.nn.Module): + + def __init__(self, model): + super().__init__() + for i, layers in enumerate(model): + if layers.initialLayer is not None: + self.char_lookup = layers.initialLayer.charLookupParameters + self.char_rnn = layers.initialLayer.charRnnBuilder + + def forward(self, char_ids): + charEmbedding = mkCharacterEmbedding2(char_ids, self.char_lookup, self.char_rnn) + return charEmbedding + class Saving_Model(torch.nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): super().__init__() self.model_length = len(model) self.constEmbeddings = constEmbeddings - self.initialLayers = [None for _ in range(self.model_length)] self.intermediateLayerss = [None for _ in range(self.model_length)] self.finalLayers = [None for _ in range(self.model_length)] for i, layers in enumerate(model): if layers.initialLayer is not None: - self.initialLayers[i] = nn.ModuleList([layers.initialLayer.wordLookupParameters, - layers.initialLayer.charLookupParameters, - layers.initialLayer.charRnnBuilder]) + self.word_lookup = layers.initialLayer.wordLookupParameters self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) self.finalLayers[i] = layers.finalLayer - self.initialLayers = nn.ModuleList(self.initialLayers) self.intermediateLayerss = 
nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, word_ids, char_ids_list): - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... + def forward(self, embed_ids, word_ids, charEmbedding): + # Can I assuem there is only one initial layer? + embeddings = constEmbeddings.emb(embed_ids) + learnedWordEmbeddings = self.word_lookup(word_ids) + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + state = torch.cat(embedParts, dim=1) for i in range(self.model_length): - if self.initialLayers[i]: - embeddings = constEmbeddings.emb(word_ids) - learnedWordEmbeddings = self.initialLayers[i][0](word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i][1], self.initialLayers[i][2]) for char_ids in char_ids_list]) - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] - embedParts = [ep for ep in embedParts if ep is not None] - embed = torch.cat(embedParts, dim=1) for il in self.intermediateLayerss[i]: - output = il(embed, False) + state = il(state, False) if self.finalLayers[i]: - output = self.finalLayers[i](output, False, None)#headPositions set to be None for now, we can add it in input list later - return output + state = self.finalLayers[i](state, False, None)#headPositions set to be None for now, we can add it in input list later + return state if __name__ == '__main__': @@ -63,37 +75,55 @@ def forward(self, word_ids, char_ids_list): layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_char = Char_RNN(model) export_model = Saving_Model(model, constEmbeddings) export_model.eval() + export_char.eval() for param in export_model.parameters(): 
param.requires_grad = False + for param in export_char.parameters(): + param.requires_grad = False torch.manual_seed(taskManager.random) random.seed(taskManager.random) - for i, layers in enumerate(model): - if layers.initialLayer is not None: - c2i = layers.initialLayer.c2i + x2i = json.load(open(args.model_file+".json")) + + c2i = x2i[0]['x2i']['initialLayer']['c2i'] + w2i = x2i[0]['x2i']['initialLayer']['w2i'] for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences if testSentences: reader = MetalRowReader() - annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) + annotatedSentences = reader.toAnnotatedSentences(testSentences[1]) asent = annotatedSentences[0] sentence = asent[0] goldLabels = asent[1] words = sentence.words - - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]) - - dummy_input = (word_ids, char_ids_list) - - output = export_model(word_ids, char_ids_list) + char_embs = [] + for word in words: + char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) + char_out = export_char(char_ids) + char_embs.append(char_out) + char_embs = torch.stack(char_embs) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) + output = export_model(embed_ids, word_ids, char_embs) + + dummy_input = (embed_ids, word_ids, char_embs) + + torch.onnx.export(export_char, + char_ids, + "char.onnx", + export_params=True, + do_constant_folding=True, + input_names = ['char_ids'], + output_names = ['chars'], + dynamic_axes = {"char_ids": {0: 'word length'}}) torch.onnx.export(export_model, # model being run dummy_input, # model input (or a tuple for multiple 
inputs) @@ -101,26 +131,34 @@ def forward(self, word_ids, char_ids_list): export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['words', 'chars'], # the model's input names + input_names = ['embed', 'words', 'chars'], # the model's input names output_names = ['output'], # the model's output names - dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes - 'output' : {0 : 'batch_size'}}) + dynamic_axes = {'embed' : {0 : 'sentence length'}, + 'words' : {0 : 'sentence length'}, + 'chars' : {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") onnx.checker.check_model(onnx_model) + char_model = onnx.load("char.onnx") + onnx.checker.check_model(char_model) ort_session = onnxruntime.InferenceSession("model.onnx") + ort_char = onnxruntime.InferenceSession("char.onnx") + # compute ONNX Runtime output prediction - def to_numpy(tensor): - return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + ort_inputs = {ort_char.get_inputs()[i].name: to_numpy(x) for i, x in enumerate([char_ids])} + ort_outs = ort_char.run(None, ort_inputs) + try: + np.testing.assert_allclose(to_numpy(char_out), ort_outs[0], rtol=1e-03, atol=1e-05) + except AssertionError as e: + print (e) - # compute ONNX Runtime output prediction - print ([i.name for i in ort_session.get_inputs()]) ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) - - # compare ONNX Runtime and PyTorch results - np.testing.assert_allclose(to_numpy(output), ort_outs[0], rtol=1e-03, atol=1e-05) + try: + np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + except AssertionError as e: + print (e) print("Exported model has been tested with ONNXRuntime, and the result looks 
good!") diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py new file mode 100644 index 000000000..670278c91 --- /dev/null +++ b/main/src/main/python/test_onnx.py @@ -0,0 +1,81 @@ +from pytorch2onnx import * +import json +import numpy as np +from pytorch.seqScorer import * + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) + args = parser.parse_args() + + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + + x2i = json.load(open(args.model_file+".json")) + + c2i = x2i[0]['x2i']['initialLayer']['c2i'] + w2i = x2i[0]['x2i']['initialLayer']['w2i'] + t2i = x2i[1]['x2i']['finalLayer']["t2i"] + i2t = {i:t for t, i in t2i.items()} + + torch.manual_seed(taskManager.random) + random.seed(taskManager.random) + + onnx_model = onnx.load("model.onnx") + onnx.checker.check_model(onnx_model) + char_model = onnx.load("char.onnx") + onnx.checker.check_model(char_model) + + ort_session = onnxruntime.InferenceSession("model.onnx") + ort_char = onnxruntime.InferenceSession("char.onnx") + + scoreCountsByLabel = ScoreCountsByLabel() + + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + sentences = taskManager.tasks[taskId].testSentences + if sentences: + reader = MetalRowReader() + for sent in sentences: + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + char_embs = [] + for word in words: + char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + ort_inputs = {ort_char.get_inputs()[i].name: x for i, x in 
enumerate([char_ids])} + ort_outs = ort_char.run(None, ort_inputs) + char_embs.append(ort_outs[0]) + char_embs = np.stack(char_embs) + embed_ids = np.array([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) + + dummy_input = (embed_ids, word_ids, char_embs) + + ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} + ort_outs = ort_session.run(None, ort_inputs) + + emissionScores = ort_outs[0] + preds = [i2t[np.argmax(es)] for es in emissionScores] + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + + print (f"Accuracy : {scoreCountsByLabel.accuracy()}") + print (f"Precision : {scoreCountsByLabel.precision()}") + print (f"Recall on : {scoreCountsByLabel.recall()}") + print (f"Micro F1 : {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + + \ No newline at end of file From 5f328f0eb0cb178ac4ffed28e4dd52ed1fe69d89 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 14 Dec 2021 23:35:27 -0700 Subject: [PATCH 052/134] Create test_onnx.scala --- main/src/main/scala/org/clulab/dynet/test_onnx.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 main/src/main/scala/org/clulab/dynet/test_onnx.scala diff --git a/main/src/main/scala/org/clulab/dynet/test_onnx.scala b/main/src/main/scala/org/clulab/dynet/test_onnx.scala new file mode 100644 index 000000000..3a337f4b7 --- /dev/null +++ b/main/src/main/scala/org/clulab/dynet/test_onnx.scala @@ -0,0 +1,12 @@ +package org.clulab.processors.clu + +import org.clulab.dynet.ConstEmbeddingsGlove +import org.clulab.embeddings.WordEmbeddingMapPool + +object GetWordEmbeddings extends App { + val constEmbeddingsGlove = ConstEmbeddingsGlove // Make 
sure that the embeddings have been loaded. + val wordEmbeddingMap = WordEmbeddingMapPool.get("glove.840B.300d.10f", compact = true).get + val embedding = wordEmbeddingMap.get("this").get + + println(embedding.mkString(" ")) +} \ No newline at end of file From 424e67a706b5f6bf4a45ea41a00df5b8e132ee91 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 02:26:52 -0700 Subject: [PATCH 053/134] Delete test_onnx.scala --- main/src/main/scala/org/clulab/dynet/test_onnx.scala | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 main/src/main/scala/org/clulab/dynet/test_onnx.scala diff --git a/main/src/main/scala/org/clulab/dynet/test_onnx.scala b/main/src/main/scala/org/clulab/dynet/test_onnx.scala deleted file mode 100644 index 3a337f4b7..000000000 --- a/main/src/main/scala/org/clulab/dynet/test_onnx.scala +++ /dev/null @@ -1,12 +0,0 @@ -package org.clulab.processors.clu - -import org.clulab.dynet.ConstEmbeddingsGlove -import org.clulab.embeddings.WordEmbeddingMapPool - -object GetWordEmbeddings extends App { - val constEmbeddingsGlove = ConstEmbeddingsGlove // Make sure that the embeddings have been loaded. 
- val wordEmbeddingMap = WordEmbeddingMapPool.get("glove.840B.300d.10f", compact = true).get - val embedding = wordEmbeddingMap.get("this").get - - println(embedding.mkString(" ")) -} \ No newline at end of file From 80aae2d507ecfa5a0887aaeda6bca3375bfec244 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 16:03:55 -0700 Subject: [PATCH 054/134] change the onnx model to fit scala code --- main/src/main/python/pytorch2onnx.py | 13 ++++++------- main/src/main/python/test_onnx.py | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 5fc059a31..ec30bfe6c 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -32,10 +32,9 @@ def forward(self, char_ids): class Saving_Model(torch.nn.Module): """docstring for Saving_Model""" - def __init__(self, model, constEmbeddings): + def __init__(self, model): super().__init__() self.model_length = len(model) - self.constEmbeddings = constEmbeddings self.intermediateLayerss = [None for _ in range(self.model_length)] self.finalLayers = [None for _ in range(self.model_length)] for i, layers in enumerate(model): @@ -45,9 +44,8 @@ def __init__(self, model, constEmbeddings): self.finalLayers[i] = layers.finalLayer self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, embed_ids, word_ids, charEmbedding): + def forward(self, embeddings, word_ids, charEmbedding): # Can I assuem there is only one initial layer? 
- embeddings = constEmbeddings.emb(embed_ids) learnedWordEmbeddings = self.word_lookup(word_ids) embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] embedParts = [ep for ep in embedParts if ep is not None] @@ -76,7 +74,7 @@ def forward(self, embed_ids, word_ids, charEmbedding): constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() export_char = Char_RNN(model) - export_model = Saving_Model(model, constEmbeddings) + export_model = Saving_Model(model) export_model.eval() export_char.eval() for param in export_model.parameters(): @@ -111,10 +109,11 @@ def forward(self, embed_ids, word_ids, charEmbedding): char_embs.append(char_out) char_embs = torch.stack(char_embs) embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - output = export_model(embed_ids, word_ids, char_embs) + output = export_model(embeddings, word_ids, char_embs) - dummy_input = (embed_ids, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs) torch.onnx.export(export_char, char_ids, diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index 670278c91..da191f7b3 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -57,9 +57,10 @@ char_embs.append(ort_outs[0]) char_embs = np.stack(char_embs) embed_ids = np.array([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids) word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) - dummy_input = (embed_ids, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs) ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) 
From 3c902277539010d69a2bb55f1e0d461cfe4618fb Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 16:28:44 -0700 Subject: [PATCH 055/134] Update test_onnx.py --- main/src/main/python/test_onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index da191f7b3..5c0231992 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -56,8 +56,8 @@ ort_outs = ort_char.run(None, ort_inputs) char_embs.append(ort_outs[0]) char_embs = np.stack(char_embs) - embed_ids = np.array([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) dummy_input = (embeddings, word_ids, char_embs) From 90079833e454a2987658c1b52bbb13066d263f76 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 22:00:57 -0700 Subject: [PATCH 056/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index ec30bfe6c..f3d3b58c3 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -134,7 +134,8 @@ def forward(self, embeddings, word_ids, charEmbedding): output_names = ['output'], # the model's output names dynamic_axes = {'embed' : {0 : 'sentence length'}, 'words' : {0 : 'sentence length'}, - 'chars' : {0 : 'sentence length'}}) + 'chars' : {0 : 'sentence length'}, + 'output': {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") onnx.checker.check_model(onnx_model) From 82ba7006a9eaeca1f94ca9b421734999eb7952ae Mon Sep 17 00:00:00 2001 
From: Zheng Tang Date: Wed, 15 Dec 2021 23:27:41 -0700 Subject: [PATCH 057/134] Update test_onnx.py --- main/src/main/python/test_onnx.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index 5c0231992..dc5d7ae1e 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -2,6 +2,7 @@ import json import numpy as np from pytorch.seqScorer import * +import time if __name__ == '__main__': @@ -34,7 +35,7 @@ ort_char = onnxruntime.InferenceSession("char.onnx") scoreCountsByLabel = ScoreCountsByLabel() - + start_time = time.time() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName sentences = taskManager.tasks[taskId].testSentences @@ -78,5 +79,6 @@ print (f"Micro F1 : {scoreCountsByLabel.f1()}") for label in scoreCountsByLabel.labels(): print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - + duration = time.time() - start_time + print (duration) \ No newline at end of file From 46fb8ad0b3aeefa1e46407d6afb49becbdf8a990 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:12:39 -0700 Subject: [PATCH 058/134] set random seed for onnx --- main/src/main/python/test_onnx.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index dc5d7ae1e..1f2082ad4 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -3,9 +3,12 @@ import numpy as np from pytorch.seqScorer import * import time +import random if __name__ == '__main__': + random.seed(100) + parser = argparse.ArgumentParser() parser.add_argument('--model_file', type=str, help='Filename of the model.') parser.add_argument('--config', type=str, help='Filename of the configuration.') From 
a734a8924d0034652313490c866161db4e8a532d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:15:17 -0700 Subject: [PATCH 059/134] Update test_onnx.py --- main/src/main/python/test_onnx.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index 1f2082ad4..dc5d7ae1e 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -3,12 +3,9 @@ import numpy as np from pytorch.seqScorer import * import time -import random if __name__ == '__main__': - random.seed(100) - parser = argparse.ArgumentParser() parser.add_argument('--model_file', type=str, help='Filename of the model.') parser.add_argument('--config', type=str, help='Filename of the configuration.') From c38dc95da7074570dabc3026a17f63c6409dbe7d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:23:52 -0700 Subject: [PATCH 060/134] paths to data and embeddings --- main/src/main/resources/org/clulab/glove.conf | 2 +- main/src/main/resources/org/clulab/mtl-en-ner.conf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/main/src/main/resources/org/clulab/glove.conf b/main/src/main/resources/org/clulab/glove.conf index 22f1e4b36..6b8bd08b0 100644 --- a/main/src/main/resources/org/clulab/glove.conf +++ b/main/src/main/resources/org/clulab/glove.conf @@ -1,5 +1,5 @@ glove { - matrixResourceName = "/org/clulab/glove/glove.840B.300d.10f" + matrixResourceName = "glove.840B.300d.10f.txt" isResource = true } \ No newline at end of file diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index 6cd5eecc2..9c07bdd75 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -20,9 +20,9 @@ mtl { task1 { name = "En NER" - train = "dynet/en/ner/train.txt" - dev = "dynet/en/ner/dev.txt" - test = "dynet/en/ner/test.txt" + train = "ner/train.txt" + dev = 
"ner/dev.txt" + test = "ner/test.txt" layers { final { From 753c20e3c5a7fc6ef2a057e5462ca62fe0dd53f1 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:34:35 -0700 Subject: [PATCH 061/134] debug the randomness --- main/src/main/python/pytorch/metal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 298320d08..175ee040e 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,7 +266,7 @@ def parse(self, sentence, constEmbeddings): def test(self): - torch.manual_seed(self.taskManager.random) + # torch.manual_seed(self.taskManager.random) random.seed(self.taskManager.random) for taskId in range(0, self.taskManager.taskCount): From dd6517b3e3bacf24baa1504d36085d3660635f87 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:39:38 -0700 Subject: [PATCH 062/134] debug randomness --- main/src/main/python/pytorch/metal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 175ee040e..588d40272 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,8 @@ def parse(self, sentence, constEmbeddings): def test(self): - # torch.manual_seed(self.taskManager.random) - random.seed(self.taskManager.random) + torch.manual_seed(self.taskManager.random) + # random.seed(self.taskManager.random) for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName From fea59e0c7f8ecdbc6c872a1668d1bf59abc331a0 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:25:46 -0700 Subject: [PATCH 063/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 
588d40272..298320d08 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -267,7 +267,7 @@ def parse(self, sentence, constEmbeddings): def test(self): torch.manual_seed(self.taskManager.random) - # random.seed(self.taskManager.random) + random.seed(self.taskManager.random) for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName From 6d8446ae46281fb64a44c788eda9d2cc1430966f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:30:31 -0700 Subject: [PATCH 064/134] debug randomness --- main/src/main/python/pytorch/metal.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 298320d08..c605c42c9 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -243,18 +243,18 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) def predictJointly(self, sentence, constEmbeddings): - for layers in self.model: - layers.start_eval() + # for layers in self.model: + # layers.start_eval() return Layers.predictJointly(self.model, sentence, constEmbeddings) def predict(self, taskId, sentence, constEmbeddings): - for layers in self.model: - layers.start_eval() + # for layers in self.model: + # layers.start_eval() return Layers.predict(self.model, taskId, sentence, constEmbeddings) def predictWithScores(self, taskId, sentence, constEmbeddings): - for layers in self.model: - layers.start_eval() + # for layers in self.model: + # layers.start_eval() return Layers.predictWithScores(self.model, taskId, sentence, constEmbeddings) # Custom method for the parsing algorithm @@ -268,7 +268,8 @@ def test(self): torch.manual_seed(self.taskManager.random) random.seed(self.taskManager.random) - + for layers 
in self.model: + layers.start_eval() for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName testSentences = self.taskManager.tasks[taskId].testSentences From a4a1db634e51979774e70f9f674afeee5869e1dd Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:30:48 -0700 Subject: [PATCH 065/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index c605c42c9..312d762e8 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,8 @@ def parse(self, sentence, constEmbeddings): def test(self): - torch.manual_seed(self.taskManager.random) - random.seed(self.taskManager.random) + # torch.manual_seed(self.taskManager.random) + # random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() for taskId in range(0, self.taskManager.taskCount): From 4889201dec3bb5763139cc090431e0f61b7ae261 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:44:45 -0700 Subject: [PATCH 066/134] move dropout inside model --- main/src/main/python/pytorch/forwardLayer.py | 20 ++++++++++---------- main/src/main/python/pytorch/utils.py | 7 ------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 2241db86a..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -20,7 +20,7 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.pH = nn.Linear(actualInputSize, len(t2i)) nn.init.xavier_uniform_(self.pH.weight) self.pRoot = Variable(torch.rand(inputSize)) #TODO: Not sure about the shape here - self.dropoutProb = dropoutProb + self.dropout = nn.Dropout(dropoutProb) self.inDim = spanLength(spans) if 
spans is not None else inputSize self.outDim = len(t2i) @@ -37,18 +37,18 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - argExp = expressionDropout(self.pickSpan(inputExpressions, 1), self.dropoutProb, doDropout) - emissionScores = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: emissionScores = F.relu(emissionScores) # for i, e in enumerate(inputExpressions): - # argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) - # l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + # argExp = self.dropout(self.pickSpan(e)) + # l1 = self.dropout(self.pH(argExp)) # if self.nonlinearity == NONLIN_TANH: # l1 = F.tanh(l1) # elif self.nonlinearity == NONLIN_RELU: @@ -60,15 +60,15 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = expressionDropout(self.pickSpan(e, 0), self.dropoutProb, doDropout) + argExp = self.dropout(self.pickSpan(e, 0)) if headPosition >= 0: # there is an explicit head in the sentence - predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition], 0), self.dropoutProb, doDropout) + predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) else: # the head is root. 
we used a dedicated Parameter for root - predExp = expressionDropout(self.pickSpan(self.pRoot, 0), self.dropoutProb, doDropout) + predExp = self.dropout(self.pickSpan(self.pRoot, 0)) ss = torch.cat([argExp, predExp]) - l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) + l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) elif self.nonlinearity == NONLIN_RELU: diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index b6218ba2f..d996deb02 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -101,13 +101,6 @@ def transduce(embeddings, builder): return output -def expressionDropout(expression, dropoutProb, doDropout): - if doDropout and dropoutProb > 0: - dropout = nn.Dropout(dropoutProb) - return dropout(expression) - else: - return expression - def sentenceLossGreedy(emissionScoresForSeq, golds): assert(emissionScoresForSeq.size(0) == len(golds)) criterion = nn.CrossEntropyLoss() From 31810313e6a21cdcdc6fb652d8987fed266be715 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:49:58 -0700 Subject: [PATCH 067/134] Update layers.py --- main/src/main/python/pytorch/layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 4acf2f2cf..cd1616e56 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -105,7 +105,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, sentence.headPositions) + states = self.finalLayer(states, sentence.headPositions) return states @@ -116,7 +116,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for intermediateLayer in self.intermediateLayers: states = 
intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, headPositions) + states = self.finalLayer(states, headPositions) return states From 593bc7f7390113b431d20695d0cb8a94b0210a87 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:08:44 -0700 Subject: [PATCH 068/134] move RNNs inside model... --- main/src/main/python/pytorch/embeddingLayer.py | 6 +++--- main/src/main/python/pytorch/rnnLayer.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 502bb21bb..85dc6ffb0 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -57,7 +57,7 @@ def __init__(self, w2i, # word to index self.useIsPredicate = useIsPredicate self.wordLookupParameters = wordLookupParameters self.charLookupParameters = charLookupParameters - self.charRnnBuilder = charRnnBuilder + self.charRnnBuilder = nn.LSTM(*charRnnBuilder) self.posTagLookupParameters = posTagLookupParameters self.neTagLookupParameters = neTagLookupParameters self.distanceLookupParameters = distanceLookupParameters @@ -222,7 +222,7 @@ def load(cls, x2i): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) - charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) if x2i['hasNe2i'] == 1 else None @@ -276,7 +276,7 @@ def initialize(cls, config, paramPrefix, wordCounter): charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) 
nn.init.xavier_uniform_(charLookupParameters.weight) - charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 5c8681269..f83fb2420 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -18,7 +18,7 @@ def __init__(self, self.rnnStateSize = rnnStateSize self.useHighwayConnections = useHighwayConnections self.rnnType = rnnType - self.wordRnnBuilder = wordRnnBuilder + self.wordRnnBuilder = mkBuilder(*wordRnnBuilder) self.dropoutProb = dropoutProb highwaySize = inputSize if useHighwayConnections else 0 @@ -57,7 +57,7 @@ def load(cls, x2i): useHighwayConnections = x2i['useHighwayConnections'] == 1 dropoutProb = x2i['dropoutProb'] - builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) + builder = (rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) return cls(inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) @@ -73,7 +73,7 @@ def initialize(cls, config, paramPrefix, inputSize): rnnType = config.get_string(paramPrefix + ".type", "lstm") dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) - builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) + builder = (rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) return cls(inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) From df5cc6ab5249894a3055f7b016e38193b545f908 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:14:23 -0700 Subject: [PATCH 069/134] Update embeddingLayer.py --- 
main/src/main/python/pytorch/embeddingLayer.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 85dc6ffb0..821147075 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -57,7 +57,7 @@ def __init__(self, w2i, # word to index self.useIsPredicate = useIsPredicate self.wordLookupParameters = wordLookupParameters self.charLookupParameters = charLookupParameters - self.charRnnBuilder = nn.LSTM(*charRnnBuilder) + self.charRnnBuilder = mkBuilder(*charRnnBuilder) self.posTagLookupParameters = posTagLookupParameters self.neTagLookupParameters = neTagLookupParameters self.distanceLookupParameters = distanceLookupParameters @@ -222,7 +222,7 @@ def load(cls, x2i): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) - charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, True, dropoutProb) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) if x2i['hasNe2i'] == 1 else None @@ -276,7 +276,7 @@ def initialize(cls, config, paramPrefix, wordCounter): charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) nn.init.xavier_uniform_(charLookupParameters.weight) - charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, True, dropoutProb) if(posTagEmbeddingSize > 0): tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) @@ -324,11 +324,9 @@ def initialize(cls, config, paramPrefix, wordCounter): positionLookupParameters, 
dropoutProb) - - - - - +def mkBuilder(inputSize, rnnStateSize, numLayers, bi, dropoutProb): + return nn.LSTM(inputSize, rnnStateSize, numLayers, bidirectional=bi, dropout=dropoutProb) + From cd5368a93d4117ce381f151e518c91c9f55e1abe Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:29:50 -0700 Subject: [PATCH 070/134] debug randomness --- main/src/main/python/pytorch/forwardLayer.py | 19 ++++++++++++------- main/src/main/python/pytorch/layers.py | 4 ++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..af7455438 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -37,11 +37,12 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, headPositionsOpt = None): + def forward(self, inputExpressions, , doDropout, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
- argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - emissionScores = self.dropout(self.pH(argExp)) + if doDropout: + argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: @@ -60,15 +61,19 @@ def forward(self, inputExpressions, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = self.dropout(self.pickSpan(e, 0)) + if doDropout: + argExp = self.dropout(self.pickSpan(e, 0)) if headPosition >= 0: # there is an explicit head in the sentence - predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) + if doDropout: + predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) else: # the head is root. we used a dedicated Parameter for root - predExp = self.dropout(self.pickSpan(self.pRoot, 0)) + if doDropout: + predExp = self.dropout(self.pickSpan(self.pRoot, 0)) ss = torch.cat([argExp, predExp]) - l1 = self.dropout(self.pH(ss)) + if doDropout: + l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) elif self.nonlinearity == NONLIN_RELU: diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index cd1616e56..4acf2f2cf 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -105,7 +105,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, sentence.headPositions) + states = self.finalLayer(states, doDropout, sentence.headPositions) return states @@ -116,7 +116,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for intermediateLayer in 
self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, headPositions) + states = self.finalLayer(states, doDropout, headPositions) return states From f19eaf14339b28cc83d7d807db2fa8cc72cd07ad Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:33:34 -0700 Subject: [PATCH 071/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index af7455438..7a901bbe9 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -37,7 +37,7 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, , doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, doDropout, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? if doDropout: From 31201f0ccc4552bb9b605483bd9cbced06be1334 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:41:46 -0700 Subject: [PATCH 072/134] dropout --- main/src/main/python/pytorch/forwardLayer.py | 19 +++++++------------ main/src/main/python/pytorch/layers.py | 4 ++-- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 7a901bbe9..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -37,12 +37,11 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
- if doDropout: - argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - emissionScores = self.dropout(self.pH(argExp)) + argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: @@ -61,19 +60,15 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - if doDropout: - argExp = self.dropout(self.pickSpan(e, 0)) + argExp = self.dropout(self.pickSpan(e, 0)) if headPosition >= 0: # there is an explicit head in the sentence - if doDropout: - predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) + predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) else: # the head is root. we used a dedicated Parameter for root - if doDropout: - predExp = self.dropout(self.pickSpan(self.pRoot, 0)) + predExp = self.dropout(self.pickSpan(self.pRoot, 0)) ss = torch.cat([argExp, predExp]) - if doDropout: - l1 = self.dropout(self.pH(ss)) + l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) elif self.nonlinearity == NONLIN_RELU: diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 4acf2f2cf..cd1616e56 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -105,7 +105,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, sentence.headPositions) + states = self.finalLayer(states, sentence.headPositions) return states @@ -116,7 +116,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for 
intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, headPositions) + states = self.finalLayer(states, headPositions) return states From f00163edb0c22d6b1429f0cb7e4a1e131cf576f4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:43:51 -0700 Subject: [PATCH 073/134] debug dropout --- main/src/main/python/pytorch/forwardLayer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..22fde017e 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -41,7 +41,13 @@ def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - emissionScores = self.dropout(self.pH(argExp)) + temp = self.pH(argExp) + emissionScores = self.dropout(temp) + + try: + np.testing.assert_allclose(temp.cpu().numpy(), emissionScores.cpu().numpy(), rtol=1e-03, atol=1e-05) + except AssertionError as e: + print (e) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: From 4a194a7dad18cdf017d75308190f8d21cc533ce4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:45:46 -0700 Subject: [PATCH 074/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 22fde017e..f4dd4c702 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -45,7 +45,7 @@ def forward(self, inputExpressions, headPositionsOpt = None): emissionScores = 
self.dropout(temp) try: - np.testing.assert_allclose(temp.cpu().numpy(), emissionScores.cpu().numpy(), rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) if self.nonlinearity == NONLIN_TANH: From 0d3f146fc17caaae691087b2e497de0c75fe3c0d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:39:43 -0700 Subject: [PATCH 075/134] average models --- main/src/main/python/pytorch/layers.py | 28 ++++++++++++++++++++++++++ main/src/main/python/pytorch/metal.py | 19 +++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index cd1616e56..b93d61edc 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -97,6 +97,34 @@ def load_state_dict(self, params): if self.finalLayer is not None: self.finalLayer.load_state_dict(params['finalLayer']) + def add_state_dict(self, layers): + if self.initialLayer is not None: + for key in self.initialLayer.state_dict(): + if self.initialLayer.state_dict()[key].data.dtype == torch.float32: + self.initialLayer.state_dict()[key].data += layers.initialLayer.state_dict()[key].data.clone() + for i, il in enumerate(self.intermediateLayers): + for key in il.state_dict(): + if il.state_dict()[key].data.dtype == torch.float32: + il.state_dict()[key].data += layers.intermediateLayers[i].state_dict()[key].data.clone() + if self.finalLayer is not None: + for key in self.finalLayer.state_dict(): + if self.finalLayer.state_dict()[key].data.dtype == torch.float32: + self.finalLayer.state_dict()[key].data += layers.finalLayer.state_dict()[key].data.clone() + + def avg_state_dict(self, num_models): + if self.initialLayer is not None: + for key in self.initialLayer.state_dict(): + if self.initialLayer.state_dict()[key].data.dtype == torch.float32: + 
self.initialLayer.state_dict()[key].data /= num_models + for i, il in enumerate(self.intermediateLayers): + for key in il.state_dict(): + if il.state_dict()[key].data.dtype == torch.float32: + il.state_dict()[key].data /= num_models + if self.finalLayer is not None: + for key in self.finalLayer.state_dict(): + if self.finalLayer.state_dict()[key].data.dtype == torch.float32: + self.finalLayer.state_dict()[key].data /= num_models + def forward(self, sentence, constEmbeddings, doDropout): if self.initialLayer is None: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 312d762e8..b7eee5892 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -312,6 +312,25 @@ def load(cls, modelFilenamePrefix): return layersSeq + @classmethod + def load_multi(cls, models): + print (f"Loading MTL models from {models}...") + + layersSeq = list() + for model in models: + checkpoint = torch.load(model+".torch") + for i, param in enumerate(checkpoint): + layers = Layers.loadX2i(param['x2i']) + layers.load_state_dict(param['model']) + if len(layersSeq) Date: Wed, 26 Jan 2022 17:41:32 -0700 Subject: [PATCH 076/134] Update run.py --- main/src/main/python/run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 01c37404b..b9808ce56 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -25,7 +25,10 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - model = Metal.load(modelName) + if len(modelName.split())==1: + model = Metal.load(modelName) + else: + model = Metal.load_multi(modelName.split()) mtl = Metal(taskManager, model) mtl.test() elif args.shell: From b9a8ced1065c2f5b6e1e51a76a29c128447b3881 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:42:05 -0700 Subject: [PATCH 
077/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index b7eee5892..22e204c2d 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,8 @@ def parse(self, sentence, constEmbeddings): def test(self): - # torch.manual_seed(self.taskManager.random) - # random.seed(self.taskManager.random) + torch.manual_seed(self.taskManager.random) + random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() for taskId in range(0, self.taskManager.taskCount): From f4c9f93571e7bbe871519104c9dda580dd9f7f76 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:54:22 -0700 Subject: [PATCH 078/134] Update run.py --- main/src/main/python/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index b9808ce56..2bec47674 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -6,7 +6,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--model_file', type=str, help='Filename of the model.', nargs='+') parser.add_argument('--train', action='store_true', help='Set the code to training purpose.') parser.add_argument('--test', action='store_true', help='Set the code to testing purpose.') parser.add_argument('--shell', action='store_true', help='Set the code to shell mode.') From 36adfb645d885975fbb070724f8a793b18ea8693 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:57:51 -0700 Subject: [PATCH 079/134] Update run.py --- main/src/main/python/run.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 2bec47674..2301d856d 100644 --- 
a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -25,10 +25,11 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - if len(modelName.split())==1: - model = Metal.load(modelName) + print (modelName) + if len(modelName)==1: + model = Metal.load(modelName[0]) else: - model = Metal.load_multi(modelName.split()) + model = Metal.load_multi(modelName) mtl = Metal(taskManager, model) mtl.test() elif args.shell: From aa491ffa74d9e41b7ce9165f4a96bdfea62e2884 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 18:00:16 -0700 Subject: [PATCH 080/134] fixed typo --- main/src/main/python/pytorch/metal.py | 2 +- main/src/main/python/run.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 22e204c2d..70ed4c3d2 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -328,7 +328,7 @@ def load_multi(cls, models): layersSeq[i].add_state_dict(layers) for layers in layersSeq: layers.avg_state_dict(len(models)) - rint (f"Loading MTL models from {models} complete.") + print (f"Loading MTL models from {models} complete.") return layersSeq @classmethod diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 2301d856d..4cea28453 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -25,7 +25,6 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - print (modelName) if len(modelName)==1: model = Metal.load(modelName[0]) else: From dd0dd2a1c8c71c7df1b616911316747dbb3c8b5b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 20:25:30 -0700 Subject: [PATCH 081/134] debug randomness --- main/src/main/python/pytorch/embeddingLayer.py | 5 +++-- 
main/src/main/python/pytorch/forwardLayer.py | 3 ++- main/src/main/python/pytorch/metal.py | 4 ++-- main/src/main/python/pytorch/rnnLayer.py | 1 + main/src/main/python/pytorch/utils.py | 1 + 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 821147075..7ba4c7272 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -81,6 +81,7 @@ def forward(self, sentence, constEmbeddings, doDropout): # const word embeddings such as GloVe constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) + print (constEmbeddingsExpressions) assert(constEmbeddingsExpressions.size(0) == len(words)) if(tags): assert(len(tags) == len(words)) if(nes): assert(len(nes) == len(words)) @@ -110,12 +111,12 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h if(doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 ids.append(id) learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) - + print ("learnedWordEmbeddings", learnedWordEmbeddings) # # biLSTM over character embeddings # charEmbedding = torch.stack([mkCharacterEmbedding(word, self.c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) - + print ("charEmbedding", charEmbedding) # # POS tag embedding # diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index f4dd4c702..2ca54d132 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -43,7 +43,8 @@ def forward(self, inputExpressions, headPositionsOpt = None): argExp = self.dropout(self.pickSpan(inputExpressions, 1)) temp = self.pH(argExp) emissionScores = self.dropout(temp) - + print ("final, argExp",argExp) + print ("final, emissionScores",emissionScores) try: 
np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) except AssertionError as e: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 70ed4c3d2..113b8442a 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -219,7 +219,7 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): annotatedSentences = reader.toAnnotatedSentences(sent) - for asent in annotatedSentences: + for asent in annotatedSentences[:1]: sentence = asent[0] goldLabels = asent[1] @@ -266,7 +266,7 @@ def parse(self, sentence, constEmbeddings): def test(self): - torch.manual_seed(self.taskManager.random) + # torch.manual_seed(self.taskManager.random) random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index f83fb2420..b257c4b16 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,6 +29,7 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States = transduce(inputExpressions, self.wordRnnBuilder) + print ("Intermediate, States",States) States = States.squeeze(1) if self.useHighwayConnections: States = torch.cat([States, inputExpressions], dim=1) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index d996deb02..b99ba767b 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -44,6 +44,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) + print ("charEmbeddings",charEmbeddings) output = transduce(charEmbeddings, charRnnBuilder) result = 
output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states From eeab1b08da5d9818b0bcf7c67c16a6627ef111c2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 20:28:37 -0700 Subject: [PATCH 082/134] Update mtl-en-ner.conf --- main/src/main/resources/org/clulab/mtl-en-ner.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index 9c07bdd75..f97791325 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -22,7 +22,7 @@ mtl { name = "En NER" train = "ner/train.txt" dev = "ner/dev.txt" - test = "ner/test.txt" + test = "ner/test2.txt" layers { final { From 6cda51e331e62f5f9fd4146ba67c468329d916a4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:39:02 -0700 Subject: [PATCH 083/134] Update utils.py --- main/src/main/python/pytorch/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index b99ba767b..a6168b9aa 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -87,17 +87,17 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: # change 1 to the layers we need - (h, c) = (torch.rand(2, 1, hidden_dim), torch.rand(2, 1, hidden_dim)) + (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) else: - (h, c) = (torch.rand(1, 1, hidden_dim), torch.rand(1, 1, hidden_dim)) + (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: - h = torch.rand(2, 1, hidden_dim) + h = torch.zeros(2, 1, hidden_dim) output, h = builder(embeddings.unsqueeze(1), h) else: - h = 
torch.rand(1, 1, hidden_dim) + h = torch.zeros(1, 1, hidden_dim) output, h = builder(embeddings.unsqueeze(1), h) return output From d8e6734a18cc18674aef25e0c99dbc4166da1344 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:43:56 -0700 Subject: [PATCH 084/134] Update utils.py --- main/src/main/python/pytorch/utils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index a6168b9aa..e5ba2ac8e 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -87,18 +87,14 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: # change 1 to the layers we need - (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1)) else: - (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) - output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1)) elif mode == 'GRU': if bi_direct: - h = torch.zeros(2, 1, hidden_dim) - output, h = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1)) else: - h = torch.zeros(1, 1, hidden_dim) - output, h = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1)) return output From 2b8a78c5cfb1be632c1188f2b4eff514262e2c9e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:50:55 -0700 Subject: [PATCH 085/134] solve the randomness --- main/src/main/python/pytorch/embeddingLayer.py | 2 -- main/src/main/python/pytorch/forwardLayer.py | 2 -- main/src/main/python/pytorch/rnnLayer.py | 1 - main/src/main/python/pytorch/utils.py | 1 - main/src/main/resources/org/clulab/mtl-en-ner.conf | 2 +- 5 files changed, 1 insertion(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py 
b/main/src/main/python/pytorch/embeddingLayer.py index 7ba4c7272..2177f502e 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -111,12 +111,10 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h if(doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 ids.append(id) learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) - print ("learnedWordEmbeddings", learnedWordEmbeddings) # # biLSTM over character embeddings # charEmbedding = torch.stack([mkCharacterEmbedding(word, self.c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) - print ("charEmbedding", charEmbedding) # # POS tag embedding # diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 2ca54d132..825e2e3a5 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -43,8 +43,6 @@ def forward(self, inputExpressions, headPositionsOpt = None): argExp = self.dropout(self.pickSpan(inputExpressions, 1)) temp = self.pH(argExp) emissionScores = self.dropout(temp) - print ("final, argExp",argExp) - print ("final, emissionScores",emissionScores) try: np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) except AssertionError as e: diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index b257c4b16..f83fb2420 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,7 +29,6 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States = transduce(inputExpressions, self.wordRnnBuilder) - print ("Intermediate, States",States) States = States.squeeze(1) if self.useHighwayConnections: States = torch.cat([States, inputExpressions], dim=1) diff --git 
a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index e5ba2ac8e..2d9775eae 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -44,7 +44,6 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - print ("charEmbeddings",charEmbeddings) output = transduce(charEmbeddings, charRnnBuilder) result = output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index f97791325..9c07bdd75 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -22,7 +22,7 @@ mtl { name = "En NER" train = "ner/train.txt" dev = "ner/dev.txt" - test = "ner/test2.txt" + test = "ner/test.txt" layers { final { From 7c83bfe166b5a60a2eadfa38938b315df82c8077 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:53:10 -0700 Subject: [PATCH 086/134] Update embeddingLayer.py --- main/src/main/python/pytorch/embeddingLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 2177f502e..21e199116 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -81,7 +81,6 @@ def forward(self, sentence, constEmbeddings, doDropout): # const word embeddings such as GloVe constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) - print (constEmbeddingsExpressions) assert(constEmbeddingsExpressions.size(0) == len(words)) if(tags): assert(len(tags) == len(words)) if(nes): assert(len(nes) == len(words)) From 
f44f00dc2e9fb1097b3c1b84c8279b168771ad7b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 22:09:16 -0700 Subject: [PATCH 087/134] Update mtl-en-ner.conf --- main/src/main/resources/org/clulab/mtl-en-ner.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index 9c07bdd75..aacc06054 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -8,7 +8,7 @@ mtl { learnedWordEmbeddingSize = 128 charEmbeddingSize = 32 charRnnStateSize = 16 - c2i = "org/clulab/c2i-en.txt" + c2i = "../resources/org/clulab/c2i-en.txt" } intermediate1 { From 14bede9e79491f0e0a56fb6e9f87f3e07c17dda0 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 22:11:51 -0700 Subject: [PATCH 088/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 825e2e3a5..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -41,12 +41,7 @@ def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - temp = self.pH(argExp) - emissionScores = self.dropout(temp) - try: - np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) - except AssertionError as e: - print (e) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: From 382958b55a19491ac990a5bc5701976b7fdc6feb Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 22:12:08 -0700 Subject: [PATCH 089/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 113b8442a..59b009f59 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,6 @@ def parse(self, sentence, constEmbeddings): def test(self): - # torch.manual_seed(self.taskManager.random) - random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() for taskId in range(0, self.taskManager.taskCount): From a67bc71d1bd361b45ba857ebbab1596b70efc4a4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 23:40:34 -0700 Subject: [PATCH 090/134] fix bugs --- main/src/main/python/pytorch/greedyForwardLayer.py | 1 - main/src/main/python/pytorch/viterbiForwardLayer.py | 1 - 2 files changed, 2 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index b913c10d5..f74829577 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -18,7 +18,6 @@ def saveX2i(self): x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i - x2i["dropoutProb"] = self.dropoutProb return x2i diff --git 
a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 5aa9e6669..06aa72728 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -114,7 +114,6 @@ def saveX2i(self): x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i - x2i["dropoutProb"] = self.dropoutProb return x2i From 89104bbbfcf71874d91aa34e6a9ac4bd4f0d4b74 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 00:59:40 -0700 Subject: [PATCH 091/134] Update run.py --- main/src/main/python/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 4cea28453..7d22e56fe 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -17,7 +17,7 @@ if args.train: config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) - modelName = args.model_file + modelName = args.model_file[0] mtl = Metal(taskManager, None) mtl.train(modelName) From 8d31a31baa801dcb9229290e3cbf9b5e8fc3363f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:18:03 -0700 Subject: [PATCH 092/134] fix bug --- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 06aa72728..45dafb44b 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -108,7 +108,7 @@ def loss(self, finalStates, goldLabelStrings): def saveX2i(self): x2i = dict() - x2i["inferenceType"] = TYPE_GREEDY + x2i["inferenceType"] = TYPE_VITERBI_STRING x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 x2i["span"] = spanToString(self.spans) 
if self.spans else "" From 9435f6da7ff389dabf5995137e44e06d4f2977c2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:18:36 -0700 Subject: [PATCH 093/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..a783154c7 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,10 +82,11 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - if inferenceType == TYPE_VITERBI: - return ViterbiForwardLayer.load(x2i) - elif inferenceType == TYPE_GREEDY: - return GreedyForwardLayer.load(x2i) + return ViterbiForwardLayer.load(x2i) + # if inferenceType == TYPE_VITERBI: + # return ViterbiForwardLayer.load(x2i) + # elif inferenceType == TYPE_GREEDY: + # return GreedyForwardLayer.load(x2i) else: raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") From 1752b2637b90c9d81c22f811b29d2a188f0336d3 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:20:44 -0700 Subject: [PATCH 094/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index a783154c7..0ff193ff8 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -87,8 +87,8 @@ def load(x2i): # return ViterbiForwardLayer.load(x2i) # elif inferenceType == TYPE_GREEDY: # return GreedyForwardLayer.load(x2i) - else: - raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") + # else: + # raise RuntimeError(f"ERROR: unknown forward layer type 
{inferenceType}!") @staticmethod def initialize(config, paramPrefix, labelCounter, isDual, inputSize): From c6d8fcc122f2d3f809cc7de552ed6f56e4002325 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:22:37 -0700 Subject: [PATCH 095/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 0ff193ff8..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,13 +82,12 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - return ViterbiForwardLayer.load(x2i) - # if inferenceType == TYPE_VITERBI: - # return ViterbiForwardLayer.load(x2i) - # elif inferenceType == TYPE_GREEDY: - # return GreedyForwardLayer.load(x2i) - # else: - # raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") + if inferenceType == TYPE_VITERBI: + return ViterbiForwardLayer.load(x2i) + elif inferenceType == TYPE_GREEDY: + return GreedyForwardLayer.load(x2i) + else: + raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") @staticmethod def initialize(config, paramPrefix, labelCounter, isDual, inputSize): From 249afc96b3b84d11e18518fcdc7f67480240782f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:25:06 -0700 Subject: [PATCH 096/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..3d945837d 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,6 +82,7 @@ def load(x2i): from pytorch.greedyForwardLayer import 
GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] + print (inferenceType) if inferenceType == TYPE_VITERBI: return ViterbiForwardLayer.load(x2i) elif inferenceType == TYPE_GREEDY: From fe4367b4b0cbeeeb184ab6870062e1f82d554f09 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:31:30 -0700 Subject: [PATCH 097/134] fix bug --- main/src/main/python/pytorch/forwardLayer.py | 6 +++--- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 3d945837d..3a310aa84 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,10 +82,10 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - print (inferenceType) - if inferenceType == TYPE_VITERBI: + print (inferenceType, TYPE_VITERBI) + if inferenceType == TYPE_VITERBI or inferenceType == TYPE_VITERBI_STRING:#this is a temporary solution to handle a typo in viterbi forward layer... 
return ViterbiForwardLayer.load(x2i) - elif inferenceType == TYPE_GREEDY: + elif inferenceType == TYPE_GREEDY or inferenceType == TYPE_GREEDY_STRING: return GreedyForwardLayer.load(x2i) else: raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 45dafb44b..77c0514f9 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -108,7 +108,7 @@ def loss(self, finalStates, goldLabelStrings): def saveX2i(self): x2i = dict() - x2i["inferenceType"] = TYPE_VITERBI_STRING + x2i["inferenceType"] = TYPE_VITERBI x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 x2i["span"] = spanToString(self.spans) if self.spans else "" From 10301770a30513ac71a1c3e424c4e1b81fa77501 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:31:49 -0700 Subject: [PATCH 098/134] remove debug print --- main/src/main/python/pytorch/forwardLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 3a310aa84..33040cf95 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,7 +82,6 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - print (inferenceType, TYPE_VITERBI) if inferenceType == TYPE_VITERBI or inferenceType == TYPE_VITERBI_STRING:#this is a temporary solution to handle a typo in viterbi forward layer... 
return ViterbiForwardLayer.load(x2i) elif inferenceType == TYPE_GREEDY or inferenceType == TYPE_GREEDY_STRING: From 0b57ad8db080deabdc5ab7b193e3c4a14ef59d47 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:51:28 -0700 Subject: [PATCH 099/134] add averaging models feature --- main/src/main/python/pytorch2onnx.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index f3d3b58c3..5fa59992a 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -60,7 +60,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--model_file', type=str, help='Filename of the model.', nargs='+') parser.add_argument('--config', type=str, help='Filename of the configuration.') parser.add_argument('--seed', type=int, default=1234) args = parser.parse_args() @@ -68,7 +68,10 @@ def forward(self, embeddings, word_ids, charEmbedding): config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - model = Metal.load(modelName) + if len(modelName)==1: + model = Metal.load(modelName[0]) + else: + model = Metal.load_multi(modelName) for layers in model: layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() From bab975ef44f11d207e303685c51fc61089eca6fc Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:53:31 -0700 Subject: [PATCH 100/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 5fa59992a..2df5bd32b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ 
b/main/src/main/python/pytorch2onnx.py @@ -54,7 +54,7 @@ def forward(self, embeddings, word_ids, charEmbedding): for il in self.intermediateLayerss[i]: state = il(state, False) if self.finalLayers[i]: - state = self.finalLayers[i](state, False, None)#headPositions set to be None for now, we can add it in input list later + state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later return state if __name__ == '__main__': From 50c112740d0a0bab0e6e8ee788725e6417bcdd06 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:56:03 -0700 Subject: [PATCH 101/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 2df5bd32b..926ed1000 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -88,7 +88,7 @@ def forward(self, embeddings, word_ids, charEmbedding): torch.manual_seed(taskManager.random) random.seed(taskManager.random) - x2i = json.load(open(args.model_file+".json")) + x2i = json.load(open(args.model_file[0]+".json")) c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] From 05c090a68ab9b419abd8c29243e249b74a4b7ef5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:15:20 -0700 Subject: [PATCH 102/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 926ed1000..c0b6cdea8 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -72,6 +72,10 @@ def forward(self, embeddings, word_ids, charEmbedding): model = Metal.load(modelName[0]) else: model = Metal.load_multi(modelName) + + mtl = Metal(taskManager, model) + mtl.test() + for layers in model: layers.start_eval() 
constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() From 08fd0d04b25f3001b877768d662312c768fc006a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:31:26 -0700 Subject: [PATCH 103/134] debug performance difference between torch and onnx --- main/src/main/python/pytorch2onnx.py | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index c0b6cdea8..ea55627a3 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -97,6 +97,45 @@ def forward(self, embeddings, word_ids, charEmbedding): c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + sentences = taskManager.tasks[taskId].testSentences + if sentences: + reader = MetalRowReader() + for sent in sentences: + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + char_embs = [] + for word in words: + char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + char_out = export_char(char_ids) + char_embs.append(char_out) + char_embs = np.stack(char_embs) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() + word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) + + emissionScores = export_model(embeddings, word_ids, char_embs) + + preds = [i2t[np.argmax(es)] for es in emissionScores] + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + + print (f"Accuracy : {scoreCountsByLabel.accuracy()}") + print (f"Precision : {scoreCountsByLabel.precision()}") + print (f"Recall on : {scoreCountsByLabel.recall()}") + print (f"Micro F1 : 
{scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences From f56722000e272eb7d3aa2cce7d172d22b3fc0511 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:38:36 -0700 Subject: [PATCH 104/134] debug performance difference between torch and onnx --- main/src/main/python/pytorch2onnx.py | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index ea55627a3..1b46d2349 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -192,6 +192,52 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_char = onnxruntime.InferenceSession("char.onnx") # compute ONNX Runtime output prediction + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + sentences = taskManager.tasks[taskId].testSentences + if sentences: + reader = MetalRowReader() + for sent in sentences: + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + char_embs = [] + for word in words: + char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + ort_inputs = {ort_char.get_inputs()[i].name: x for i, x in enumerate([char_ids])} + ort_outs = ort_char.run(None, ort_inputs) + char_embs.append(ort_outs[0]) + char_embs = np.stack(char_embs) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() + word_ids = 
np.array([w2i[word] if word in w2i else 0 for word in words]) + + dummy_input = (embeddings, word_ids, char_embs) + + ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} + ort_outs = ort_session.run(None, ort_inputs) + + emissionScores = ort_outs[0] + preds = [i2t[np.argmax(es)] for es in emissionScores] + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + + print (f"Accuracy : {scoreCountsByLabel.accuracy()}") + print (f"Precision : {scoreCountsByLabel.precision()}") + print (f"Recall on : {scoreCountsByLabel.recall()}") + print (f"Micro F1 : {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + duration = time.time() - start_time + print (duration) + ort_inputs = {ort_char.get_inputs()[i].name: to_numpy(x) for i, x in enumerate([char_ids])} ort_outs = ort_char.run(None, ort_inputs) try: From f8f1ca17da5a3732b7b32e5f47de42219ab93f50 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:46:43 -0700 Subject: [PATCH 105/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 1b46d2349..009f5d273 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -113,7 +113,7 @@ def forward(self, embeddings, word_ids, charEmbedding): char_embs = [] for word in words: - char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) char_out = export_char(char_ids) char_embs.append(char_out) char_embs = np.stack(char_embs) From 298bdc48a242316f3e62d5a97599f1bc40828455 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 
21:53:06 -0700 Subject: [PATCH 106/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 009f5d273..aedbc905a 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -118,8 +118,8 @@ def forward(self, embeddings, word_ids, charEmbedding): char_embs.append(char_out) char_embs = np.stack(char_embs) embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() - word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids) + word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) emissionScores = export_model(embeddings, word_ids, char_embs) From 949522ca4322b48f57a2c6c681c9c33d38ef1f9b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:59:20 -0700 Subject: [PATCH 107/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index aedbc905a..b617b6c35 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -116,7 +116,7 @@ def forward(self, embeddings, word_ids, charEmbedding): char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) char_out = export_char(char_ids) char_embs.append(char_out) - char_embs = np.stack(char_embs) + char_embs = torch.stack(char_embs) embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) From d05d81b927b3c9d6bd29e843b30f4ee887c50a27 Mon 
Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:05:27 -0700 Subject: [PATCH 108/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index b617b6c35..d1b93ccda 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -97,6 +97,9 @@ def forward(self, embeddings, word_ids, charEmbedding): c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] + t2i = x2i[1]['x2i']['finalLayer']["t2i"] + i2t = {i:t for t, i in t2i.items()} + for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName sentences = taskManager.tasks[taskId].testSentences From 9bc0ac8291054c85b6579a80ffb1b103711bac37 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:12:06 -0700 Subject: [PATCH 109/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index d1b93ccda..4bcd3f836 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -124,7 +124,7 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - emissionScores = export_model(embeddings, word_ids, char_embs) + emissionScores = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() preds = [i2t[np.argmax(es)] for es in emissionScores] From ebbfdcf1081140090f59d895543e30658c18ec30 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:18:06 -0700 Subject: [PATCH 110/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch2onnx.py 
b/main/src/main/python/pytorch2onnx.py index 4bcd3f836..3675d9d48 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -8,6 +8,7 @@ from pytorch.utils import * from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from sequences.rowReaders import * +from pytorch.seqScorer import * import onnx import onnxruntime From 12a777dbd09057f11c5dd02150b16174c27a8d00 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:45:46 -0700 Subject: [PATCH 111/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 3675d9d48..28c548b60 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -100,7 +100,7 @@ def forward(self, embeddings, word_ids, charEmbedding): t2i = x2i[1]['x2i']['finalLayer']["t2i"] i2t = {i:t for t, i in t2i.items()} - + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName sentences = taskManager.tasks[taskId].testSentences @@ -139,7 +139,7 @@ def forward(self, embeddings, word_ids, charEmbedding): print (f"Micro F1 : {scoreCountsByLabel.f1()}") for label in scoreCountsByLabel.labels(): print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences From 84d551786cf151b657f07b00539d15403a8c1d4c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:59:56 -0700 Subject: [PATCH 112/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 47 +--------------------------- 1 file changed, 
1 insertion(+), 46 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 28c548b60..7b4b65eb2 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -56,6 +56,7 @@ def forward(self, embeddings, word_ids, charEmbedding): state = il(state, False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later + state = self.finalLayers[i].inference(state) return state if __name__ == '__main__': @@ -196,52 +197,6 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_char = onnxruntime.InferenceSession("char.onnx") # compute ONNX Runtime output prediction - for taskId in range(0, taskManager.taskCount): - taskName = taskManager.tasks[taskId].taskName - sentences = taskManager.tasks[taskId].testSentences - if sentences: - reader = MetalRowReader() - for sent in sentences: - annotatedSentences = reader.toAnnotatedSentences(sent) - - for asent in annotatedSentences: - sentence = asent[0] - goldLabels = asent[1] - - words = sentence.words - - char_embs = [] - for word in words: - char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) - ort_inputs = {ort_char.get_inputs()[i].name: x for i, x in enumerate([char_ids])} - ort_outs = ort_char.run(None, ort_inputs) - char_embs.append(ort_outs[0]) - char_embs = np.stack(char_embs) - embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() - word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) - - dummy_input = (embeddings, word_ids, char_embs) - - ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} - ort_outs = ort_session.run(None, ort_inputs) - - emissionScores = ort_outs[0] - preds = [i2t[np.argmax(es)] for es in emissionScores] - - sc = SeqScorer.f1(goldLabels, preds) 
- scoreCountsByLabel.incAll(sc) - - - print (f"Accuracy : {scoreCountsByLabel.accuracy()}") - print (f"Precision : {scoreCountsByLabel.precision()}") - print (f"Recall on : {scoreCountsByLabel.recall()}") - print (f"Micro F1 : {scoreCountsByLabel.f1()}") - for label in scoreCountsByLabel.labels(): - print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - duration = time.time() - start_time - print (duration) - ort_inputs = {ort_char.get_inputs()[i].name: to_numpy(x) for i, x in enumerate([char_ids])} ort_outs = ort_char.run(None, ort_inputs) try: From a2bfc832fea368b87111259a8fdf34de6fbdcc19 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:22:42 -0700 Subject: [PATCH 113/134] fix bug in viterbi decoding --- main/src/main/python/pytorch/viterbiForwardLayer.py | 4 ++++ main/src/main/python/pytorch2onnx.py | 12 +++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 77c0514f9..c96ac2caa 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -124,6 +124,10 @@ def inference(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) return [self.i2t[i] for i in labelsIds] + def inference2(self, emissionScores): + score, labelsIds = self._viterbi_decode(emissionScores) + return labelsIds + def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 7b4b65eb2..c70e3603b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -56,8 +56,8 @@ def forward(self, embeddings, word_ids, charEmbedding): state = il(state, 
False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later - state = self.finalLayers[i].inference(state) - return state + ids = self.finalLayers[i].inference2(state) + return ids if __name__ == '__main__': @@ -101,6 +101,7 @@ def forward(self, embeddings, word_ids, charEmbedding): t2i = x2i[1]['x2i']['finalLayer']["t2i"] i2t = {i:t for t, i in t2i.items()} + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -126,9 +127,9 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - emissionScores = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() + ids = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() - preds = [i2t[np.argmax(es)] for es in emissionScores] + preds = [i2t[i] for i in ids] sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) @@ -140,6 +141,7 @@ def forward(self, embeddings, word_ids, charEmbedding): print (f"Micro F1 : {scoreCountsByLabel.f1()}") for label in scoreCountsByLabel.labels(): print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -207,7 +209,7 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(np.array(output), ort_outs[0], rtol=1e-03, atol=1e-05) 
except AssertionError as e: print (e) From 3269cd4c41eedf285178b21528a2b8fadd7d6505 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:28:30 -0700 Subject: [PATCH 114/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index c70e3603b..acc24c121 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -127,7 +127,7 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - ids = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() + ids = export_model(embeddings, word_ids, char_embs) preds = [i2t[i] for i in ids] From c90015c7ae7c24426426a49e176562b356474328 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:38:12 -0700 Subject: [PATCH 115/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index acc24c121..9a869860b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,7 +57,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later ids = self.finalLayers[i].inference2(state) - return ids + return torch.LongTensor(ids) if __name__ == '__main__': @@ -127,7 +127,7 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - ids = export_model(embeddings, word_ids, char_embs) + ids = export_model(embeddings, word_ids, 
char_embs).detach().cpu().numpy() preds = [i2t[i] for i in ids] From 81c1bc51d51afe0748f3f2de26039cc454c9fd9a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:49:18 -0700 Subject: [PATCH 116/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 9a869860b..5eb5ef047 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -205,11 +205,11 @@ def forward(self, embeddings, word_ids, charEmbedding): np.testing.assert_allclose(to_numpy(char_out), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) - + print (ort_session.get_inputs()) ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(np.array(output), ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) From 91911ae227758c0e1561116284d766728d350cef Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:49:48 -0700 Subject: [PATCH 117/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 44 ---------------------------- 1 file changed, 44 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 5eb5ef047..54645bd94 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -99,50 +99,6 @@ def forward(self, embeddings, word_ids, charEmbedding): c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] - t2i = x2i[1]['x2i']['finalLayer']["t2i"] - i2t = {i:t for t, i in t2i.items()} - - scoreCountsByLabel = ScoreCountsByLabel() - for taskId in range(0, taskManager.taskCount): - taskName = 
taskManager.tasks[taskId].taskName - sentences = taskManager.tasks[taskId].testSentences - if sentences: - reader = MetalRowReader() - for sent in sentences: - annotatedSentences = reader.toAnnotatedSentences(sent) - - for asent in annotatedSentences: - sentence = asent[0] - goldLabels = asent[1] - - words = sentence.words - - char_embs = [] - for word in words: - char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) - char_out = export_char(char_ids) - char_embs.append(char_out) - char_embs = torch.stack(char_embs) - embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids) - word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - - ids = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() - - preds = [i2t[i] for i in ids] - - sc = SeqScorer.f1(goldLabels, preds) - scoreCountsByLabel.incAll(sc) - - - print (f"Accuracy : {scoreCountsByLabel.accuracy()}") - print (f"Precision : {scoreCountsByLabel.precision()}") - print (f"Recall on : {scoreCountsByLabel.recall()}") - print (f"Micro F1 : {scoreCountsByLabel.f1()}") - for label in scoreCountsByLabel.labels(): - print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - - scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences From b1be4b57b78e82b529484c84dd2e3edf01f78a4d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:03:13 -0700 Subject: [PATCH 118/134] debug decoder --- main/src/main/python/pytorch/greedyForwardLayer.py | 4 ++++ main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- main/src/main/python/pytorch2onnx.py | 7 ++----- main/src/main/python/test_onnx.py | 3 +-- 4 
files changed, 8 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index f74829577..56d668c02 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -28,6 +28,10 @@ def inference(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) return [self.i2t[np.argmax(es)] for es in emissionScores] + def inference2(self, emissionScores): + emissionScores = emissionScoresToArrays(emissionScores) + return torch.LongTensor([np.argmax(es) for es in emissionScores]) + def inferenceWithScores(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index c96ac2caa..a4be12236 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -126,7 +126,7 @@ def inference(self, emissionScores): def inference2(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) - return labelsIds + return torch.LongTensor(labelsIds) def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 54645bd94..112401178 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -56,8 +56,8 @@ def forward(self, embeddings, word_ids, charEmbedding): state = il(state, False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later - ids = self.finalLayers[i].inference2(state) - return torch.LongTensor(ids) 
+ ids = self.finalLayers[-1].inference2(state) + return ids if __name__ == '__main__': @@ -75,9 +75,6 @@ def forward(self, embeddings, word_ids, charEmbedding): else: model = Metal.load_multi(modelName) - mtl = Metal(taskManager, model) - mtl.test() - for layers in model: layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index dc5d7ae1e..fb8e76757 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -66,8 +66,7 @@ ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) - emissionScores = ort_outs[0] - preds = [i2t[np.argmax(es)] for es in emissionScores] + preds = [i2t[i] for i in ort_outs[0]] sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) From 40654a6e0a63365711ddeefd646280cd86bd41d9 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:07:40 -0700 Subject: [PATCH 119/134] decoder error... 
--- main/src/main/python/pytorch/greedyForwardLayer.py | 2 +- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- main/src/main/python/pytorch2onnx.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 56d668c02..19d788977 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -30,7 +30,7 @@ def inference(self, emissionScores): def inference2(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) - return torch.LongTensor([np.argmax(es) for es in emissionScores]) + return [np.argmax(es) for es in emissionScores] def inferenceWithScores(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index a4be12236..c96ac2caa 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -126,7 +126,7 @@ def inference(self, emissionScores): def inference2(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) - return torch.LongTensor(labelsIds) + return labelsIds def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 112401178..58439d0cc 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -162,7 +162,7 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + 
np.testing.assert_allclose(output, ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) From 7cda4dd57a6896438aca13ca1d22dd9eb4d1f341 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:23:31 -0700 Subject: [PATCH 120/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 58439d0cc..4abc154d7 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,7 +57,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later ids = self.finalLayers[-1].inference2(state) - return ids + return [ids] if __name__ == '__main__': From 7e673e74078d52e7364c6d3fc70ff53fa24caacc Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:29:24 -0700 Subject: [PATCH 121/134] Update viterbiForwardLayer.py --- main/src/main/python/pytorch/viterbiForwardLayer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index c96ac2caa..4b2e14060 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -125,6 +125,7 @@ def inference(self, emissionScores): return [self.i2t[i] for i in labelsIds] def inference2(self, emissionScores): + print (self.transitions) score, labelsIds = self._viterbi_decode(emissionScores) return labelsIds From 1857a5ffc1f14c364235b217e1ee674b0dcabb23 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:42:33 -0700 Subject: [PATCH 122/134] trying to fix the viterbi decoder --- main/src/main/python/pytorch/greedyForwardLayer.py | 3 +-- main/src/main/python/pytorch/viterbiForwardLayer.py | 5 ++--- 
main/src/main/python/pytorch2onnx.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 19d788977..145878c91 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -29,8 +29,7 @@ def inference(self, emissionScores): return [self.i2t[np.argmax(es)] for es in emissionScores] def inference2(self, emissionScores): - emissionScores = emissionScoresToArrays(emissionScores) - return [np.argmax(es) for es in emissionScores] + return torch.argmax(emissionScores, dim=1) def inferenceWithScores(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 4b2e14060..f3edbeb4f 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -125,9 +125,8 @@ def inference(self, emissionScores): return [self.i2t[i] for i in labelsIds] def inference2(self, emissionScores): - print (self.transitions) - score, labelsIds = self._viterbi_decode(emissionScores) - return labelsIds + def inference2(self, emissionScores): + return torch.argmax(emissionScores, dim=1) def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 4abc154d7..112401178 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,7 +57,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later ids = self.finalLayers[-1].inference2(state) - return [ids] + return ids if __name__ == 
'__main__': @@ -162,7 +162,7 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(output, ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) From 14beb420178e4f892a9ba7855ae19ac250723c2e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:42:57 -0700 Subject: [PATCH 123/134] Update viterbiForwardLayer.py --- main/src/main/python/pytorch/viterbiForwardLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index f3edbeb4f..77025fd70 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -125,7 +125,6 @@ def inference(self, emissionScores): return [self.i2t[i] for i in labelsIds] def inference2(self, emissionScores): - def inference2(self, emissionScores): return torch.argmax(emissionScores, dim=1) def inferenceWithScores(emissionScores): From 3aee7e7333c33eb3656b9f71f111ddd3214f29cc Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Mar 2022 22:57:23 -0700 Subject: [PATCH 124/134] add other embeddings to onnx model --- main/src/main/python/pytorch2onnx.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 112401178..2533ea19d 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -41,14 +41,37 @@ def __init__(self, model): for i, layers in enumerate(model): if layers.initialLayer is not None: self.word_lookup = layers.initialLayer.wordLookupParameters + self.postag_lookup = 
layers.initialLayer.posTagLookupParameters + self.netag_lookup = layers.initialLayer.neTagLookupParameters + self.dist_lookup = layers.initialLayer.distanceLookupParameters + self.pos_lookup = layers.initialLayer.positionLookupParameters + self.useIsPredicate = layers.initialLayer.useIsPredicate self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) self.finalLayers[i] = layers.finalLayer self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, embeddings, word_ids, charEmbedding): + def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, headPositions=None): # Can I assuem there is only one initial layer? learnedWordEmbeddings = self.word_lookup(word_ids) - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + posTagEmbed = self.postag_lookup(tags) if tags and self.postag_lookup else None + neTagEmbed = self.netag_lookup(nes) if nes and self.netag_lookup else None + predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) if headPositions and self.useIsPredicate else None + if headPositions and self.dist_lookup: + dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] + for i in range(dists): + if dists[i] < -self.distanceWindowSize: + dists[i] = self.distanceWindowSize-1 + if dists[i] > self.distanceWindowSize: + dist[i] = self.distanceWindowSize+1 + distanceEmbedding = self.dist_lookup(torch.LongTensor(dists)) + else: + distanceEmbedding = None + if self.pos_lookup: + values = [i if i<100 else 100 for i, wid in enumerate(word_ids)] + positionEmbedding = self.pos_lookup(torch.LongTensor(values)) + else: + positionEmbedding = None + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] 
embedParts = [ep for ep in embedParts if ep is not None] state = torch.cat(embedParts, dim=1) for i in range(self.model_length): From cd46faa68e10e4f9ff95697cec2cb87bb33a4a27 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Mar 2022 23:49:43 -0700 Subject: [PATCH 125/134] Update embeddingLayer.py --- main/src/main/python/pytorch/embeddingLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 21e199116..ce8dffef3 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -142,7 +142,7 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # if headPositions and self.distanceLookupParameters: dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] - for i in range(dists): + for i in range(len(dists)): if dists[i] < -self.distanceWindowSize: dists[i] = self.distanceWindowSize-1 if dists[i] > self.distanceWindowSize: From fd514a22e8a25e0a0f61833631bb707e0393e039 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Mar 2022 00:05:09 -0700 Subject: [PATCH 126/134] fix bug in distance embeddings --- main/src/main/python/pytorch/embeddingLayer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index ce8dffef3..6239e8746 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -141,12 +141,12 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # We cut the distance down to values inside the window [-distanceWindowSize, +distanceWindowSize] # if headPositions and self.distanceLookupParameters: - dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] + dists = [i-predicatePosition+51 for i, 
predicatePosition in enumerate(headPositions)] for i in range(len(dists)): - if dists[i] < -self.distanceWindowSize: - dists[i] = self.distanceWindowSize-1 - if dists[i] > self.distanceWindowSize: - dist[i] = self.distanceWindowSize+1 + if dists[i] < 1: + dists[i] = 0 + if dists[i] > self.distanceWindowSize + 51: + dists[i] = self.distanceWindowSize + 52 distanceEmbedding = self.distanceLookupParameters(torch.LongTensor(dists)) else: distanceEmbedding = None From 278697147ea515e8299e4162c4282a5f18e7f1a9 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Mar 2022 00:16:11 -0700 Subject: [PATCH 127/134] Update embeddingLayer.py --- main/src/main/python/pytorch/embeddingLayer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 6239e8746..86c8b8414 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -141,12 +141,7 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # We cut the distance down to values inside the window [-distanceWindowSize, +distanceWindowSize] # if headPositions and self.distanceLookupParameters: - dists = [i-predicatePosition+51 for i, predicatePosition in enumerate(headPositions)] - for i in range(len(dists)): - if dists[i] < 1: - dists[i] = 0 - if dists[i] > self.distanceWindowSize + 51: - dists[i] = self.distanceWindowSize + 52 + dists = [max(i-predicatePosition+self.distanceWindowSize+1, 0) if i-predicatePosition <= self.distanceWindowSize else 2 * self.distanceWindowSize + 2 for i, predicatePosition in enumerate(headPositions)] distanceEmbedding = self.distanceLookupParameters(torch.LongTensor(dists)) else: distanceEmbedding = None From 9b97c68773717a2ff669b970102967a19fa4acbd Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Mar 2022 00:40:02 -0700 Subject: [PATCH 128/134] Update embeddingLayer.py --- 
main/src/main/python/pytorch/embeddingLayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 86c8b8414..e3479119a 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -66,7 +66,7 @@ def __init__(self, w2i, # word to index posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 - distanceDim = distanceWindowSize if distanceLookupParameters else 0 + distanceDim = distanceEmbeddingSize if distanceLookupParameters else 0 predicateDim = 1 if distanceLookupParameters and useIsPredicate else 0 positionDim = positionEmbeddingSize if positionLookupParameters else 0 self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim @@ -132,7 +132,7 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # 1 if this word is the predicate # if headPositions and self.useIsPredicate: - predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) + predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]).unsqueeze(1) else: predEmbed = None From a20b2c58c789cda22784a3ee6dd055e2cf06e329 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 9 Mar 2022 15:38:35 -0700 Subject: [PATCH 129/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 2533ea19d..12a2c9a80 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,12 +57,7 @@ def forward(self, embeddings, word_ids, 
charEmbedding, tags=None, nes=None, head neTagEmbed = self.netag_lookup(nes) if nes and self.netag_lookup else None predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) if headPositions and self.useIsPredicate else None if headPositions and self.dist_lookup: - dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] - for i in range(dists): - if dists[i] < -self.distanceWindowSize: - dists[i] = self.distanceWindowSize-1 - if dists[i] > self.distanceWindowSize: - dist[i] = self.distanceWindowSize+1 + dists = [max(i-predicatePosition+self.distanceWindowSize+1, 0) if i-predicatePosition <= self.distanceWindowSize else 2 * self.distanceWindowSize + 2 for i, predicatePosition in enumerate(headPositions)] distanceEmbedding = self.dist_lookup(torch.LongTensor(dists)) else: distanceEmbedding = None @@ -118,6 +113,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] + t2i = x2i[0]['x2i']['initialLayer']['tag2i'] + n2i = x2i[0]['x2i']['initialLayer']['ne2i'] for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -131,6 +128,9 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head goldLabels = asent[1] words = sentence.words + tags = sentence.posTags + nes = sentence.neTags + headPositions = sentence.headPositions char_embs = [] for word in words: char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) @@ -140,9 +140,11 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - output = export_model(embeddings, word_ids, 
char_embs) + tags_ids = torch.LongTensor([t2i[tag] if tag in t2i else 0 for tag in tags]) + nes_ids = torch.LongTensor([n2i[ne] if ne in n2i else 0 for ne in nes]) + output = export_model(embeddings, word_ids, char_embs, tags_ids, nes_ids, headPositions) - dummy_input = (embeddings, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs, tags_ids, nes_ids, headPositions) torch.onnx.export(export_char, char_ids, @@ -159,11 +161,14 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['embed', 'words', 'chars'], # the model's input names + input_names = ['embed', 'words', 'chars', 'tags_ids', 'nes_ids', 'headPositions'], # the model's input names output_names = ['output'], # the model's output names dynamic_axes = {'embed' : {0 : 'sentence length'}, 'words' : {0 : 'sentence length'}, 'chars' : {0 : 'sentence length'}, + 'tags_ids' : {0 : 'sentence length'}, + 'nes_ids' : {0 : 'sentence length'}, + 'headPositions' : {0 : 'sentence length'}, 'output': {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") From a0022227c6e20eb9bca650eacff28d3be0ed588d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 9 Mar 2022 23:49:13 -0700 Subject: [PATCH 130/134] implement viterbi decoding --- main/src/main/python/pytorch2onnx.py | 56 ++++++++-------------------- main/src/main/python/test_onnx.py | 48 +++++++++++++++++++++++- 2 files changed, 63 insertions(+), 41 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 12a2c9a80..080b0efd8 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -15,6 +15,8 @@ import json +import numpy as np + def to_numpy(tensor): return tensor.detach().cpu().numpy() if 
tensor.requires_grad else tensor.cpu().numpy() @@ -41,32 +43,14 @@ def __init__(self, model): for i, layers in enumerate(model): if layers.initialLayer is not None: self.word_lookup = layers.initialLayer.wordLookupParameters - self.postag_lookup = layers.initialLayer.posTagLookupParameters - self.netag_lookup = layers.initialLayer.neTagLookupParameters - self.dist_lookup = layers.initialLayer.distanceLookupParameters - self.pos_lookup = layers.initialLayer.positionLookupParameters - self.useIsPredicate = layers.initialLayer.useIsPredicate self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) self.finalLayers[i] = layers.finalLayer self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, headPositions=None): + def forward(self, embeddings, word_ids, charEmbedding): # Can I assuem there is only one initial layer? learnedWordEmbeddings = self.word_lookup(word_ids) - posTagEmbed = self.postag_lookup(tags) if tags and self.postag_lookup else None - neTagEmbed = self.netag_lookup(nes) if nes and self.netag_lookup else None - predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) if headPositions and self.useIsPredicate else None - if headPositions and self.dist_lookup: - dists = [max(i-predicatePosition+self.distanceWindowSize+1, 0) if i-predicatePosition <= self.distanceWindowSize else 2 * self.distanceWindowSize + 2 for i, predicatePosition in enumerate(headPositions)] - distanceEmbedding = self.dist_lookup(torch.LongTensor(dists)) - else: - distanceEmbedding = None - if self.pos_lookup: - values = [i if i<100 else 100 for i, wid in enumerate(word_ids)] - positionEmbedding = self.pos_lookup(torch.LongTensor(values)) - else: - positionEmbedding = None - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding, posTagEmbed, neTagEmbed, 
distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding] embedParts = [ep for ep in embedParts if ep is not None] state = torch.cat(embedParts, dim=1) for i in range(self.model_length): @@ -74,8 +58,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head state = il(state, False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later - ids = self.finalLayers[-1].inference2(state) - return ids + transitions = self.finalLayers[-1].transitions + return state, transitions if __name__ == '__main__': @@ -113,8 +97,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] - t2i = x2i[0]['x2i']['initialLayer']['tag2i'] - n2i = x2i[0]['x2i']['initialLayer']['ne2i'] + t2i = x2i[1]['x2i']['finalLayer']["t2i"] + i2t = {i:t for t, i in t2i.items()} for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -128,9 +112,7 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head goldLabels = asent[1] words = sentence.words - tags = sentence.posTags - nes = sentence.neTags - headPositions = sentence.headPositions + char_embs = [] for word in words: char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) @@ -140,11 +122,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - tags_ids = torch.LongTensor([t2i[tag] if tag in t2i else 0 for tag in tags]) - nes_ids = torch.LongTensor([n2i[ne] if ne in n2i else 0 for ne in nes]) - output = export_model(embeddings, 
word_ids, char_embs, tags_ids, nes_ids, headPositions) - - dummy_input = (embeddings, word_ids, char_embs, tags_ids, nes_ids, headPositions) + state, transitions = export_model(embeddings, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs) torch.onnx.export(export_char, char_ids, @@ -161,15 +140,12 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['embed', 'words', 'chars', 'tags_ids', 'nes_ids', 'headPositions'], # the model's input names - output_names = ['output'], # the model's output names + input_names = ['embed', 'words', 'chars'], # the model's input names + output_names = ['state', 'transitions'], # the model's output names dynamic_axes = {'embed' : {0 : 'sentence length'}, 'words' : {0 : 'sentence length'}, 'chars' : {0 : 'sentence length'}, - 'tags_ids' : {0 : 'sentence length'}, - 'nes_ids' : {0 : 'sentence length'}, - 'headPositions' : {0 : 'sentence length'}, - 'output': {0 : 'sentence length'}}) + 'state': {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") onnx.checker.check_model(onnx_model) @@ -186,11 +162,11 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head np.testing.assert_allclose(to_numpy(char_out), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) - print (ort_session.get_inputs()) ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) + try: - np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(state.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) diff --git 
a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index fb8e76757..7c33d4894 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -4,6 +4,50 @@ from pytorch.seqScorer import * import time +def viterbi_decode(feats, transitions, t2i): + backpointers = [] + + # Initialize the viterbi variables in log space + init_vvars = np.full((1, len(t2i)), -10000.) + init_vvars[0][t2i[START_TAG]] = 0 + + # forward_var at step i holds the viterbi variables for step i-1 + forward_var = init_vvars + for feat in feats: + bptrs_t = [] # holds the backpointers for this step + viterbivars_t = [] # holds the viterbi variables for this step + + for next_tag in range(len(t2i)): + # next_tag_var[i] holds the viterbi variable for tag i at the + # previous step, plus the score of transitioning + # from tag i to next_tag. + # We don't include the emission scores here because the max + # does not depend on them (we add them in below) + next_tag_var = forward_var + transitions[next_tag] + best_tag_id = np.argmax(next_tag_var, 1)[0] + bptrs_t.append(best_tag_id) + viterbivars_t.append(next_tag_var[0][best_tag_id].reshape(1)) + # Now add in the emission scores, and assign forward_var to the set + # of viterbi variables we just computed + forward_var = (np.concatenate(viterbivars_t) + feat).reshape(1, -1) + backpointers.append(bptrs_t) + + # Transition to STOP_TAG + terminal_var = forward_var + transitions[t2i[STOP_TAG]] + best_tag_id = np.argmax(terminal_var, 1)[0] + path_score = terminal_var[0][best_tag_id] + + # Follow the back pointers to decode the best path. 
+ best_path = [best_tag_id] + for bptrs_t in reversed(backpointers): + best_tag_id = bptrs_t[best_tag_id] + best_path.append(best_tag_id) + # Pop off the start tag (we dont want to return that to the caller) + start = best_path.pop() + assert start == t2i[START_TAG] # Sanity check + best_path.reverse() + return path_score, best_path + if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -66,7 +110,9 @@ ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) - preds = [i2t[i] for i in ort_outs[0]] + _, ids = viterbi_decode(ort_outs[0], ort_outs[1], t2i) + + preds = [i2t[i] for i in ids] sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) From 743bbc532f162b522ef00911f9b1975451e6a43b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 10 Mar 2022 11:02:21 -0700 Subject: [PATCH 131/134] remove pick span and transduce to simplify the model --- main/src/main/python/pytorch/forwardLayer.py | 38 ++++++++------------ main/src/main/python/pytorch/rnnLayer.py | 2 +- main/src/main/python/pytorch/utils.py | 30 ++-------------- 3 files changed, 18 insertions(+), 52 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 33040cf95..d071c66a8 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -25,48 +25,40 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.inDim = spanLength(spans) if spans is not None else inputSize self.outDim = len(t2i) - - def pickSpan(self, v, i): - if self.spans is None: - return v - else: - # Zheng: Will spans overlap? 
- vs = list() - for span in self.spans: - e = torch.index_select(v, i, torch.tensor(range(span[0], span[1]))) - vs.append(e) - return torch.cat(vs, dim=i) + # remove pick span part to simplify the ONNX converting + # def pickSpan(self, v, i): + # if self.spans is None: + # return v + # else: + # # Zheng: Will spans overlap? + # vs = list() + # for span in self.spans: + # e = torch.index_select(v, i, torch.tensor(range(span[0], span[1]))) + # vs.append(e) + # return torch.cat(vs, dim=i) def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + argExp = self.dropout(inputExpressions) emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: emissionScores = F.relu(emissionScores) - # for i, e in enumerate(inputExpressions): - # argExp = self.dropout(self.pickSpan(e)) - # l1 = self.dropout(self.pH(argExp)) - # if self.nonlinearity == NONLIN_TANH: - # l1 = F.tanh(l1) - # elif self.nonlinearity == NONLIN_RELU: - # l1 = F.relu(l1) - # emissionScores.append(l1) else: emissionScores = list() if headPositionsOpt is None: raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = self.dropout(self.pickSpan(e, 0)) + argExp = self.dropout(e) if headPosition >= 0: # there is an explicit head in the sentence - predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) + predExp = self.dropout(inputExpressions[headPosition]) else: # the head is root. 
we used a dedicated Parameter for root - predExp = self.dropout(self.pickSpan(self.pRoot, 0)) + predExp = self.dropout(self.pRoot) ss = torch.cat([argExp, predExp]) l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index f83fb2420..32cb6edb1 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -28,7 +28,7 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) - States = transduce(inputExpressions, self.wordRnnBuilder) + States, _ = self.wordRnnBuilder(inputExpressions.unsqueeze(1)) States = States.squeeze(1) if self.useHighwayConnections: States = torch.cat([States, inputExpressions], dim=1) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 2d9775eae..abcb1e6f9 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -42,19 +42,15 @@ def save(file, values, comment): file.write("\n") def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): - hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - output = transduce(charEmbeddings, charRnnBuilder) + output, _ = charRnnBuilder(charEmbeddings.unsqueeze(1)) result = output.squeeze(1)[-1] - # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result def mkCharacterEmbedding2(char_ids, charLookupParameters, charRnnBuilder): - hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(char_ids) - output = transduce(charEmbeddings, charRnnBuilder) + output, _ = charRnnBuilder(charEmbeddings.unsqueeze(1)) result = output.squeeze(1)[-1] - # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result def readString2Ids(s2iFilename): @@ -75,28 
+71,6 @@ def readChar2Ids(s2iFilename): s2i[chr(int(k))] = int(v) return s2i -def transduce(embeddings, builder): - - builder = builder.float() - - hidden_dim = builder.hidden_size - bi_direct = builder.bidirectional - mode = builder.mode - - if mode == 'LSTM': - if bi_direct: - # change 1 to the layers we need - output, (h, c) = builder(embeddings.unsqueeze(1)) - else: - output, (h, c) = builder(embeddings.unsqueeze(1)) - elif mode == 'GRU': - if bi_direct: - output, h = builder(embeddings.unsqueeze(1)) - else: - output, h = builder(embeddings.unsqueeze(1)) - - return output - def sentenceLossGreedy(emissionScoresForSeq, golds): assert(emissionScoresForSeq.size(0) == len(golds)) criterion = nn.CrossEntropyLoss() From ca34b1c0a2310fb2a391a18a1494748ad5fbdedd Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 10 Mar 2022 11:06:26 -0700 Subject: [PATCH 132/134] Update mtl-en-pos-chunk-srlp.conf --- .../org/clulab/mtl-en-pos-chunk-srlp.conf | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf b/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf index 828fd973d..b23692fff 100644 --- a/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf +++ b/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf @@ -9,7 +9,7 @@ mtl { learnedWordEmbeddingSize = 128 charEmbeddingSize = 32 charRnnStateSize = 16 - c2i = "org/clulab/c2i-en.txt" + c2i = "../resources/org/clulab/c2i-en.txt" } intermediate1 { @@ -21,9 +21,9 @@ mtl { task1 { name = "En POS tagging" - train = "dynet/en/pos/train.txt" - dev = "dynet/en/pos/dev.txt" - test = "dynet/en/pos/test.txt" + train = "/data/nlp/corpora/processors-dynet/en/pos/train.txt" + dev = "/data/nlp/corpora/processors-dynet/en/pos/dev.txt" + test = "/data/nlp/corpora/processors-dynet/en/pos/test.txt" layers { final { @@ -34,9 +34,9 @@ mtl { task2 { name = "En chunking" - train = "dynet/en/chunking/train.txt" - dev = 
"dynet/en/chunking/test.txt" - test = "dynet/en/chunking/test.txt" + train = "/data/nlp/corpora/processors-dynet/en/chunking/train.txt" + dev = "/data/nlp/corpora/processors-dynet/en/chunking/test.txt" + test = "/data/nlp/corpora/processors-dynet/en/chunking/test.txt" layers { final { @@ -47,9 +47,9 @@ mtl { task3 { name = "En SRL predicates" - train = "dynet/en/srl/train.preds" - dev = "dynet/en/srl/dev.preds" - test = "dynet/en/srl/test-wsj.preds" + train = "/data/nlp/corpora/processors-dynet/en/srl/train.preds" + dev = "/data/nlp/corpora/processors-dynet/en/srl/dev.preds" + test = "/data/nlp/corpora/processors-dynet/en/srl/test-wsj.preds" layers { final { From 52d903c04bf8d2f7f0241209fcacc7b544507951 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Sun, 20 Mar 2022 22:10:30 -0700 Subject: [PATCH 133/134] save the json only once to save memory and space --- main/src/main/python/pytorch/layers.py | 7 +----- main/src/main/python/pytorch/metal.py | 31 ++++++++++++++++---------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index b93d61edc..926bc6606 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -74,20 +74,15 @@ def start_eval(self): def get_state_dict(self): params = dict() - j_params = dict() if self.initialLayer is not None: params['initialLayer'] = self.initialLayer.state_dict() - j_params['initialLayer'] = {k:v.data.tolist() for k, v in params['initialLayer'].items()} if self.intermediateLayers: params['intermediateLayers'] = list() - j_params['intermediateLayers'] = list() for il in self.intermediateLayers: params['intermediateLayers'].append(il.state_dict()) - j_params['intermediateLayers'].append({k:v.data.tolist() for k, v in params['intermediateLayers'][-1].items()}) if self.finalLayer is not None: params['finalLayer'] = self.finalLayer.state_dict() - j_params['finalLayer'] = {k:v.data.tolist() for k, v in 
params['finalLayer'].items()} - return params, j_params + return params def load_state_dict(self, params): if self.initialLayer is not None: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 59b009f59..1c6deb81d 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -277,12 +277,14 @@ def test(self): def save(self, baseFilename): params = list() - j_params = list() + if "-epoch0" in baseFilename: + j_params = list() for layers in self.model: - sd, j_sd = layers.get_state_dict() - x2i = layers.saveX2i() - params.append({"model": sd, "x2i": x2i}) - j_params.append({"x2i": x2i}) + sd = layers.get_state_dict() + params.append(sd) + if "-epoch0" in baseFilename: + x2i = layers.saveX2i() + j_params.append({"x2i": x2i}) # torch pickle save try: @@ -292,8 +294,9 @@ def save(self, baseFilename): print("[Warning: Saving failed... continuing anyway.]") # We can also save as text json file: - with open(baseFilename+".json", "w") as f: - f.write(json.dumps(j_params)) + if "-epoch0" in baseFilename: + with open(baseFilename.replace("-epoch0", "")+".json", "w") as f: + f.write(json.dumps(j_params)) @classmethod @@ -301,9 +304,11 @@ def load(cls, modelFilenamePrefix): print (f"Loading MTL model from {modelFilenamePrefix}...") layersSeq = list() checkpoint = torch.load(modelFilenamePrefix+".torch") - for param in checkpoint: - layers = Layers.loadX2i(param['x2i']) - layers.load_state_dict(param['model']) + with open(modelFilenamePrefix+".json") as f: + x2i = josn.load(f) + for i, param in enumerate(checkpoint): + layers = Layers.loadX2i(x2i[i]) + layers.load_state_dict(param) layersSeq.append(layers) print (f"Loading MTL model from {modelFilenamePrefix} complete.") @@ -317,9 +322,11 @@ def load_multi(cls, models): layersSeq = list() for model in models: checkpoint = torch.load(model+".torch") + with open(model+".json") as f: + x2i = josn.load(f) for i, param in enumerate(checkpoint): - layers 
= Layers.loadX2i(param['x2i']) - layers.load_state_dict(param['model']) + layers = Layers.loadX2i(x2i[i]) + layers.load_state_dict(param) if len(layersSeq) Date: Sun, 20 Mar 2022 22:23:53 -0700 Subject: [PATCH 134/134] Update mtl-en-srla.conf --- main/src/main/resources/org/clulab/mtl-en-srla.conf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-srla.conf b/main/src/main/resources/org/clulab/mtl-en-srla.conf index 6dcf5bbd8..8f5181484 100644 --- a/main/src/main/resources/org/clulab/mtl-en-srla.conf +++ b/main/src/main/resources/org/clulab/mtl-en-srla.conf @@ -16,9 +16,9 @@ mtl { distanceEmbeddingSize = 16 distanceWindowSize = 50 useIsPredicate = true - c2i = "org/clulab/c2i-en.txt" - tag2i = "org/clulab/tag2i-en.txt" - ne2i = "org/clulab/ne2i-en.txt" + c2i = "../resources/org/clulab/c2i-en.txt" + tag2i = "../resources/org/clulab/tag2i-en.txt" + ne2i = "../resources/org/clulab/ne2i-en.txt" } intermediate1 { @@ -31,9 +31,9 @@ mtl { task1 { name = "En SRL arguments" - train = "dynet/en/srl/train.args" - dev = "dynet/en/srl/dev.args" - test = "dynet/en/srl/test-wsj.args" + train = "/data/nlp/corpora/processors-dynet/en/srl/train.args" + dev = "/data/nlp/corpora/processors-dynet/en/srl/dev.args" + test = "/data/nlp/corpora/processors-dynet/en/srl/test-wsj.args" type = "dual" layers {