From d86fccbc50b217dac7974cc24db5ca6ace193b0c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Sep 2021 18:58:32 -0700 Subject: [PATCH 001/134] init code --- main/src/main/python/__init__.py | 0 main/src/main/python/pytorch/__init__.py | 0 main/src/main/python/pytorch/metal.py | 0 main/src/main/python/pytorch/taskManager.py | 213 ++++++++++++++++++ main/src/main/python/run.py | 25 ++ main/src/main/python/sequences/__init__.py | 0 .../src/main/python/sequences/columnReader.py | 48 ++++ 7 files changed, 286 insertions(+) create mode 100644 main/src/main/python/__init__.py create mode 100644 main/src/main/python/pytorch/__init__.py create mode 100644 main/src/main/python/pytorch/metal.py create mode 100644 main/src/main/python/pytorch/taskManager.py create mode 100644 main/src/main/python/run.py create mode 100644 main/src/main/python/sequences/__init__.py create mode 100644 main/src/main/python/sequences/columnReader.py diff --git a/main/src/main/python/__init__.py b/main/src/main/python/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/__init__.py b/main/src/main/python/pytorch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py new file mode 100644 index 000000000..f9fede902 --- /dev/null +++ b/main/src/main/python/pytorch/taskManager.py @@ -0,0 +1,213 @@ +import random +import math +from sequences.columnReader import ColumnReader + +TYPE_BASIC = 0 +TYPE_DUAL = 1 + +class TaskManager: + + def __init__(self, config, seed): + + self.config = config + self.random = seed + + # How many shards to have per epoch + self.shardsPerEpoch = config.get_int("mtl.shardsPerEpoch", 10) + + # Total number of epochs + self.maxEpochs:Int = config.get_int("mtl.maxEpochs", 100) + + 
# Training patience in number of epochs + self.epochPatience:Int = config.get_int("mtl.epochPatience", 5) + + # Array of all tasks to be managed + self.tasks = self.readTasks() + + self.taskCount = len(self.tasks) + self.indices = range(self.taskCount) + + # Training shards from all tasks + self.shards = self.mkShards() + + # Construct training shards by interleaving shards from all tasks + def mkShards(self): + shardsByTasks = list() + + # construct the shards for each task + for i in self.indices: + shardsByTasks += [self.tasks[i].mkShards()] + assert(len(shardsByTasks[i]) == self.shardsPerEpoch) + + # now interleave the tasks + interleavedShards = list() + for i in range(self.shardsPerEpoch): + for j in self.indices: + crtShard = shardsByTasks[j][i] + interleavedShards += [crtShard] + + + # print ("All shards:") + # for(i <- interleavedShards.indices) + # print (s"${interleavedShards(i)}") + + + return interleavedShards + + # Iterator over all sentences coming from all interleaved shards + def getSentences(self): + return SentenceIterator(self.tasks, self.shards, self.random) + + # Reads all tasks from disk in memory + def readTasks(self): + numberOfTasks = self.config.get_int("mtl.numberOfTasks", None) + tasks = list() + for i in range(numberOfTasks): + tasks += [self.readTask(i + 1)] + + print (f"Read {numberOfTasks} tasks from config file.") + return tasks + + def readTask(self, taskNumber): + taskName = self.config.get_string(f"mtl.task{taskNumber}.name", None) + train = self.config.get_string(f"mtl.task{taskNumber}.train", None) + + dev = self.config.get_string(f"mtl.task{taskNumber}.dev", None) if f"mtl.task{taskNumber}.dev" in self.config else None + test = self.config.get_string(f"mtl.task{taskNumber}.test", None) if f"mtl.task{taskNumber}.test" in self.config else None + + taskType = self.parseType(self.config.get_string(f"mtl.task{taskNumber}.type", "basic")) + + weight = self.config.get_float(f"mtl.task{taskNumber}.weight", 1.0) + + return 
Task(taskNumber - 1, taskName, taskType, self.shardsPerEpoch, weight, train, dev, test) + + def parseType(self, inf): + if inf == "basic": return TYPE_BASIC + elif inf == "dual": return TYPE_DUAL + else: raise ValueError(f"ERROR: unknown task type {inf}!") + + def debugTraversal(self): + for epoch in range(self.maxEpochs): + print (f"Started epoch {epoch}") + sentCount = 0 + taskId = 0 + totalSents = 0 + for sentence in getSentences(): + totalSents += 1 + if(sentence[0] != taskId): + print (f"Read {sentCount} sentences from task {taskId}") + taskId = sentence[0] + sentCount = 1 + else: + sentCount += 1 + print (f"Read {sentCount} sentences from task {taskId}") + print (f"Read {totalSents} sentences in epoch {epoch}.") + +class SentenceIterator(object): + def __init__(tasks, shards, random): + + self.tasks = tasks + self.shards = shards + self.random = random #random seed + + # Offset in randomizedSentencePositions array + self.sentenceOffset = 0 + self.randomizedSentencePositions = randomizeSentences() + + class Sentence: + def __init__(self, taskId, sentencePosition): + self.taskId = taskId + self.sentencePosition = sentencePosition + + # Randomizes all sentences across all tasks + def randomizeSentences(): + # first, randomize the shards + random.seed(self.random) + randomizedShards = random.shuffle(self.shards) + randomizedSents = list() + for shard in randomizedShards: + # second, randomize the sentences inside each shard + sents = random.shuffle(list(range(shard.startPosition, shard.endPosition))) + for sent in sents: + # store the randomized sentences + randomizedSents += [Sentence(shard.taskId, sent)] + return randomizedSents + + def __len__(self): + return len(self.randomizedSentencePositions) + + def __iter__(self): + return self + + def hasNext(self): return self.sentenceOffset < len(self.randomizedSentencePositions) + + def __next__(self): + assert(self.sentenceOffset >= 0 and self.sentenceOffset < len(self.randomizedSentencePositions)) + + s = 
self.randomizedSentencePositions[sentenceOffset] + tid = s.taskId + sentence = self.tasks[tid].trainSentences[s.sentencePosition] + self.sentenceOffset += 1 + + #print ("shardPosition = $shardPosition, sentencePosition = $sentencePosition") + + return (tid, sentence) + +class Shard: + def __init__(self, taskId, startPosition, endPosition): + self.taskId = taskId + self.startPosition = startPosition + self.endPosition = endPosition + +class Task: + def __init__(self, + taskId, # this starts at 0 so we can use it as an index in the array of tasks + taskName:str, + taskType:int, + shardsPerEpoch:int, + taskWeight:float, + trainFileName:str, + devFileName:str = None, + testFileName:str = None): + self.taskId = taskId + taskNumber = taskId + 1 + print (f"Reading task {taskNumber} ({taskName})...") + self.trainSentences = ColumnReader.readColumns(trainFileName) + self.devSentences = ColumnReader.readColumns(devFileName) if devFileName else None + self.testSentences = ColumnReader.readColumns(testFileName) if testFileName else None + + self.isBasic:Boolean = taskType == TYPE_BASIC + self.isDual:Boolean = taskType == TYPE_DUAL + + if taskType == TYPE_BASIC: + self.prettyType = "basic" + elif taskType == TYPE_DUAL: + self.prettyType = "dual" + else: + self.prettyType = "unknown" + + # The size of the training shard for this task + self.shardSize = math.ceil(len(self.trainSentences) / shardsPerEpoch) + + # Current position in the training sentences when we iterate during training + currentTrainingSentencePosition = 0 + + print (f"============ starting task {taskNumber} ============") + print (f"Read {len(self.trainSentences)} training sentences for task {taskNumber}, with shard size {self.shardSize}.") + if(self.devSentences is not None): + print (f"Read {len(self.devSentences)} development sentences for task {taskNumber}.") + if(self.testSentences is not None): + print (f"Read {len(self.testSentences)} testing sentences for task {taskNumber}.") + print (f"Using taskWeight = 
{taskWeight}") + print (f"Task type = {self.prettyType}.") + print (f"============ completed task {taskNumber} ============") + + # Construct the shards from all training sentences in this task + def mkShards(self): + shards = list() + crtPos = 0 + while(crtPos < len(self.trainSentences)): + endPos = min(crtPos + self.shardSize, len(self.trainSentences)) + shards += [Shard(self.taskId, crtPos, endPos)] + crtPos = endPos + return shards diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py new file mode 100644 index 000000000..5d6ea1586 --- /dev/null +++ b/main/src/main/python/run.py @@ -0,0 +1,25 @@ +from pyhocon import ConfigFactory +import argparse +from pytorch.taskManager import TaskManager + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--train', action='store_true', help='Set the code to training purpose.') + parser.add_argument('--test', action='store_true', help='Set the code to testing purpose.') + parser.add_argument('--shell', action='store_true', help='Set the code to shell mode.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) + args = parser.parse_args() + + if args.train: + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + # modelName = args.model_file + # mtl = Metal(taskManager, parameters, None) + # mtl.train(modelName) + elif args.test: + pass + elif args.shell: + pass \ No newline at end of file diff --git a/main/src/main/python/sequences/__init__.py b/main/src/main/python/sequences/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/sequences/columnReader.py b/main/src/main/python/sequences/columnReader.py new file mode 100644 index 000000000..c38aa66c7 --- /dev/null +++ 
b/main/src/main/python/sequences/columnReader.py @@ -0,0 +1,48 @@ +#----------------------------------------------------------- +# Reads the CoNLL-like column format +#----------------------------------------------------------- +class ColumnReader: + + def readColumns(source): + if type(source) is str: + source = open(source) + sentence = list() + sentences = list() + for line in source: + print (line) + l = line.strip() + if (l is ""): + # end of sentence + if (sentence): + sentences += [sentence] + sentence = list() + else: + # within the same sentence + bits = l.split("\\s") + if (len(bits) < 2): + raise RuntimeError(f"ERROR: invalid line {l}!") + sentence += Row(bits) + + if (sentence): + sentences += [sentence] + + source.close() + return sentences + +# ----------------------------------------------------------- +# Stores training data for sequence modeling +# Mandatory columns: 0 - word, 1 - label +# Optional columns: 2 - POS tag, 3+ SRL arguments +# @param tokens +# ----------------------------------------------------------- + +class Row: + + def __init__(self, tokens): + self.tokens = tokens + self.length = len(tokens) + + def get(self, idx): + if(idx >= self.length): + raise RuntimeError(f"ERROR: trying to read field #{idx}, which does not exist in this row: {tokens}!") + return tokens[idx] From 07c4142ebc8a804a09ceb207e4d1070eaee620c2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Sep 2021 19:12:17 -0700 Subject: [PATCH 002/134] Update columnReader.py --- main/src/main/python/sequences/columnReader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/sequences/columnReader.py b/main/src/main/python/sequences/columnReader.py index c38aa66c7..0f8c04610 100644 --- a/main/src/main/python/sequences/columnReader.py +++ b/main/src/main/python/sequences/columnReader.py @@ -9,7 +9,6 @@ def readColumns(source): sentence = list() sentences = list() for line in source: - print (line) l = line.strip() if (l is ""): # 
end of sentence @@ -18,10 +17,10 @@ def readColumns(source): sentence = list() else: # within the same sentence - bits = l.split("\\s") + bits = l.split("\t") if (len(bits) < 2): raise RuntimeError(f"ERROR: invalid line {l}!") - sentence += Row(bits) + sentence += [Row(bits)] if (sentence): sentences += [sentence] From c9ec5b8d398232c61cf552eb479b615a2e348239 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 16 Sep 2021 12:14:25 -0700 Subject: [PATCH 003/134] refined the code and fixed few bugs --- main/src/main/python/pytorch/taskManager.py | 65 +++------------------ 1 file changed, 7 insertions(+), 58 deletions(-) diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index f9fede902..069600d93 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -46,17 +46,16 @@ def mkShards(self): crtShard = shardsByTasks[j][i] interleavedShards += [crtShard] - - # print ("All shards:") - # for(i <- interleavedShards.indices) - # print (s"${interleavedShards(i)}") - - return interleavedShards # Iterator over all sentences coming from all interleaved shards def getSentences(self): - return SentenceIterator(self.tasks, self.shards, self.random) + random.seed(self.random) + randomizedShards = random.sample(self.shards, len(self.shards)) + for shard in randomizedShards: + sents = random.sample(range(shard.startPosition, shard.endPosition), shard.endPosition-shard.startPosition) + for sent in sents: + yield (shard.taskId, self.tasks[shard.taskId].trainSentences[sent]) # Reads all tasks from disk in memory def readTasks(self): @@ -92,7 +91,7 @@ def debugTraversal(self): sentCount = 0 taskId = 0 totalSents = 0 - for sentence in getSentences(): + for sentence in self.getSentences(): totalSents += 1 if(sentence[0] != taskId): print (f"Read {sentCount} sentences from task {taskId}") @@ -103,56 +102,6 @@ def debugTraversal(self): print (f"Read {sentCount} sentences from task 
{taskId}") print (f"Read {totalSents} sentences in epoch {epoch}.") -class SentenceIterator(object): - def __init__(tasks, shards, random): - - self.tasks = tasks - self.shards = shards - self.random = random #random seed - - # Offset in randomizedSentencePositions array - self.sentenceOffset = 0 - self.randomizedSentencePositions = randomizeSentences() - - class Sentence: - def __init__(self, taskId, sentencePosition): - self.taskId = taskId - self.sentencePosition = sentencePosition - - # Randomizes all sentences across all tasks - def randomizeSentences(): - # first, randomize the shards - random.seed(self.random) - randomizedShards = random.shuffle(self.shards) - randomizedSents = list() - for shard in randomizedShards: - # second, randomize the sentences inside each shard - sents = random.shuffle(list(range(shard.startPosition, shard.endPosition))) - for sent in sents: - # store the randomized sentences - randomizedSents += [Sentence(shard.taskId, sent)] - return randomizedSents - - def __len__(self): - return len(self.randomizedSentencePositions) - - def __iter__(self): - return self - - def hasNext(self): return self.sentenceOffset < len(self.randomizedSentencePositions) - - def __next__(self): - assert(self.sentenceOffset >= 0 and self.sentenceOffset < len(self.randomizedSentencePositions)) - - s = self.randomizedSentencePositions[sentenceOffset] - tid = s.taskId - sentence = self.tasks[tid].trainSentences[s.sentencePosition] - self.sentenceOffset += 1 - - #print ("shardPosition = $shardPosition, sentencePosition = $sentencePosition") - - return (tid, sentence) - class Shard: def __init__(self, taskId, startPosition, endPosition): self.taskId = taskId From 9311763f54504602b6813fe10e6c0ae9bf5ef19a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 20 Sep 2021 12:34:23 -0700 Subject: [PATCH 004/134] initial code for metal --- main/src/main/python/pytorch/metal.py | 58 ++++++++++++ main/src/main/python/pytorch/taskManager.py | 9 +- 
main/src/main/python/pytorch/utils.py | 22 +++++ main/src/main/python/sequences/rowReaders.py | 96 ++++++++++++++++++++ 4 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 main/src/main/python/pytorch/utils.py create mode 100644 main/src/main/python/sequences/rowReaders.py diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index e69de29bb..d9f13a02e 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -0,0 +1,58 @@ +from utils import Utils +from collections import Counter +from sequences.rowReader import * + +class Metal(): + """docstring for Metal""" + def __init__(self, taskManager, parameters, modelOpt): + # One Layers object per task; model(0) contains the Layers shared between all tasks (if any) + if modelOpt: + self.model = modelOpt + else: + self.model = self.initialize() + self.taskManager = taskManager + + def initialize(self): + + taskWords, taskLabels = mkVocabularies() + + layersPerTask = [None for _ in range(taskManager.taskCount + 1)] + + layersPerTask[0] = Layers(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) + + inputSize = layersPerTask[0].outDim + + for i in taskManager.indices: + layersPerTask[i+1] = Layers(taskManager, s"mtl.task${i + 1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) + + for i in range(len(layersPerTask)): + print (s"Summary of layersPerTask({i}):") + print (layersPerTask[i]) + + return layersPerTask + + def mkVocabularies(self): + # index 0 reserved for the shared Layers; tid + 1 corresponds to each task + labels = [Counter() for _ in range(taskManager.taskCount + 1)] + for i in range(1, len(labels)): # labels(0) not used, since only task-specific layers have a final layer + labels[i][Utils.START_TAG] += 1 + labels[i][Utils.STOP_TAG] += 1 + + words = [Counter() for _ in range(taskManager.taskCount + 1)] + + 
reader = MetalRowReader() + + for tid in taskManager.indices: + for sentence in taskManager.tasks[tid].trainSentences + annotatedSentences = reader.toAnnotatedSentences(sentence) + + for asent in annotatedSentences: + annotatedSentence = asent[0] + sentenceLabels = asent[1] + for i in annotatedSentence.indices: + words[tid + 1][annotatedSentence.words[i]] += 1 + words[0][annotatedSentence.words[i]] += 1 + labels[tid + 1][sentenceLabels[i]] += 1 + + return words, labels + diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index 069600d93..ef4c18bbf 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -1,6 +1,7 @@ import random import math from sequences.columnReader import ColumnReader +from dataclasses import dataclass TYPE_BASIC = 0 TYPE_DUAL = 1 @@ -102,11 +103,11 @@ def debugTraversal(self): print (f"Read {sentCount} sentences from task {taskId}") print (f"Read {totalSents} sentences in epoch {epoch}.") +@dataclass class Shard: - def __init__(self, taskId, startPosition, endPosition): - self.taskId = taskId - self.startPosition = startPosition - self.endPosition = endPosition + taskId: int + startPosition: int + endPosition: int class Task: def __init__(self, diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py new file mode 100644 index 000000000..bbe4bfb93 --- /dev/null +++ b/main/src/main/python/pytorch/utils.py @@ -0,0 +1,22 @@ + +class Utils: + def __init__(self): + self.concatenateCount = 0 + + self.UNK_WORD = "" + self.EOS_WORD = "" + + self.UNK_EMBEDDING = 0 + + self.START_TAG = "" + self.STOP_TAG = "" + + self.RANDOM_SEED = 2522620396L # used for both DyNet, and the JVM seed for shuffling data + self.WEIGHT_DECAY = 1e-5 + + self.LOG_MIN_VALUE = -10000.0 + + self.DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default + + self.IS_DYNET_INITIALIZED = False + diff --git 
a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py new file mode 100644 index 000000000..c11c39865 --- /dev/null +++ b/main/src/main/python/sequences/rowReaders.py @@ -0,0 +1,96 @@ + +class AnnotatedSentence: + + def __init__(self, words, posTags = None, neTags = None, headPositions = None): + self.words = words + self.posTags = posTags + self.neTags = neTags + self.headPositions = headPositions + self.size = len(words) + self.indicies = range(self.size) + +class RowReader(object): + + def __init__(self): + raise NotImplementedError + + def toAnnotatedSentences(self, rows): + raise NotImplementedError + +class MetalRowReader(RowReader): + + def __init__(self): + self.WORD_POSITION = 0 + self.POS_TAG_POSITION = 1 + self.NE_LABEL_POSITION = 2 + self.LABEL_START_OFFSET = 3 + + def toAnnotatedSentences(self, rows): + if (len(rows.head) == 2): + self.parseSimple(rows) + elif (len(rows.head) == 4): + self.parseSimpleExtended(rows) + elif (len(rows.head) >= 5): + self.parseFull(rows) + else: + raise RuntimeError("ERROR: the Metal format expects 2, 4, or 5+ columns!") + + # Parser for the simple format: word, label + def parseSimple(rows): + assert(len(rows.head) == 2) + words = list() + labels = list() + + for row in rows: + words += [row.get(self.WORD_POSITION)] + labels += [row.get(self.WORD_POSITION + 1)] + + return AnnotatedSentence(words), labels + + # Parser for the simple extended format: word, POS tag, NE label, label + def parseSimpleExtended(rows): + assert(len(rows.head) == 4) + words = list() + posTags = list() + neLabels = list() + labels = list() + + for row in rows: + words += [row.get(self.WORD_POSITION)] + posTags += [row.get(self.POS_TAG_POSITION)] + neLabels += [row.get(self.NE_LABEL_POSITION)] + labels += [row.get(self.LABEL_START_OFFSET)] + + return AnnotatedSentence(words), posTags, neLabels, labels + + # Parser for the full format: word, POS tag, NE label, (label head)+ + def parseFull(rows: 
IndexedSeq[Row]): + assert(len(rows.head) >= 5) + numSent = (len(rows.head) - 3) / 2 + assert(numSent >= 1) + + words = list() + posTags = list() + neLabels = list() + headPositions = [list() for i in range(numSent)] + labels = [list() for i in range(numSent)] + + for row in rows: + words += [row.get(self.WORD_POSITION)] + posTags += [row.get(self.POS_TAG_POSITION)] + neLabels += [row.get(self.NE_LABEL_POSITION)] + + for j in range(numSent): + labels[j]+= [row.get(self.LABEL_START_OFFSET + (j * 2))] + try: + headPositions[j] += [int(row.get(self.LABEL_START_OFFSET + (j * 2) + 1))] + except: + raise RuntimeError # not sure about this part + + sentences = list() + for i in range(numSent): + annotatedSent = AnnotatedSentence(words, posTags, neLabels, headPositions[i]) + sentLabels = labels[i] + sentences += [(annotatedSent, sentLabels)] + + return sentences From a33fe369f162409247393276bde85ce11469c592 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 20 Sep 2021 20:21:00 -0700 Subject: [PATCH 005/134] refine metal, added layers(partial) --- main/src/main/python/pytorch/layers.py | 151 +++++++++++++++++++++++++ main/src/main/python/pytorch/metal.py | 20 ++-- main/src/main/python/pytorch/utils.py | 33 +++--- 3 files changed, 181 insertions(+), 23 deletions(-) create mode 100644 main/src/main/python/pytorch/layers.py diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py new file mode 100644 index 000000000..c33f3b3e1 --- /dev/null +++ b/main/src/main/python/pytorch/layers.py @@ -0,0 +1,151 @@ +import torch.nn as nn +from utils import * + +class Layers(nn.Module): + def __init__(self, initialLayer, intermediateLayers, finalLayer): + super().__init__() + if finalLayer: + self.outDim = finalLayer.outDim + elif intermediateLayers: + self.outDim = intermediateLayers[-1].outDim + elif initialLayer: + self.outDim = initialLayer.outDim + else: + self.outDim = None + + if initialLayer and intermediateLayers and finalLayer: + 
self.nonEmpty = True + self.isEmpty = not self.nonEmpty + + self.initialLayer = initialLayer + self.intermediateLayers = intermediateLayers + self.finalLayer = finalLayer + + def __str__(self): + s = "" + started = False + if(initialLayer.nonEmpty): + s += "initial = " + initialLayer + started = True + for i in intermediateLayers.indices: + if(started) s += " " + s += s"intermediate ({i+1}) = " + intermediateLayers[i] + started = True + if(finalLayer.nonEmpty): + if(started) s += " " + s += "final = " + finalLayer + return s + + def forward(self, sentence, constEmnbeddings, doDropout): + if self.initialLayer.isEmpty: + raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") + states = self.initialLayer(sentence, constEmnbeddings, doDropout) + for intermediateLayer in self.intermediateLayers: + states = intermediateLayer(states, doDropout) + if self.finalLayer.nonEmpty: + states = self.finalLayer(states, sentence.headPositions, doDropout) + + return states + + def forwardFrom(self, inStates, headPositions, doDropout): + if self.initialLayer.nonEmpty: + raise RuntimeError(f"ERROR: you can't call forwardFrom() on a Layers object that has an initial layer: {self}") + states = inStates + for intermediateLayer in self.intermediateLayers: + states = intermediateLayer(states, doDropout) + if self.finalLayer.nonEmpty: + states = self.finalLayer(states, sentence.headPositions, doDropout) + + return states + + def saveX2i(self): + x2i = dict() + if self.initialLayer.nonEmpty: + x2i['hasInitial'] = 1 + x2i['initialLayer'] = self.initialLayer.saveX2i() + else: + x2i['hasInitial'] = 0 + x2i['intermediateCount'] = len(intermediateLayers) + for il in self.intermediateLayers: + il.saveX2i() + if self.finalLayer.nonEmpty: + x2i['hasFinal'] = 1 + x2i['finalLayer'] = self.finalLayer.saveX2i() + else: + x2i['finalLayer'] = 0 + + return x2i + + @classmethod + def apply(cls, config, paramPrefix, parameters, wordCounter, 
labelCounter, isDual, providedInputSize): + initialLayer = EmbeddingLayer.initialize(config, paramPrefix + ".initial", parameters, wordCounter) + + if(initialLayer): + inputSize = initialLayer.outDim + elif(providedInputSize): + inputSize = providedInputSize + else: + inputSize = None + + intermediateLayers = list() + done = False + MAX_INTERMEDIATE_LAYERS = 10 + + for i in range(1, MAX_INTERMEDIATE_LAYERS): + if done: + break + if inputSize is None: + raise RuntimeError("ERROR: trying to construct an intermediate layer without a known input size!") + + intermediateLayer = RnnLayer.initialize(config, paramPrefix + f".intermediate{i}", parameters, inputSize) + + if intermediateLayer: + intermediateLayers.append(intermediateLayer) + inputSize = intermediateLayer.outDim + else: + done = True + + if labelCounter: + if inputSize is None: + raise RuntimeError("ERROR: trying to construct a final layer without a known input size!") + else: + finalLayer = ForwardLayer.initialize(config, paramPrefix + ".final", parameters, labelCounter, isDual, inputSize) + else: + finalLayer = None + + return cls(initialLayer, intermediateLayers, finalLayer) + + @classmethod + def loadX2i(cls, models, x2i): + hasInitial = x2i['hasInitial'] + initialLayer = EmbeddingLayer.load(models, x2i) if hasInitial == 1 else None + + intermediateLayers = list() + intermediateCount = x2i['intermediateCount'] + for _ in range(intermediateCount): + il = RnnLayer.load(models, x2i) + intermediateLayers.append(il) + + hasFinal = x2i['hasFinal'] + finalLayer = ForwardLayer.load(models, x2i) if hasFinal == 1 else none + + return cls(initialLayer, intermediateLayers, finalLayer) + + def predictJointly(layers, sentence, constEmnbeddings): + TODO + def forwardForTask(layers, taskId, sentence, constEmnbeddings, doDropout): + TODO + def predict(layers, taskId, sentence, constEmnbeddings): + TODO + def predictWithScores(layers, taskId, sentence, constEmnbeddings): + TODO + def parse(layers, sentence, 
constEmnbeddings): + TODO + def loss(layers, taskId, sentence, goldLabels): + TODO + + + + + + diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index d9f13a02e..23f3f2b16 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -1,4 +1,4 @@ -from utils import Utils +from utils import * from collections import Counter from sequences.rowReader import * @@ -18,15 +18,15 @@ def initialize(self): layersPerTask = [None for _ in range(taskManager.taskCount + 1)] - layersPerTask[0] = Layers(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) + layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) inputSize = layersPerTask[0].outDim for i in taskManager.indices: - layersPerTask[i+1] = Layers(taskManager, s"mtl.task${i + 1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) + layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) for i in range(len(layersPerTask)): - print (s"Summary of layersPerTask({i}):") + print (f"Summary of layersPerTask({i}):") print (layersPerTask[i]) return layersPerTask @@ -35,23 +35,23 @@ def mkVocabularies(self): # index 0 reserved for the shared Layers; tid + 1 corresponds to each task labels = [Counter() for _ in range(taskManager.taskCount + 1)] for i in range(1, len(labels)): # labels(0) not used, since only task-specific layers have a final layer - labels[i][Utils.START_TAG] += 1 - labels[i][Utils.STOP_TAG] += 1 + labels[i][START_TAG] += 1 + labels[i][STOP_TAG] += 1 words = [Counter() for _ in range(taskManager.taskCount + 1)] reader = MetalRowReader() for tid in taskManager.indices: - for sentence in taskManager.tasks[tid].trainSentences + for sentence 
in taskManager.tasks[tid].trainSentences: annotatedSentences = reader.toAnnotatedSentences(sentence) for asent in annotatedSentences: annotatedSentence = asent[0] sentenceLabels = asent[1] - for i in annotatedSentence.indices: - words[tid + 1][annotatedSentence.words[i]] += 1 - words[0][annotatedSentence.words[i]] += 1 + for i, word in enumerate(annotatedSentence.words): + words[tid + 1][word] += 1 + words[0][word] += 1 labels[tid + 1][sentenceLabels[i]] += 1 return words, labels diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index bbe4bfb93..907ba8fd3 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -1,22 +1,29 @@ -class Utils: - def __init__(self): - self.concatenateCount = 0 +concatenateCount = 0 - self.UNK_WORD = "" - self.EOS_WORD = "" +UNK_WORD = "" +EOS_WORD = "" - self.UNK_EMBEDDING = 0 +UNK_EMBEDDING = 0 - self.START_TAG = "" - self.STOP_TAG = "" +START_TAG = "" +STOP_TAG = "" - self.RANDOM_SEED = 2522620396L # used for both DyNet, and the JVM seed for shuffling data - self.WEIGHT_DECAY = 1e-5 +RANDOM_SEED = 2522620396L # used for both DyNet, and the JVM seed for shuffling data +WEIGHT_DECAY = 1e-5 + +LOG_MIN_VALUE = -10000.0 + +DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default + +IS_DYNET_INITIALIZED = False + +def save(file, values, comment): + file.write("# " + comment + "\n") + for key, value in values.items(): + file.write(f"{key}\t{value}\n") + file.write("\n") - self.LOG_MIN_VALUE = -10000.0 - self.DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default - self.IS_DYNET_INITIALIZED = False From 822f6c2ef97895568ae0839ae7cb156d99bf070e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 21 Sep 2021 02:23:29 -0700 Subject: [PATCH 006/134] fixed some bugs, init code for embeddings --- .../python/pytorch/constEmbeddingsGlove.py | 7 + .../src/main/python/pytorch/embeddingLayer.py | 336 ++++++++++++++++++ main/src/main/python/pytorch/initialLayer.py | 11 
+ main/src/main/python/pytorch/layers.py | 22 +- main/src/main/python/pytorch/metal.py | 10 +- main/src/main/python/pytorch/taskManager.py | 4 +- main/src/main/python/pytorch/utils.py | 5 +- main/src/main/python/run.py | 7 +- main/src/main/python/sequences/rowReaders.py | 2 +- 9 files changed, 383 insertions(+), 21 deletions(-) create mode 100644 main/src/main/python/pytorch/constEmbeddingsGlove.py create mode 100644 main/src/main/python/pytorch/embeddingLayer.py create mode 100644 main/src/main/python/pytorch/initialLayer.py diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py new file mode 100644 index 000000000..d6129393d --- /dev/null +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass +import torch.nn as nn + +@dataclass +class ConstEmbeddingParameters: + emb: nn.Embedding.from_pretrianed("....") + w2i: dict \ No newline at end of file diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py new file mode 100644 index 000000000..386d581cf --- /dev/null +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -0,0 +1,336 @@ +from initialLayer import InitialLayer +import random +from utils import * + +DEFAULT_DROPOUT_PROB: float = DEFAULT_DROPOUT_PROBABILITY +DEFAULT_LEARNED_WORD_EMBEDDING_SIZE: int = 128 +DEFAULT_CHAR_EMBEDDING_SIZE: int = 32 +DEFAULT_CHAR_RNN_STATE_SIZE: int = 16 +DEFAULT_POS_TAG_EMBEDDING_SIZE: int = -1 # no POS tag embeddings by default +DEFAULT_NE_TAG_EMBEDDING_SIZE: int = -1 # no NE tag embeddings by default +DEFAULT_DISTANCE_EMBEDDING_SIZE: int = -1 # no distance embeddings by default +DEFAULT_POSITION_EMBEDDING_SIZE: int = -1 # no position embeddings by default +DEFAULT_DISTANCE_WINDOW_SIZE: int = -1 +DEFAULT_USE_IS_PREDICATE: int = -1 + +class EmbeddingLayer(InitialLayer): + def __init__(w2i, # word to index + w2f, # word to frequency + c2i, # character to index 
+ tag2i, # POS tag to index + ne2i, # NE tag to index + learnedWordEmbeddingSize, # size of the learned word embedding + charEmbeddingSize, # size of the character embedding + charRnnStateSize, # size of each one of the char-level RNNs + posTagEmbeddingSize, # size of the POS tag embedding + neTagEmbeddingSize, # size of the NE tag embedding + distanceEmbeddingSize, + distanceWindowSize, # window considered for distance values (relative to predicate) + positionEmbeddingSize, + useIsPredicate, # if true, add a Boolean bit to indicate if current word is the predicate + wordLookupParameters, + charLookupParameters, + charRnnBuilder, # RNNs for the character representation + posTagLookupParameters, + neTagLookupParameters, + distanceLookupParameters, + positionLookupParameters, + dropoutProb): + super().__init__() + self.w2i = w2i + self.w2f = w2f + self.c2i = c2i + self.tag2i = tag2i + self.ne2i = ne2i + self.learnedWordEmbeddingSize = learnedWordEmbeddingSize + self.charEmbeddingSize = charEmbeddingSize + self.charRnnStateSize = charRnnStateSize + self.posTagEmbeddingSize = posTagEmbeddingSize + self.neTagEmbeddingSize = neTagEmbeddingSize + self.distanceEmbeddingSize = distanceEmbeddingSize + self.distanceWindowSize = distanceWindowSize + self.positionEmbeddingSize = positionEmbeddingSize + self.useIsPredicate = useIsPredicate + self.wordLookupParameters = wordLookupParameters + self.charLookupParameters = charLookupParameters + self.charRnnBuilder = charRnnBuilder + self.posTagLookupParameters = posTagLookupParameters + self.neTagLookupParameters = neTagLookupParameters + self.distanceLookupParameters = distanceLookupParameters + self.positionLookupParameters = positionLookupParameters + self.dropoutProb = dropoutProb + + posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 + neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 + distanceDim = distanceWindowSize if distanceLookupParameters else 0 + positionDim = 1 if distanceLookupParameters 
and useIsPredicate else 0 + predicateDim = positionEmbeddingSize if positionLookupParameters else 0 + + self.outDim = TODO:ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + random.seed(RANDOM_SEED) + + def forward(self, sentence, constEmbeddings, doDropout): + + words = sentence.words + tags = sentence.posTags + nes = sentence.neTags + headPositions = sentence.headPositions + + # const word embeddings such as GloVe + constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) + assert(constEmbeddingsExpressions.size(0) == len(words)) + if(tags) assert(len(tags) == len(words)) + if(nes) assert(len(nes) == len(words)) + if(headPositions) assert(len(headPositions) == len(words)) + + # build the word embeddings one by one + embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, tags, nes, headPositions) + + return embeddings + + def mkConstEmbeddings(self, words, constEmbeddings): + idxs = [constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words] + embeddings = self.constEmbeddings.emb(idxs) + return embeddings + + def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPositions=None): + # + # Learned word embeddings + # These are initialized randomly, and updated during backprop + # + ids = [] + wordPositions = [] + for i, word in enumerate(words): + wordPositions.append(i) + id = self.w2i.get(word, 0) # 0 reserved for UNK in the vocab + # sample uniformly with prob 0.5 from singletons; move all other singletons to UNK + if(self.doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 + ids.append(id) + learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) + + # + # biLSTM over character embeddings + # + TODO: charEmbedding = mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder) + + # + # POS tag embedding + # + if tags: + posTagEmbed = 
self.posTagLookupParameters(torch.LongTensor([self.tag2i.get(tag, 0) for tag in tags])) + else: + posTagEmbed = None + # + # NE tag embedding + # + if nes: + neTagEmbed = self.neTagLookupParameters(torch.LongTensor([self.ne2i.get(ne, 0) for ne in nes])) + else: + neTagEmbed = None + # + # 1 if this word is the predicate + # + if headPositions and self.useIsPredicate: + predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) + else: + predEmbed = None + + # + # Distance embedding, relative to the distance to the predicate + # We cut the distance down to values inside the window [-distanceWindowSize, +distanceWindowSize] + # + if headPositions and self.distanceLookupParameters: + dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] + for i in range(dists): + if dists[i] < -self.distanceWindowSize: + dists[i] = self.distanceWindowSize-1 + if dists[i] > self.distanceWindowSize: + dist[i] = self.distanceWindowSize+1 + distanceEmbedding = self.distanceLookupParameters(torch.LongTensor(dists)) + else: + distanceEmbedding = None + + # + # Embedding that captures the absolute position of the token in the sentence + # + if self.positionLookupParameters: + values = [i if i<100 else 100 for i, word in enumerate(words)] + positionEmbedding = self.positionLookupParameters(torch.LongTensor(values)) + else: + positionEmbedding = None + + # The final word embedding is a concatenation of all these + embedParts = [constEmbeddings, learnedWordEmbeddings, charEmbedding, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + embed = torch.cat(embedParts, dim=1) + return embed + + def saveX2i(self): + x2i = dict() + x2i['w2i'] = self.w2i + x2i['w2f'] = self.w2f + x2i['c2i'] = self.c2i + if self.tag2i: + x2i['hasTag2i'] = 1 + x2i['tag2i'] = self.tag2i + else: + x2i['hasTag2i'] = 0 + if self.ne2i: + x2i['hasNe2i'] = 1 + 
x2i['ne2i'] = self.ne2i + else: + x2i['hasNe2i'] = 0 + x2i['learnedWordEmbeddingSize'] = self.learnedWordEmbeddingSize + x2i['charEmbeddingSize'] = self.charEmbeddingSize + x2i['charRnnStateSize'] = self.charRnnStateSize + x2i['posTagEmbeddingSize'] = self.posTagEmbeddingSize + x2i['neTagEmbeddingSize'] = self.neTagEmbeddingSize + x2i['distanceEmbeddingSize'] = self.distanceEmbeddingSize + x2i['distanceWindowSize'] = self.distanceWindowSize + x2i['useIsPredicate'] = 1 if self.useIsPredicate else 0 + x2i['positionEmbeddingSize'] = self.positionEmbeddingSize + x2i['dropoutProb'] = self.dropoutProb + + return x2i + + def __str__(self): + return f"EmbeddingLayer({self.outDim})" + + @classmethod + def load(cls, x2i): + w2i = x2i['w2i'] + w2f = x2i['w2f'] + c2i = x2i['c2i'] + tag2i = x2i['tag2i'] if x2i['hasTag2i'] == 1 else None + ne2i = x2i['ne2i'] if x2i['hasNe2i'] == 1 else None + + learnedWordEmbeddingSize = x2i.get('learnedWordEmbeddingSize', DEFAULT_LEARNED_WORD_EMBEDDING_SIZE) + charEmbeddingSize = x2i.get('charEmbeddingSize', DEFAULT_CHAR_EMBEDDING_SIZE) + charRnnStateSize = x2i.get('charRnnStateSize', DEFAULT_CHAR_RNN_STATE_SIZE) + posTagEmbeddingSize = x2i.get('posTagEmbeddingSize', DEFAULT_POS_TAG_EMBEDDING_SIZE) + neTagEmbeddingSize = x2i.get('neTagEmbeddingSize', DEFAULT_NE_TAG_EMBEDDING_SIZE) + distanceEmbeddingSize = x2i.get('distanceEmbeddingSize', DEFAULT_DISTANCE_EMBEDDING_SIZE) + distanceWindowSize = x2i.get('distanceWindowSize', DEFAULT_DISTANCE_WINDOW_SIZE) + useIsPredicate = x2i.get('useIsPredicate', DEFAULT_USE_IS_PREDICATE) == 1 + positionEmbeddingSize = x2i.get('positionEmbeddingSize', DEFAULT_POSITION_EMBEDDING_SIZE) + dropoutProb = x2i.get('dropoutProb', DEFAULT_DROPOUT_PROB) + + # make the loadable parameters + wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) + charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) + + #????? 
The following line would normally provoke construction of the initial ComputationGraph + #????? and do that outside of a synchronized area. This is avoided by ensuring that construction + #????? happens in Utils.initializeDyNet instead, just to be safe. + charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + + posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None + neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) if x2i['hasNe2i'] == 1 else None + distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None + positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None + + return cls(w2i, w2f, c2i, tag2i, ne2i, + learnedWordEmbeddingSize, + charEmbeddingSize, + charRnnStateSize, + posTagEmbeddingSize, + neTagEmbeddingSize, + distanceEmbeddingSize, + distanceWindowSize, + positionEmbeddingSize, + useIsPredicate, + wordLookupParameters, + charLookupParameters, + charRnnBuilder, + posTagLookupParameters, + neTagLookupParameters, + distanceLookupParameters, + positionLookupParameters, + dropoutProb) + + @classmethod + def initialize(cls, config, paramPrefix, wordCounter): + + if(not config.__contains__(paramPrefix)): + return None + + learnedWordEmbeddingSize = config.get_int(paramPrefix + ".learnedWordEmbeddingSize",DEFAULT_LEARNED_WORD_EMBEDDING_SIZE) + charEmbeddingSize = config.get_int(paramPrefix + ".charEmbeddingSize",DEFAULT_CHAR_EMBEDDING_SIZE) + charRnnStateSize = config.get_int(paramPrefix + ".charRnnStateSize",DEFAULT_CHAR_RNN_STATE_SIZE) + posTagEmbeddingSize = config.get_int(paramPrefix + ".posTagEmbeddingSize",DEFAULT_POS_TAG_EMBEDDING_SIZE) + neTagEmbeddingSize = config.get_int(paramPrefix + ".neTagEmbeddingSize",DEFAULT_NE_TAG_EMBEDDING_SIZE) + distanceEmbeddingSize = config.get_int(paramPrefix + 
".distanceEmbeddingSize",DEFAULT_DISTANCE_EMBEDDING_SIZE) + distanceWindowSize = config.get_int(paramPrefix + ".distanceWindowSize",DEFAULT_DISTANCE_WINDOW_SIZE) + useIsPredicate = config.getArgBoolean(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1) + positionEmbeddingSize = config.get_int(paramPrefix + ".positionEmbeddingSize",DEFAULT_POSITION_EMBEDDING_SIZE) + dropoutProb = config.get_float(paramPrefix + ".dropoutProb",EmbeddingLayer.DEFAULT_DROPOUT_PROB) + + wordList = [UNK_WORD] + sorted(wordCounter.keys()) + w2i = {w:i for i, w in enumerate(wordList)} + + wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) + + c2iFilename = config.get_string(paramPrefix + ".c2i", "org/clulab/c2i-en.txt") + c2i = TODO + + charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) + charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + + if(posTagEmbeddingSize > 0): + tag2i = TODO + posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) + else: + tag2i = None + posTagLookupParameters = None + + if(neTagEmbeddingSize > 0): + ne2i = TODO + neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) + else: + ne2i = None + neTagLookupParameters = None + + distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None + positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None + + return cls(w2i, w2f, c2i, tag2i, ne2i, + learnedWordEmbeddingSize, + charEmbeddingSize, + charRnnStateSize, + posTagEmbeddingSize, + neTagEmbeddingSize, + distanceEmbeddingSize, + distanceWindowSize, + positionEmbeddingSize, + useIsPredicate, + wordLookupParameters, + charLookupParameters, + charRnnBuilder, + posTagLookupParameters, + neTagLookupParameters, + distanceLookupParameters, + positionLookupParameters, + dropoutProb) + + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/main/src/main/python/pytorch/initialLayer.py b/main/src/main/python/pytorch/initialLayer.py new file mode 100644 index 000000000..39db90d28 --- /dev/null +++ b/main/src/main/python/pytorch/initialLayer.py @@ -0,0 +1,11 @@ +import torch +import torch.nn as nn + +class InitialLayer(nn.Module): + + def __init__(self): + super().__init__() + self.outDim = None + + def forward(self, sentence, constEmbeddings, doDropout): + raise NotImplementedError \ No newline at end of file diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index c33f3b3e1..d5a3000f4 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -1,5 +1,6 @@ import torch.nn as nn from utils import * +from embeddingLayer import EmbeddingLayer class Layers(nn.Module): def __init__(self, initialLayer, intermediateLayers, finalLayer): @@ -66,8 +67,9 @@ def saveX2i(self): else: x2i['hasInitial'] = 0 x2i['intermediateCount'] = len(intermediateLayers) + x2i['intermediateLayers'] = list() for il in self.intermediateLayers: - il.saveX2i() + x2i['intermediateLayers'].append(il.saveX2i()) if self.finalLayer.nonEmpty: x2i['hasFinal'] = 1 x2i['finalLayer'] = self.finalLayer.saveX2i() @@ -77,8 +79,8 @@ def saveX2i(self): return x2i @classmethod - def apply(cls, config, paramPrefix, parameters, wordCounter, labelCounter, isDual, providedInputSize): - initialLayer = EmbeddingLayer.initialize(config, paramPrefix + ".initial", parameters, wordCounter) + def apply(cls, config, paramPrefix, wordCounter, labelCounter, isDual, providedInputSize): + initialLayer = EmbeddingLayer.initialize(config, paramPrefix + ".initial", wordCounter) if(initialLayer): inputSize = initialLayer.outDim @@ -97,7 +99,7 @@ def apply(cls, config, paramPrefix, parameters, wordCounter, labelCounter, isDua if inputSize is None: raise RuntimeError("ERROR: trying to construct an intermediate layer without a known input size!") - intermediateLayer = 
RnnLayer.initialize(config, paramPrefix + f".intermediate{i}", parameters, inputSize) + intermediateLayer = RnnLayer.initialize(config, paramPrefix + f".intermediate{i}", inputSize) if intermediateLayer: intermediateLayers.append(intermediateLayer) @@ -109,25 +111,25 @@ def apply(cls, config, paramPrefix, parameters, wordCounter, labelCounter, isDua if inputSize is None: raise RuntimeError("ERROR: trying to construct a final layer without a known input size!") else: - finalLayer = ForwardLayer.initialize(config, paramPrefix + ".final", parameters, labelCounter, isDual, inputSize) + finalLayer = ForwardLayer.initialize(config, paramPrefix + ".final", labelCounter, isDual, inputSize) else: finalLayer = None return cls(initialLayer, intermediateLayers, finalLayer) @classmethod - def loadX2i(cls, models, x2i): + def loadX2i(cls, x2i): hasInitial = x2i['hasInitial'] - initialLayer = EmbeddingLayer.load(models, x2i) if hasInitial == 1 else None + initialLayer = EmbeddingLayer.load(x2i['initialLayer']) if hasInitial == 1 else None intermediateLayers = list() intermediateCount = x2i['intermediateCount'] - for _ in range(intermediateCount): - il = RnnLayer.load(models, x2i) + for i in range(intermediateCount): + il = RnnLayer.load(x2i['intermediateLayers'][i]) intermediateLayers.append(il) hasFinal = x2i['hasFinal'] - finalLayer = ForwardLayer.load(models, x2i) if hasFinal == 1 else none + finalLayer = ForwardLayer.load(x2i['finalLayer']) if hasFinal == 1 else none return cls(initialLayer, intermediateLayers, finalLayer) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 23f3f2b16..ace00e73e 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -1,10 +1,10 @@ -from utils import * +from pytorch.utils import * from collections import Counter -from sequences.rowReader import * +from sequences.rowReaders import * class Metal(): """docstring for Metal""" - def __init__(self, taskManager, 
parameters, modelOpt): + def __init__(self, taskManager, modelOpt): # One Layers object per task; model(0) contains the Layers shared between all tasks (if any) if modelOpt: self.model = modelOpt @@ -18,12 +18,12 @@ def initialize(self): layersPerTask = [None for _ in range(taskManager.taskCount + 1)] - layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", parameters, taskWords(0), None, isDual = false, providedInputSize = None) + layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", taskWords[0], None, False, None) inputSize = layersPerTask[0].outDim for i in taskManager.indices: - layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", parameters, taskWords(i + 1), Some(taskLabels(i + 1)), isDual = taskManager.tasks(i).isDual, inputSize) + layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", taskWords[i + 1], taskLabels[i + 1], taskManager.tasks[i].isDual, inputSize) for i in range(len(layersPerTask)): print (f"Summary of layersPerTask({i}):") diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index ef4c18bbf..f5d1ae868 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -126,8 +126,8 @@ def __init__(self, self.devSentences = ColumnReader.readColumns(devFileName) if devFileName else None self.testSentences = ColumnReader.readColumns(testFileName) if testFileName else None - self.isBasic:Boolean = taskType == TYPE_BASIC - self.isDual:Boolean = taskType == TYPE_DUAL + self.isBasic:bool = taskType == TYPE_BASIC + self.isDual:bool = taskType == TYPE_DUAL if taskType == TYPE_BASIC: self.prettyType = "basic" diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 907ba8fd3..06a2902ff 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -9,7 +9,7 @@ START_TAG = "" STOP_TAG = "" -RANDOM_SEED = 2522620396L # used for both DyNet, and 
the JVM seed for shuffling data +RANDOM_SEED = 2522620396 # used for both DyNet, and the JVM seed for shuffling data WEIGHT_DECAY = 1e-5 LOG_MIN_VALUE = -10000.0 @@ -24,6 +24,9 @@ def save(file, values, comment): file.write(f"{key}\t{value}\n") file.write("\n") +def mkCharacterEmbedding(word, c2i, charLookupParameters, charFwRnnBuilder, charBwRnnBuilder): + TODO + diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 5d6ea1586..f07a30832 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -1,6 +1,7 @@ from pyhocon import ConfigFactory import argparse from pytorch.taskManager import TaskManager +from pytorch.metal import Metal if __name__ == '__main__': @@ -16,8 +17,10 @@ if args.train: config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) - # modelName = args.model_file - # mtl = Metal(taskManager, parameters, None) + modelName = args.model_file + print (taskManager.debugTraversal()) + + # mtl = Metal(taskManager, None, None) # mtl.train(modelName) elif args.test: pass diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index c11c39865..0aa409756 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ b/main/src/main/python/sequences/rowReaders.py @@ -64,7 +64,7 @@ def parseSimpleExtended(rows): return AnnotatedSentence(words), posTags, neLabels, labels # Parser for the full format: word, POS tag, NE label, (label head)+ - def parseFull(rows: IndexedSeq[Row]): + def parseFull(rows): assert(len(rows.head) >= 5) numSent = (len(rows.head) - 3) / 2 assert(numSent >= 1) From 8ef31d2941db04340889be3a3e719207df7fbfae Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Fri, 24 Sep 2021 23:33:02 -0700 Subject: [PATCH 007/134] more implementation for embedding layer --- .../python/pytorch/constEmbeddingsGlove.py | 24 +++++++++++++++-- .../src/main/python/pytorch/embeddingLayer.py | 16 +++++------ 
main/src/main/python/pytorch/utils.py | 27 ++++++++++++++++--- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index d6129393d..be32c2f39 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -1,7 +1,27 @@ from dataclasses import dataclass import torch.nn as nn +from embeddings.wordEmbeddingMap import * @dataclass class ConstEmbeddingParameters: - emb: nn.Embedding.from_pretrianed("....") - w2i: dict \ No newline at end of file + emb: nn.Embedding + w2i: dict + +def ConstEmbeddingsGlove: + def __init__(self): + self.SINGLETON_WORD_EMBEDDING_MAP = None + self.load('../resources/org/clulab/glove.conf') + self.dim = self.SINGLETON_WORD_EMBEDDING_MAP.dim + + def load(self, config): + if self.SINGLETON_WORD_EMBEDDING_MAP is None: + self.SINGLETON_WORD_EMBEDDING_MAP = WordEmbeddingMap(config) + + def mkConstLookupParams(self, words): + w2i = dict() + for i,w in enumerate(words): + weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict[0]) + w2i[w] = i + emd = nn.Embedding.from_pretrained(weight) + emd.weight.requires_grad=False + return ConstEmbeddingParameters(emb ,w2i) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 386d581cf..6a4de8217 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -1,6 +1,8 @@ from initialLayer import InitialLayer import random from utils import * +import torch.nn as nn +import torch DEFAULT_DROPOUT_PROB: float = DEFAULT_DROPOUT_PROBABILITY DEFAULT_LEARNED_WORD_EMBEDDING_SIZE: int = 128 @@ -66,7 +68,7 @@ def __init__(w2i, # word to index positionDim = 1 if distanceLookupParameters and useIsPredicate else 0 predicateDim = positionEmbeddingSize if positionLookupParameters 
else 0 - self.outDim = TODO:ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + self.outDim = ConstEmbeddingsGlove().dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim random.seed(RANDOM_SEED) def forward(self, sentence, constEmbeddings, doDropout): @@ -111,7 +113,7 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition # # biLSTM over character embeddings # - TODO: charEmbedding = mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder) + charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder, self.charRnnStateSize) for word in words]) # # POS tag embedding @@ -219,9 +221,6 @@ def load(cls, x2i): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) - #????? The following line would normally provoke construction of the initial ComputationGraph - #????? and do that outside of a synchronized area. This is avoided by ensuring that construction - #????? happens in Utils.initializeDyNet instead, just to be safe. 
charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None @@ -271,20 +270,21 @@ def initialize(cls, config, paramPrefix, wordCounter): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) c2iFilename = config.get_string(paramPrefix + ".c2i", "org/clulab/c2i-en.txt") - c2i = TODO + c2i = readChar2Ids(c2iFilename) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): - tag2i = TODO + + tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) else: tag2i = None posTagLookupParameters = None if(neTagEmbeddingSize > 0): - ne2i = TODO + ne2i = readString2Ids(config.get_string(paramPrefix + ".ne2i", "../resources/org/clulab/ne2i-en.txt")) neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) else: ne2i = None diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 06a2902ff..8c1c45d70 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -1,3 +1,5 @@ +import torch.nn as nn +import torch concatenateCount = 0 @@ -24,9 +26,26 @@ def save(file, values, comment): file.write(f"{key}\t{value}\n") file.write("\n") -def mkCharacterEmbedding(word, c2i, charLookupParameters, charFwRnnBuilder, charBwRnnBuilder): - TODO - - +def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder, hidden_dim): + charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) + (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) + output, (result, c) = charRnnBuilder(charEmbeddings.view(len(word), 1, -1), (h, c)) + return 
result.view(1, hidden_dim*2) + +def readString2Ids(s2iFilename): + s2i = dict() + with open(s2iFilename) as f: + for line in f: + if not line.startswith("#"): + k, v = line.strip().split('\t') + s2i[k] = int(v) + +def readChar2Ids(s2iFilename): + s2i = dict() + with open(s2iFilename) as f: + for line in f: + if not line.startswith("#"): + k, v = line.strip().split('\t') + s2i[char(int(k))] = int(v) From ddcf223af45904ac7876368bcd9f8c32cfb5ebc2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Sat, 25 Sep 2021 14:35:15 -0700 Subject: [PATCH 008/134] init code for rnnLayer also refined some functions in embedding layer --- main/src/main/python/embeddings/__init__.py | 0 .../python/embeddings/wordEmbeddingMap.py | 29 ++++++ .../src/main/python/pytorch/embeddingLayer.py | 2 +- .../main/python/pytorch/intermediateLayer.py | 12 +++ main/src/main/python/pytorch/rnnLayer.py | 91 +++++++++++++++++++ main/src/main/python/pytorch/utils.py | 30 +++++- main/src/main/python/run.py | 2 +- 7 files changed, 161 insertions(+), 5 deletions(-) create mode 100644 main/src/main/python/embeddings/__init__.py create mode 100644 main/src/main/python/embeddings/wordEmbeddingMap.py create mode 100644 main/src/main/python/pytorch/intermediateLayer.py create mode 100644 main/src/main/python/pytorch/rnnLayer.py diff --git a/main/src/main/python/embeddings/__init__.py b/main/src/main/python/embeddings/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py new file mode 100644 index 000000000..a82c2108a --- /dev/null +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -0,0 +1,29 @@ +import numpy as np + +class WordEmbeddingMap: + def __init__(self, config): + self.emb_dict = self.load(config) + self.dim = self.emb_dict.shape[-1] + + def load(self): + emb_matrix = None + emb_dict = dict() + for line in open(config.get_string("glove.matrixResourceName")): + if not 
len(line.split()) == 2: + if "\t" in line: + delimiter = "\t" + else: + delimiter = " " + line_split = line.rstrip().split(delimiter) + # extract word and vector + word = line_split[0] + x = np.array([float(i) for i in line_split[1:]]) + vector = (x /np.linalg.norm(x)) + embedding_size = vector.shape[0] + emb_dict[word] = vector + base = math.sqrt(6/embedding_size) + emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) + return emb_dict + + def isOutOfVocabulary(self, word): + return word not in self.emb_dict \ No newline at end of file diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 6a4de8217..95b4ef894 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -113,7 +113,7 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition # # biLSTM over character embeddings # - charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder, self.charRnnStateSize) for word in words]) + charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) # # POS tag embedding diff --git a/main/src/main/python/pytorch/intermediateLayer.py b/main/src/main/python/pytorch/intermediateLayer.py new file mode 100644 index 000000000..48ea53377 --- /dev/null +++ b/main/src/main/python/pytorch/intermediateLayer.py @@ -0,0 +1,12 @@ +import torch +import torch.nn as nn + +class IntermediateLayer(nn.Module): + + def __init__(self): + super().__init__() + self.inDim = None + self.outDim = None + + def forward(self, inputExpressions, doDropout): + raise NotImplementedError \ No newline at end of file diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py new file mode 100644 index 000000000..c5aef820a --- /dev/null +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -0,0 +1,91 @@ +from 
intermediateLayer import IntermediateLayer +from utils import * +import torch +import torch.nn as nn + +class RnnLayer(IntermediateLayer): + def __init__(self, + inputSize, + numLayers, + rnnStateSize, + useHighwayConnections, + rnnType, + wordRnnBuilder, + dropoutProb): + + self.inDim = self.inputSize = inputSize + self.numLayers = numLayers + self.rnnStateSize = rnnStateSize + self.useHighwayConnections = useHighwayConnections + self.rnnType = rnnType + self.wordRnnBuilder = wordRnnBuilder + self.dropoutProb = dropoutProb + + highwaySize = inputSize if useHighwayConnections else 0 + self.outDim = 2 * rnnStateSize + highwaySize + + def forward(self, inputExpressions, dropout): + + assert(inputExpressions is not None) + + States, _ = transduce(inputExpressions, self.wordRnnBuilder) + + if self.useHighwayConnections: + States = torch.cat([States, inputExpressions], dim=1) + + return States + + def saveX2i(self): + x2i = dict() + x2i['inputSize'] = self.inputSize + x2i['numLayers'] = self.numLayers + x2i['rnnStateSize'] = self.rnnStateSize + x2i['useHighwayConnections'] = 1 if useHighwayConnections else 0 + x2i['rnnType'] = self.rnnType + x2i['dropoutProb'] = self.dropoutProb + return x2i + + def __str__(self): + return f"RnnLayer({self.rnnType}, {self.inDim}, {self.outDim})" + + @classmethod + def load(cls, x2i): + inputSize = x2i['inputSize'] + numLayers = x2i['numLayers'] + rnnType = x2i.get('rnnType', 'lstm') + rnnStateSize = x2i['rnnStateSize'] + useHighwayConnections = x2i['useHighwayConnections'] == 1 + dropoutProb = x2i['dropoutProb'] + + builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) + + return cls(inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) + + @classmethod + def initialize(cls, config, paramPrefix, inputSize): + + if(not config.__contains__(paramPrefix)): + return None + + numLayers = config.get_int(paramPrefix + ".numLayers", 1) + rnnStateSize = config.get_int(paramPrefix + 
".rnnStateSize", None) + useHighwayConnections = config.get_bool(paramPrefix + '.useHighwayConnections', False) + rnnType = config.get_string(paramPrefix + ".type", "lstm") + dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize) + + return (inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) + +def mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb): + if rnnType == 'gru': + return nn.GRU(inputSize, rnnStateSize, numLayers, bidirectional=True, dropout=dropoutProb) + elif rnnType == 'lstm': + return nn.LSTM(inputSize, rnnStateSize, numLayers, bidirectional=True, dropout=dropoutProb) + else: + raise RuntimeError(f'ERROR: unknown rnnType "{rnnType}"!') + + + + + diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 8c1c45d70..c808d5421 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -26,10 +26,10 @@ def save(file, values, comment): file.write(f"{key}\t{value}\n") file.write("\n") -def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder, hidden_dim): +def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): + hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) - (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (result, c) = charRnnBuilder(charEmbeddings.view(len(word), 1, -1), (h, c)) + _, result = transduce(charEmbeddings, charRnnBuilder, True) return result.view(1, hidden_dim*2) def readString2Ids(s2iFilename): @@ -48,4 +48,28 @@ def readChar2Ids(s2iFilename): k, v = line.strip().split('\t') s2i[char(int(k))] = int(v) +def transduce(embeddings, builder): + + hidden_dim = builder.hidden_size + bi_direct = builder.bidirectional + mode = build.mode + + if mode == 'LSTM': + if bi_direct: + 
(h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) + output, (result, c) = builder(embeddings.view(len(embeddings), 1, -1), (h, c)) + else: + (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) + output, (result, c) = builder(embeddings.view(len(embeddings), 1, -1), (h, c)) + elif mode == 'GRU': + if bi_direct: + h = torch.zeros(2, 1, hidden_dim) + output, result = builder(embeddings.view(len(embeddings), 1, -1), h) + else: + h = torch.zeros(1, 1, hidden_dim) + output, result = builder(embeddings.view(len(embeddings), 1, -1), h) + + return output, result + + diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index f07a30832..c75532f0e 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -20,7 +20,7 @@ modelName = args.model_file print (taskManager.debugTraversal()) - # mtl = Metal(taskManager, None, None) + mtl = Metal(taskManager, None, None) # mtl.train(modelName) elif args.test: pass From 03229a42bb781fb9800e15c3943c24b2c48a22ff Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 29 Sep 2021 13:00:00 -0700 Subject: [PATCH 009/134] forward layer implementation --- .../src/main/python/pytorch/embeddingLayer.py | 2 +- main/src/main/python/pytorch/finalLayer.py | 21 +++ main/src/main/python/pytorch/forwardLayer.py | 173 ++++++++++++++++++ .../main/python/pytorch/greedyForwardLayer.py | 0 main/src/main/python/pytorch/utils.py | 21 +++ .../python/pytorch/viterbiForwardLayer.py | 0 6 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 main/src/main/python/pytorch/finalLayer.py create mode 100644 main/src/main/python/pytorch/forwardLayer.py create mode 100644 main/src/main/python/pytorch/greedyForwardLayer.py create mode 100644 main/src/main/python/pytorch/viterbiForwardLayer.py diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 95b4ef894..a506bfa02 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++
b/main/src/main/python/pytorch/embeddingLayer.py @@ -113,7 +113,7 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition # # biLSTM over character embeddings # - charEmbedding = torch.cat([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) + charEmbedding = torch.stack([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) # # POS tag embedding diff --git a/main/src/main/python/pytorch/finalLayer.py b/main/src/main/python/pytorch/finalLayer.py new file mode 100644 index 000000000..0f2b63d87 --- /dev/null +++ b/main/src/main/python/pytorch/finalLayer.py @@ -0,0 +1,21 @@ +import torch +import torch.nn as nn + +class FinalLayer(nn.Module): + + def __init__(self): + super().__init__() + self.inDim = None + self.outDim = None + + def forward(self, inputExpressions, headPositionsOpt, doDropout): + raise NotImplementedError + + def loss(self, emissionScoresAsExpression, goldLabels): + raise NotImplementedError + + def inference(self, emissionScores): + raise NotImplementedError + + def inferenceWithScores(self, emissionScores): + raise NotImplementedError \ No newline at end of file diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py new file mode 100644 index 000000000..4b3ea489f --- /dev/null +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -0,0 +1,173 @@ +import torch +import torch.nn + +from finalLayer import FinalLayer +from greedyForwardLayer import GreedyForwardLayer +from viterbiForwardLayer import ViterbiForwardLayer + +from utils import * + +def ForwardLayer(FinalLayer): + def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): + self.inputSize = inputSize + self.isDual = isDual + self.t2i = t2i + self.i2t = i2t + self.spans = spans + self.nonlinearity = nonlinearity + + self.pH = nn.Linear(actualInputSize, len(t2i)) + self.pRoot = 
torch.rand(inputSize) #TODO: Not sure about the shape here + self.dropoutProb = dropoutProb + + self.inDim = spanLength(spans) if spans is not None else inputSize + self.outDim = len(t2i) + + + def pickSpan(self, v): + if self.spans is None: + return v + else: + # Zheng: Will spans overlap? + vs = list() + for span in self.spans: + e = torch.index_select(v, 0, torch.tensor([span[0], span[1]])) + vs.append(e) + return torch.cat(vs) + + def forward(inputExpressions, doDropout, headPositionsOpt = None): + emissionScores = list() + if not self.isDual: + # Zheng: Why the for loop here? Can we just use matrix manipulation? + for i, e in enumerate(inputExpressions): + argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + if nonlinearity == NONLIN_TANH: + l1 = torch.tanh(l1) + elif nonlinearity == NONLIN_RELU: + l1 = torch.relu(l1) + emissionScores.append(l1) + else: + if headPositionsOpt is None: + raise RuntimeError("ERROR: dual task without information about head positions!") + for i, e in enumerate(inputExpressions): + headPosition = headPositionsOpt[i] + argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + if headPosition >= 0: + # there is an explicit head in the sentence + predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition]), self.dropoutProb, doDropout) + else: + # the head is root. we used a dedicated Parameter for root + # Zheng: Why not add root node to the input sequence at the beginning? 
+ predExp = expressionDropout(self.pickSpan(self.pRoot), self.dropoutProb, doDropout) + ss = torch.cat([argExp, predExp]) + l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) + if nonlinearity == NONLIN_TANH: + l1 = torch.tanh(l1) + elif nonlinearity == NONLIN_RELU: + l1 = torch.relu(l1) + emissionScores.append(l1) + return torch.stack(emissionScores) + + @staticmethod + def load(x2i): + inferenceType = x2i["inferenceType"] + if inferenceType == TYPE_VITERBI: + pass + # TODO + # return ViterbiForwardLayer.load(x2i) + elif inferenceType == TYPE_GREEDY: + return GreedyForwardLayer.load(x2i) + else: + raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") + + @staticmethod + def initialize(config, paramPrefix, labelCounter, isDual, inputSize): + if(not config.__contains__(paramPrefix)): + return None + + inferenceType = config.get_string(paramPrefix + ".inference", "greedy") + dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + nonlinAsString = config.get_string(paramPrefix + ".nonlinearity", "") + if nonlinAsString in nonlin_map: + nonlin = nonlin_map[nonlinAsString] + else: + raise RuntimeError(f"ERROR: unknown non-linearity {nonlinAsString}!") + + t2i = {t:i for i, t in enumerate(labelCounter.keys())} + i2t = {i:t for t, i in t2i.items()} + + spanConfig = config.get_string(paramPrefix + ".span", "") + if spanConfig == "": + span = None + else: + span = parseSpan(spanConfig, inputSize) + + if span: + l = spanLength(span) + actualInputSize = 2*l if isDual else l + else: + actualInputSize = 2*inputSize if isDual else inputSize + + if inferenceType == TYPE_GREEDY_STRING: + return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + elif inferenceType == TYPE_VITERBI_STRING: + pass + # TODO + # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + # layer.initializeTransitions() + # return layer + else: + raise 
RuntimeError(f"ERROR: unknown inference type {inferenceType}!") + +def spanLength(spans): + s = 0 + for x in spans: + s += x[1] - x[0] + return s + +def parseSpan(spanParam, inputSize): + spans = list() + spanParamTokens = spanParam.split(",") + for spanParamToken in spanParamTokens: + spanTokens = spanParamToken.split('-') + assert(len(spanTokens) == 2) + spans.append((int(spanTokens[0]), int(spanTokens[1]))) + return spans + +def spanToString(spans): + s = "" + first = True + for span in spans: + if not first: + s += "," + s += f"{span[0]}-{span[1]}" + first = False + return s + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py new file mode 100644 index 000000000..e69de29bb diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index c808d5421..e15fe2a3f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -20,6 +20,20 @@ IS_DYNET_INITIALIZED = False +TYPE_VITERBI = 1 +TYPE_GREEDY = 2 + +NONLIN_NONE = 0 +NONLIN_RELU = 1 +NONLIN_TANH = 2 + +nonlin_map = {"relu":NONLIN_RELU, "tanh":NONLIN_TANH, "":NONLIN_NONE} + +TYPE_GREEDY_STRING = "greedy" +TYPE_VITERBI_STRING = "viterbi" + +DEFAULT_IS_DUAL = 0 + def save(file, values, comment): file.write("# " + comment + "\n") for key, value in values.items(): @@ -71,5 +85,12 @@ def transduce(embeddings, builder): return output, result +def expressionDropout(expression, dropoutProb, doDropout): + if doDropout and dropoutProb > 0: + dropout = nn.Dropout(dropoutProb) + return dropout(expression) + else: + return expression + diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py new file mode 100644 index 000000000..e69de29bb From 5a6b1284aee4955fba582ee89afd2fbc131f4e2e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 29 Sep 2021 15:33:38 -0700 Subject: [PATCH 
010/134] greedy forward layer --- main/src/main/python/pytorch/forwardLayer.py | 41 ++++++-------- .../main/python/pytorch/greedyForwardLayer.py | 53 +++++++++++++++++++ main/src/main/python/pytorch/utils.py | 16 ++++++ 3 files changed, 84 insertions(+), 26 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 4b3ea489f..2f43be5a7 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -1,5 +1,7 @@ import torch import torch.nn +from torch.autograd import Variable +import torch.nn.functional as F from finalLayer import FinalLayer from greedyForwardLayer import GreedyForwardLayer @@ -7,7 +9,7 @@ from utils import * -def ForwardLayer(FinalLayer): +class ForwardLayer(FinalLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): self.inputSize = inputSize self.isDual = isDual @@ -17,7 +19,7 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.nonlinearity = nonlinearity self.pH = nn.Linear(actualInputSize, len(t2i)) - self.pRoot = torch.rand(inputSize) #TODO: Not sure about the shape here + self.pRoot = Variable(torch.rand(inputSize)) #TODO: Not sure about the shape here self.dropoutProb = dropoutProb self.inDim = spanLength(spans) if spans is not None else inputSize @@ -43,9 +45,9 @@ def forward(inputExpressions, doDropout, headPositionsOpt = None): argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) if nonlinearity == NONLIN_TANH: - l1 = torch.tanh(l1) + l1 = F.tanh(l1) elif nonlinearity == NONLIN_RELU: - l1 = torch.relu(l1) + l1 = F.relu(l1) emissionScores.append(l1) else: if headPositionsOpt is None: @@ -63,9 +65,9 @@ def forward(inputExpressions, doDropout, headPositionsOpt = None): ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), 
self.dropoutProb, doDropout) if nonlinearity == NONLIN_TANH: - l1 = torch.tanh(l1) + l1 = F.tanh(l1) elif nonlinearity == NONLIN_RELU: - l1 = torch.relu(l1) + l1 = F.relu(l1) emissionScores.append(l1) return torch.stack(emissionScores) @@ -111,40 +113,27 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): actualInputSize = 2*inputSize if isDual else inputSize if inferenceType == TYPE_GREEDY_STRING: - return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) elif inferenceType == TYPE_VITERBI_STRING: pass # TODO - # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, span, nonlin, dropoutProb) + # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) # layer.initializeTransitions() # return layer else: raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") def spanLength(spans): - s = 0 - for x in spans: - s += x[1] - x[0] - return s + return sum(end - start for start, end in spans) def parseSpan(spanParam, inputSize): - spans = list() - spanParamTokens = spanParam.split(",") - for spanParamToken in spanParamTokens: - spanTokens = spanParamToken.split('-') - assert(len(spanTokens) == 2) - spans.append((int(spanTokens[0]), int(spanTokens[1]))) + # Zheng: Why do we need inputSize here? 
+ spanParamTokens = spanParam.split(",") + spans = [tuple(map(int, tok.split('-'))) for tok in spanParamTokens] + return spans def spanToString(spans): - s = "" - first = True - for span in spans: - if not first: - s += "," - s += f"{span[0]}-{span[1]}" - first = False - return s + return ','.join(f'{start}-{end}' for start, end in spans) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index e69de29bb..be776a6b5 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -0,0 +1,53 @@ +from forwardLayer import * +from utils import * +import numpy as np + +class GreedyForwardLayer(ForwardLayer): + def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): + super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) + + def loss(self, finalStates, goldLabelStrings): + goldLabels = [self.t2i[gs] for gs in goldLabelStrings] + return sentenceLossGreedy(finalStates, goldLabels) + + def saveX2i(self): + x2i = dict() + x2i["inferenceType"] = TYPE_GREEDY + x2i["inputSize"] = self.inputSize + x2i["isDual"] = 1 if self.isDual else 0 + x2i["span"] = spanToString(self.spans) if self.spans else "" + x2i["nonlinearity"] = self.nonlinearity + x2i["t2i"] = self.t2i + x2i["dropoutProb"] = self.dropoutProb + + return x2i + + def __str__(self): + return f"GreedyForwardLayer({inDim}, {outDim})" + + def inference(self, emissionScores): + labelIds = np.argmax(emissionScores, axis=1).tolist() + return [self.i2t[i] for i in labelIds] + + def inferenceWithScores(self, emissionScores): + return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1], reverse=True) for scoresForPosition in emissionScores] + + @classmethod + def load(cls, x2i): + inputSize = x2i["inputSize"] + isDual = x2i.get("isDual", DEFAULT_IS_DUAL) == 1 + sapnValue = x2i.get("span", "") + spans = None if sapnValue == "" else 
parseSpan(sapnValue, inputSize) + nonlinearity = x2i.get("nonlinearity", NONLIN_NONE) + t2i = x2i["t2i"] + i2t = {i:t for t, i in t2i.items()} + dropoutProb = x2i.get("dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + if spans: + l = spanLength(spans) + actualInputSize = 2*l if isDual else l + else: + actualInputSize = 2*inputSize if isDual else inputSize + + return cls(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) + \ No newline at end of file diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index e15fe2a3f..c1b43a6f1 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -1,5 +1,8 @@ import torch.nn as nn import torch +from torch.autograd import Variable + +import numpy as np concatenateCount = 0 @@ -92,5 +95,18 @@ def expressionDropout(expression, dropoutProb, doDropout): else: return expression +def sentenceLossGreedy(emissionScoresForSeq, golds): + assert(emissionScoresForSeq.shape[0] == len(golds)) + criterion = nn.CrossEntropyLoss() + golds = Variable(torch.LongTensor(golds)) + return criterion(emissionScoresForSeq, golds) + + + + + + + + From 0aa3aaabb4b05952f43b19f8503fe769f5969b7e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 29 Sep 2021 17:35:08 -0700 Subject: [PATCH 011/134] add more functions to layers, init viterbi layer --- main/src/main/python/pytorch/layers.py | 115 +++++++++++++++--- main/src/main/python/pytorch/utils.py | 7 ++ .../python/pytorch/viterbiForwardLayer.py | 32 +++++ 3 files changed, 139 insertions(+), 15 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index d5a3000f4..363ff29a9 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -1,10 +1,10 @@ import torch.nn as nn from utils import * from embeddingLayer import EmbeddingLayer +from constEmbeddingsGlove import ConstEmbeddingsGlove -class Layers(nn.Module):
+class Layers(object): def __init__(self, initialLayer, intermediateLayers, finalLayer): - super().__init__() if finalLayer: self.outDim = finalLayer.outDim elif intermediateLayers: @@ -37,10 +37,10 @@ def __str__(self): s += "final = " + finalLayer return s - def forward(self, sentence, constEmnbeddings, doDropout): + def forward(self, sentence, constEmbeddings, doDropout): if self.initialLayer.isEmpty: raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") - states = self.initialLayer(sentence, constEmnbeddings, doDropout) + states = self.initialLayer(sentence, constEmbeddings, doDropout) for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer.nonEmpty: @@ -133,18 +133,103 @@ def loadX2i(cls, x2i): return cls(initialLayer, intermediateLayers, finalLayer) - def predictJointly(layers, sentence, constEmnbeddings): - TODO - def forwardForTask(layers, taskId, sentence, constEmnbeddings, doDropout): - TODO - def predict(layers, taskId, sentence, constEmnbeddings): - TODO - def predictWithScores(layers, taskId, sentence, constEmnbeddings): - TODO - def parse(layers, sentence, constEmnbeddings): - TODO + @staticmethod + def predictJointly(layers, sentence, constEmbeddings): + labelsPerTask = list() + # layers(0) contains the shared layers + if layers[0]: + sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout=False) + for i in range(1, len(layers)): + states = layers[i].forwardFrom(sharedStates, sentence.headPositions, doDropout=False) + emissionScores = emissionScoresToArrays(states) + labels = layers[i].finalLayer.inference(emissionScores) + labelsPerTask += [labels] + # no shared layer + else: + for i in range(1, len(layers)): + states = layers[i].forward(sentence, sentence.headPositions, doDropout=False) + emissionScores = emissionScoresToArrays(states) + labels = layers[i].finalLayer.inference(emissionScores) + labelsPerTask += 
[labels] + + return labelsPerTask + + @staticmethod + def forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout): + if layers[0]: + sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout) + states = layers[taskId+1].forwardFrom(sharedStates, sentence.headPositions, doDropout) + else: + states = layers[taskId+1].forward(sentence, constEmbeddings, doDropout) + return states + + @staticmethod + def predict(layers, taskId, sentence, constEmbeddings): + states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) + emissionScores = emissionScoresToArrays(states) + return layers[taskId+1].finalLayer.inference(emissionScores) + + @staticmethod + def predictWithScores(layers, taskId, sentence, constEmbeddings): + states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) + emissionScores = emissionScoresToArrays(states) + return layers[taskId+1].finalLayer.inferenceWithScores(emissionScores) + + @staticmethod + def parse(layers, sentence, constEmbeddings): + # + # first get the output of the layers that are shared between the two tasks + # + assert(layers[0].nonEmpty) + sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout=False) + + # + # now predict the heads (first task) + # + headStates = layers[1].forwardFrom(sharedStates, None, doDropout=False) + headEmissionScores = emissionScoresToArrays(headStates) + headScores = layers[1].finalLayer.inference(headEmissionScores) + + # store the head values here + heads = list() + for wi, predictionsForThisWord in enumerate(headScores): + # pick the prediction with the highest score, which is within the boundaries of the current sentence + done = False + for hi, relative in enumerate(predictionsForThisWord): + if done: + break + try: + relativeHead = int(relative[0]) + if relativeHead == 0: + heads.append(1) + done = True + else: + headPosition = wi + relativeHead + heads.append(headPosition) + done = True + except: + raise 
RuntimeError('''some valid predictions may not be integers, e.g., "" may be predicted by the sequence model''') + if not done: + # we should not be here, but let's be safe + # if nothing good was found, assume root + heads.append(1) + + # + # next, predict the labels using the predicted heads + # + labelStates = layers[2].forwardFrom(sharedStates, heads, doDropout=False) + emissionScores = emissionScoresToArrays(labelStates) + labels = layers[2].finalLayer.inference(emissionScores) + assert(len(labels)==len(heads)) + + return zip(heads, labels) + + @staticmethod def loss(layers, taskId, sentence, goldLabels): - TODO + # Zheng: I am not sure this is the suitable way to load embeddings or not, need help... + constEmbeddings = ConstEmbeddingsGlove().mkConstLookupParams(sentence.words) + states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=True) # use dropout during training! + return layers[taskId+1].finalLayer.loss(states, goldLabels) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index c1b43a6f1..dd1709a8f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -100,6 +100,13 @@ def sentenceLossGreedy(emissionScoresForSeq, golds): criterion = nn.CrossEntropyLoss() golds = Variable(torch.LongTensor(golds)) return criterion(emissionScoresForSeq, golds) + +def emissionScoresToArrays(expressions): + lattice = list() + for expr in expressions: + probs = expr.data.tolist() + lattice += [probs] + return lattice diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index e69de29bb..636b130c4 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -0,0 +1,32 @@ +from forwardLayer import * +from utils import * + +class GreedyForwardLayer(ForwardLayer): + def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, 
nonlinearity, dropoutProb, spans = None): + super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) + + def initializeTransitions(self): + #TODO + pass + def initTransitionsTo(self, dst, size, startTag, stopTag): + #TODO + pass + def loss(self, finalStates, goldLabelStrings): + #TODO + pass + def saveX2i(self): + #TODO + pass + def __str__(self): + #TODO + pass + def inference(emissionScores): + #TODO + pass + def inferenceWithScores(emissionScores): + #TODO + pass + @classmethod + def load(cls, x2i): + #TODO + pass \ No newline at end of file From e12161839b89bbdb26d8c1678b3c7bd523183be9 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 02:18:03 -0700 Subject: [PATCH 012/134] traverse the code, fixed bugs Now, the model initialization part is working --- .../python/embeddings/wordEmbeddingMap.py | 46 ++++++------- .../python/pytorch/constEmbeddingsGlove.py | 8 ++- .../src/main/python/pytorch/embeddingLayer.py | 25 ++++--- main/src/main/python/pytorch/forwardLayer.py | 13 ++-- .../main/python/pytorch/greedyForwardLayer.py | 6 +- main/src/main/python/pytorch/layers.py | 49 ++++++++----- main/src/main/python/pytorch/metal.py | 69 ++++++++++++++++--- main/src/main/python/pytorch/rnnLayer.py | 12 ++-- main/src/main/python/pytorch/taskManager.py | 23 ++++++- main/src/main/python/pytorch/utils.py | 10 +-- .../python/pytorch/viterbiForwardLayer.py | 6 +- main/src/main/python/run.py | 4 +- .../src/main/python/sequences/columnReader.py | 2 +- main/src/main/python/sequences/rowReaders.py | 30 ++++---- 14 files changed, 197 insertions(+), 106 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index a82c2108a..327b4befc 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -1,29 +1,29 @@ import numpy as np +import math class WordEmbeddingMap: def __init__(self, config): - 
self.emb_dict = self.load(config) - self.dim = self.emb_dict.shape[-1] - - def load(self): - emb_matrix = None - emb_dict = dict() - for line in open(config.get_string("glove.matrixResourceName")): - if not len(line.split()) == 2: - if "\t" in line: - delimiter = "\t" - else: - delimiter = " " - line_split = line.rstrip().split(delimiter) - # extract word and vector - word = line_split[0] - x = np.array([float(i) for i in line_split[1:]]) - vector = (x /np.linalg.norm(x)) - embedding_size = vector.shape[0] - emb_dict[word] = vector - base = math.sqrt(6/embedding_size) - emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) - return emb_dict + self.emb_dict, self.dim = load(config) def isOutOfVocabulary(self, word): - return word not in self.emb_dict \ No newline at end of file + return word not in self.emb_dict + +def load(config): + emb_matrix = None + emb_dict = dict() + for line in open(config.get_string("glove.matrixResourceName")): + if not len(line.split()) == 2: + if "\t" in line: + delimiter = "\t" + else: + delimiter = " " + line_split = line.rstrip().split(delimiter) + # extract word and vector + word = line_split[0] + x = np.array([float(i) for i in line_split[1:]]) + vector = (x /np.linalg.norm(x)) + embedding_size = vector.shape[0] + emb_dict[word] = vector + base = math.sqrt(6/embedding_size) + emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) + return emb_dict, embedding_size \ No newline at end of file diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index be32c2f39..52547b6a8 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -1,16 +1,18 @@ from dataclasses import dataclass import torch.nn as nn from embeddings.wordEmbeddingMap import * +from pyhocon import ConfigFactory @dataclass class ConstEmbeddingParameters: emb: nn.Embedding w2i: dict -def ConstEmbeddingsGlove: +class 
_ConstEmbeddingsGlove: def __init__(self): self.SINGLETON_WORD_EMBEDDING_MAP = None - self.load('../resources/org/clulab/glove.conf') + config = ConfigFactory.parse_file('../resources/org/clulab/glove.conf') + self.load(config) self.dim = self.SINGLETON_WORD_EMBEDDING_MAP.dim def load(self, config): @@ -25,3 +27,5 @@ def mkConstLookupParams(self, words): emd = nn.Embedding.from_pretrained(weight) emd.weight.requires_grad=False return ConstEmbeddingParameters(emb ,w2i) + +ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index a506bfa02..2ebea6f7b 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -1,8 +1,9 @@ -from initialLayer import InitialLayer +from pytorch.initialLayer import InitialLayer import random -from utils import * +from pytorch.utils import * import torch.nn as nn import torch +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove DEFAULT_DROPOUT_PROB: float = DEFAULT_DROPOUT_PROBABILITY DEFAULT_LEARNED_WORD_EMBEDDING_SIZE: int = 128 @@ -16,7 +17,7 @@ DEFAULT_USE_IS_PREDICATE: int = -1 class EmbeddingLayer(InitialLayer): - def __init__(w2i, # word to index + def __init__(self, w2i, # word to index w2f, # word to frequency c2i, # character to index tag2i, # POS tag to index @@ -68,7 +69,7 @@ def __init__(w2i, # word to index positionDim = 1 if distanceLookupParameters and useIsPredicate else 0 predicateDim = positionEmbeddingSize if positionLookupParameters else 0 - self.outDim = ConstEmbeddingsGlove().dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim random.seed(RANDOM_SEED) def forward(self, sentence, constEmbeddings, doDropout): @@ -81,9 +82,9 @@ def 
forward(self, sentence, constEmbeddings, doDropout): # const word embeddings such as GloVe constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) assert(constEmbeddingsExpressions.size(0) == len(words)) - if(tags) assert(len(tags) == len(words)) - if(nes) assert(len(nes) == len(words)) - if(headPositions) assert(len(headPositions) == len(words)) + if(tags): assert(len(tags) == len(words)) + if(nes): assert(len(nes) == len(words)) + if(headPositions): assert(len(headPositions) == len(words)) # build the word embeddings one by one embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, tags, nes, headPositions) @@ -250,7 +251,7 @@ def load(cls, x2i): @classmethod def initialize(cls, config, paramPrefix, wordCounter): - if(not config.__contains__(paramPrefix)): + if(not config.contains(paramPrefix)): return None learnedWordEmbeddingSize = config.get_int(paramPrefix + ".learnedWordEmbeddingSize",DEFAULT_LEARNED_WORD_EMBEDDING_SIZE) @@ -260,9 +261,9 @@ def initialize(cls, config, paramPrefix, wordCounter): neTagEmbeddingSize = config.get_int(paramPrefix + ".neTagEmbeddingSize",DEFAULT_NE_TAG_EMBEDDING_SIZE) distanceEmbeddingSize = config.get_int(paramPrefix + ".distanceEmbeddingSize",DEFAULT_DISTANCE_EMBEDDING_SIZE) distanceWindowSize = config.get_int(paramPrefix + ".distanceWindowSize",DEFAULT_DISTANCE_WINDOW_SIZE) - useIsPredicate = config.getArgBoolean(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1) + useIsPredicate = config.get_bool(paramPrefix + ".useIsPredicate",DEFAULT_USE_IS_PREDICATE == 1) positionEmbeddingSize = config.get_int(paramPrefix + ".positionEmbeddingSize",DEFAULT_POSITION_EMBEDDING_SIZE) - dropoutProb = config.get_float(paramPrefix + ".dropoutProb",EmbeddingLayer.DEFAULT_DROPOUT_PROB) + dropoutProb = config.get_float(paramPrefix + ".dropoutProb",DEFAULT_DROPOUT_PROB) wordList = [UNK_WORD] + sorted(wordCounter.keys()) w2i = {w:i for i, w in enumerate(wordList)} @@ -293,7 +294,7 @@ def initialize(cls, 
config, paramPrefix, wordCounter): distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None - return cls(w2i, w2f, c2i, tag2i, ne2i, + return cls(w2i, wordCounter, c2i, tag2i, ne2i, learnedWordEmbeddingSize, charEmbeddingSize, charRnnStateSize, @@ -331,6 +332,8 @@ def initialize(cls, config, paramPrefix, wordCounter): + + diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 2f43be5a7..92b7a133d 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -3,14 +3,13 @@ from torch.autograd import Variable import torch.nn.functional as F -from finalLayer import FinalLayer -from greedyForwardLayer import GreedyForwardLayer -from viterbiForwardLayer import ViterbiForwardLayer +from pytorch.finalLayer import FinalLayer -from utils import * +from pytorch.utils import * class ForwardLayer(FinalLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): + super().__init__() self.inputSize = inputSize self.isDual = isDual self.t2i = t2i @@ -73,6 +72,8 @@ def forward(inputExpressions, doDropout, headPositionsOpt = None): @staticmethod def load(x2i): + from pytorch.greedyForwardLayer import GreedyForwardLayer + from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] if inferenceType == TYPE_VITERBI: pass @@ -85,7 +86,9 @@ def load(x2i): @staticmethod def initialize(config, paramPrefix, labelCounter, isDual, inputSize): - if(not config.__contains__(paramPrefix)): + from pytorch.greedyForwardLayer import GreedyForwardLayer + from pytorch.viterbiForwardLayer import ViterbiForwardLayer + if(not config.contains(paramPrefix)): return None inferenceType = config.get_string(paramPrefix + ".inference", 
"greedy") diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index be776a6b5..2d9ddeeae 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -1,5 +1,5 @@ -from forwardLayer import * -from utils import * +from pytorch.forwardLayer import * +from pytorch.utils import * import numpy as np class GreedyForwardLayer(ForwardLayer): @@ -23,7 +23,7 @@ def saveX2i(self): return x2i def __str__(self): - return f"GreedyForwardLayer({inDim}, {outDim})" + return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): labelIds = np.argmax(lattice.data.numpy(), axis=1).tolist() diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 363ff29a9..4c1a1889d 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -1,7 +1,9 @@ import torch.nn as nn -from utils import * -from embeddingLayer import EmbeddingLayer -from constEmbeddingsGlove import ConstEmbeddingsGlove +from pytorch.utils import * +from pytorch.embeddingLayer import EmbeddingLayer +from pytorch.rnnLayer import RnnLayer +from pytorch.forwardLayer import ForwardLayer +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove class Layers(object): def __init__(self, initialLayer, intermediateLayers, finalLayer): @@ -14,8 +16,7 @@ def __init__(self, initialLayer, intermediateLayers, finalLayer): else: self.outDim = None - if initialLayer and intermediateLayers and finalLayer: - self.nonEmpty = True + self.nonEmpty = initialLayer is not None and intermediateLayers is not None and finalLayer is not None self.isEmpty = not self.nonEmpty self.initialLayer = initialLayer @@ -25,43 +26,53 @@ def __init__(self, initialLayer, intermediateLayers, finalLayer): def __str__(self): s = "" started = False - if(initialLayer.nonEmpty): - s += "initial = " + initialLayer + 
if(self.initialLayer is not None): + s += "initial = " + str(self.initialLayer) started = True - for i in intermediateLayers.indices: - if(started) s += " " - s += s"intermediate ({i+1}) = " + intermediateLayers[i] + for i in range(len(self.intermediateLayers)): + if(started): s += " " + s += f"intermediate ({i+1}) = " + str(self.intermediateLayers[i]) started = True - if(finalLayer.nonEmpty): - if(started) s += " " - s += "final = " + finalLayer + if(self.finalLayer is not None): + if(started): s += " " + s += "final = " + str(self.finalLayer) return s + def get_parameters(self): + parameters = list() + if self.initialLayer is not None: + parameters += [p for p in self.initialLayer.parameters() if p.requires_grad] + for il in self.intermediateLayers: + parameters += [p for p in il.parameters() if p.requires_grad] + if self.finalLayer is not None: + parameters += [p for p in self.finalLayer.parameters() if p.requires_grad] + return parameters + def forward(self, sentence, constEmbeddings, doDropout): if self.initialLayer.isEmpty: raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") states = self.initialLayer(sentence, constEmbeddings, doDropout) for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) - if self.finalLayer.nonEmpty: + if self.finalLayer is not None: states = self.finalLayer(states, sentence.headPositions, doDropout) return states def forwardFrom(self, inStates, headPositions, doDropout): - if self.initialLayer.nonEmpty: + if self.initialLayer is not None: raise RuntimeError(f"ERROR: you can't call forwardFrom() on a Layers object that has an initial layer: {self}") states = inStates for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) - if self.finalLayer.nonEmpty: + if self.finalLayer is not None: states = self.finalLayer(states, sentence.headPositions, doDropout) return states def saveX2i(self): x2i = 
dict() - if self.initialLayer.nonEmpty: + if self.initialLayer is not None: x2i['hasInitial'] = 1 x2i['initialLayer'] = self.initialLayer.saveX2i() else: @@ -70,7 +81,7 @@ def saveX2i(self): x2i['intermediateLayers'] = list() for il in self.intermediateLayers: x2i['intermediateLayers'].append(il.saveX2i()) - if self.finalLayer.nonEmpty: + if self.finalLayer is not None: x2i['hasFinal'] = 1 x2i['finalLayer'] = self.finalLayer.saveX2i() else: @@ -227,7 +238,7 @@ def parse(layers, sentence, constEmbeddings): @staticmethod def loss(layers, taskId, sentence, goldLabels): # Zheng: I am not sure this is the suitable way to load embeddings or not, need help... - constEmbeddings = ConstEmbeddingsGlove().mkConstLookupParams(sentence.words) + constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=True) # use dropout during training! return layers[taskId+1].finalLayer.loss(states, goldLabels) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index ace00e73e..c95e7747f 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -1,29 +1,33 @@ from pytorch.utils import * from collections import Counter from sequences.rowReaders import * +from pytorch.layers import Layers -class Metal(): +from torch.optim import SGD, Adam + +class Metal(object): """docstring for Metal""" def __init__(self, taskManager, modelOpt): + self.taskManager = taskManager + # One Layers object per task; model(0) contains the Layers shared between all tasks (if any) if modelOpt: self.model = modelOpt else: self.model = self.initialize() - self.taskManager = taskManager def initialize(self): - taskWords, taskLabels = mkVocabularies() + taskWords, taskLabels = self.mkVocabularies() - layersPerTask = [None for _ in range(taskManager.taskCount + 1)] + layersPerTask = [None for _ in range(self.taskManager.taskCount + 1)] - 
layersPerTask[0] = Layers.apply(taskManager, "mtl.layers", taskWords[0], None, False, None) + layersPerTask[0] = Layers.apply(self.taskManager, "mtl.layers", taskWords[0], None, False, None) inputSize = layersPerTask[0].outDim - for i in taskManager.indices: - layersPerTask[i+1] = Layers.apply(taskManager, f"mtl.task{i+1}.layers", taskWords[i + 1], taskLabels[i + 1], taskManager.tasks[i].isDual, inputSize) + for i in self.taskManager.indices: + layersPerTask[i+1] = Layers.apply(self.taskManager, f"mtl.task{i+1}.layers", taskWords[i + 1], taskLabels[i + 1], self.taskManager.tasks[i].isDual, inputSize) for i in range(len(layersPerTask)): print (f"Summary of layersPerTask({i}):") @@ -33,17 +37,17 @@ def initialize(self): def mkVocabularies(self): # index 0 reserved for the shared Layers; tid + 1 corresponds to each task - labels = [Counter() for _ in range(taskManager.taskCount + 1)] + labels = [Counter() for _ in range(self.taskManager.taskCount + 1)] for i in range(1, len(labels)): # labels(0) not used, since only task-specific layers have a final layer labels[i][START_TAG] += 1 labels[i][STOP_TAG] += 1 - words = [Counter() for _ in range(taskManager.taskCount + 1)] + words = [Counter() for _ in range(self.taskManager.taskCount + 1)] reader = MetalRowReader() - for tid in taskManager.indices: - for sentence in taskManager.tasks[tid].trainSentences: + for tid in self.taskManager.indices: + for sentence in self.taskManager.tasks[tid].trainSentences: annotatedSentences = reader.toAnnotatedSentences(sentence) for asent in annotatedSentences: @@ -56,3 +60,46 @@ def mkVocabularies(self): return words, labels + def train(self, modelNamePrefix): + learningRate = self.taskManager.get_float("mtl.learningRate", 0.001) + trainerType = self.taskManager.get_string("mtl.trainer", "adam") + batchSize = self.taskManager.get_int("mtl.batchSize", 1) + assert(batchSize>0) + + parameters = list() + for layers in self.model: + parameters += layers.get_parameters() + + if trainerType == 
"adam": + trainer = Adam(parameters, lr=learningRate) + elif trainerType == "rmsprop": + trainer = RMSprop(parameters, lr=learningRate) + elif trainerType == "sgd": + trainer = SDG(parameters, lr=learningRate) + else: + raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") + + reader = MetalRowReader() + + cummulativeLoss = 0.0 + numTagged = 0 + + maxAvgAcc = 0.0 + maxAvgF1 = 0.0 + bestEpoch = 0 + + allEpochScores = list() + epochPatience = self.taskManager.epochPatience + + for epoch in range(0, self.taskManager.maxEpochs): + if epochPatience <= 0: + break + + + + + + + + + diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index c5aef820a..ee1896f8e 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -1,5 +1,5 @@ -from intermediateLayer import IntermediateLayer -from utils import * +from pytorch.intermediateLayer import IntermediateLayer +from pytorch.utils import * import torch import torch.nn as nn @@ -12,7 +12,7 @@ def __init__(self, rnnType, wordRnnBuilder, dropoutProb): - + super().__init__() self.inDim = self.inputSize = inputSize self.numLayers = numLayers self.rnnStateSize = rnnStateSize @@ -64,7 +64,7 @@ def load(cls, x2i): @classmethod def initialize(cls, config, paramPrefix, inputSize): - if(not config.__contains__(paramPrefix)): + if(not config.contains(paramPrefix)): return None numLayers = config.get_int(paramPrefix + ".numLayers", 1) @@ -73,9 +73,9 @@ def initialize(cls, config, paramPrefix, inputSize): rnnType = config.get_string(paramPrefix + ".type", "lstm") dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) - builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize) + builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) - return (inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) + return cls(inputSize, numLayers, rnnStateSize, 
useHighwayConnections, rnnType, builder, dropoutProb) def mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb): if rnnType == 'gru': diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index f5d1ae868..25e669eb0 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -6,7 +6,7 @@ TYPE_BASIC = 0 TYPE_DUAL = 1 -class TaskManager: +class TaskManager(): def __init__(self, config, seed): @@ -31,6 +31,27 @@ def __init__(self, config, seed): # Training shards from all tasks self.shards = self.mkShards() + def contains(self, paramPrefix): + return self.config.__contains__(paramPrefix) + + def get_int(self, x, defualt=None): + return self.config.get_int(x, defualt) + + def get_string(self, x, defualt=None): + return self.config.get_string(x, defualt) + + def get_float(self, x, defualt=None): + return self.config.get_float(x, defualt) + + def get_bool(self, x, defualt=None): + return self.config.get_bool(x, defualt) + + def get_list(self, x, defualt=None): + return self.config.get_list(x, defualt) + + def get_config(self, x, defualt=None): + return self.config.get_config(x, defualt) + # Construct training shards by interleaving shards from all tasks def mkShards(self): shardsByTasks = list() diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index dd1709a8f..049ca8845 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -56,14 +56,16 @@ def readString2Ids(s2iFilename): if not line.startswith("#"): k, v = line.strip().split('\t') s2i[k] = int(v) + return s2i def readChar2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: for line in f: - if not line.startswith("#"): + if not line.startswith("#") and line.rstrip(): k, v = line.strip().split('\t') - s2i[char(int(k))] = int(v) + s2i[chr(int(k))] = int(v) + return s2i def transduce(embeddings, builder): @@ -75,14 +77,14 
@@ def transduce(embeddings, builder): if bi_direct: (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) - else; + else: (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.zeros(2, 1, hidden_dim) output, result = builder(embeddings.view(len(word), 1, -1), h) - else; + else: h = torch.zeros(1, 1, hidden_dim) output, result = builder(embeddings.view(len(word), 1, -1), h) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 636b130c4..4b025293b 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -1,7 +1,7 @@ -from forwardLayer import * -from utils import * +from pytorch.forwardLayer import * +from pytorch.utils import * -class GreedyForwardLayer(ForwardLayer): +class ViterbiForwardLayer(ForwardLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index c75532f0e..fc4e1385a 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -20,8 +20,8 @@ modelName = args.model_file print (taskManager.debugTraversal()) - mtl = Metal(taskManager, None, None) - # mtl.train(modelName) + mtl = Metal(taskManager, None) + mtl.train(modelName) elif args.test: pass elif args.shell: diff --git a/main/src/main/python/sequences/columnReader.py b/main/src/main/python/sequences/columnReader.py index 0f8c04610..e162316f7 100644 --- a/main/src/main/python/sequences/columnReader.py +++ b/main/src/main/python/sequences/columnReader.py @@ -44,4 +44,4 @@ def __init__(self, tokens): 
def get(self, idx): if(idx >= self.length): raise RuntimeError(f"ERROR: trying to read field #{idx}, which does not exist in this row: {tokens}!") - return tokens[idx] + return self.tokens[idx] diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index 0aa409756..58a15cb71 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ b/main/src/main/python/sequences/rowReaders.py @@ -26,18 +26,18 @@ def __init__(self): self.LABEL_START_OFFSET = 3 def toAnnotatedSentences(self, rows): - if (len(rows.head) == 2): - self.parseSimple(rows) - elif (len(rows.head) == 4): - self.parseSimpleExtended(rows) - elif (len(rows.head) >= 5): - self.parseFull(rows) + if (rows[0].length == 2): + return self.parseSimple(rows) + elif (rows[0].length == 4): + return self.parseSimpleExtended(rows) + elif (rows[0].length >= 5): + return self.parseFull(rows) else: raise RuntimeError("ERROR: the Metal format expects 2, 4, or 5+ columns!") # Parser for the simple format: word, label - def parseSimple(rows): - assert(len(rows.head) == 2) + def parseSimple(self, rows): + assert(rows[0].length == 2) words = list() labels = list() @@ -45,11 +45,11 @@ def parseSimple(rows): words += [row.get(self.WORD_POSITION)] labels += [row.get(self.WORD_POSITION + 1)] - return AnnotatedSentence(words), labels + return [(AnnotatedSentence(words), labels)] # Parser for the simple extended format: word, POS tag, NE label, label - def parseSimpleExtended(rows): - assert(len(rows.head) == 4) + def parseSimpleExtended(self, rows): + assert(rows[0].length == 4) words = list() posTags = list() neLabels = list() @@ -61,12 +61,12 @@ def parseSimpleExtended(rows): neLabels += [row.get(self.NE_LABEL_POSITION)] labels += [row.get(self.LABEL_START_OFFSET)] - return AnnotatedSentence(words), posTags, neLabels, labels + return [(AnnotatedSentence(words), posTags, neLabels, labels)] # Parser for the full format: word, POS tag, NE label, (label head)+ - def parseFull(rows): 
- assert(len(rows.head) >= 5) - numSent = (len(rows.head) - 3) / 2 + def parseFull(self, rows): + assert(rows[0].length >= 5) + numSent = (rows[0].length - 3) / 2 assert(numSent >= 1) words = list() From 4cbeb68cd8789d334c19d7c2174c1a8de78cab5d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 04:56:39 -0700 Subject: [PATCH 013/134] finished the whole model except the viterbi part --- .../python/embeddings/wordEmbeddingMap.py | 1 - .../python/pytorch/constEmbeddingsGlove.py | 8 +- .../src/main/python/pytorch/embeddingLayer.py | 12 +- main/src/main/python/pytorch/layers.py | 39 +++- main/src/main/python/pytorch/metal.py | 210 +++++++++++++++++- main/src/main/python/pytorch/seqScorer.py | 96 ++++++++ main/src/main/python/pytorch/utils.py | 23 +- 7 files changed, 370 insertions(+), 19 deletions(-) create mode 100644 main/src/main/python/pytorch/seqScorer.py diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 327b4befc..49ab541e9 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -9,7 +9,6 @@ def isOutOfVocabulary(self, word): return word not in self.emb_dict def load(config): - emb_matrix = None emb_dict = dict() for line in open(config.get_string("glove.matrixResourceName")): if not len(line.split()) == 2: diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index 52547b6a8..d2589f9d3 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -2,6 +2,8 @@ import torch.nn as nn from embeddings.wordEmbeddingMap import * from pyhocon import ConfigFactory +import numpy as np +import torch @dataclass class ConstEmbeddingParameters: @@ -21,11 +23,11 @@ def load(self, config): def mkConstLookupParams(self, words): w2i = dict() + weights = np.zeros((len(words), self.dim)) for i,w 
in enumerate(words): - weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emd_dict[0]) + weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict[""]) w2i[w] = i - emd = nn.Embedding.from_pretrained(weight) - emd.weight.requires_grad=False + emb = nn.Embedding.from_pretrained(torch.tensor(weights), freeze=True) return ConstEmbeddingParameters(emb ,w2i) ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 2ebea6f7b..15242ed7a 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -87,16 +87,16 @@ def forward(self, sentence, constEmbeddings, doDropout): if(headPositions): assert(len(headPositions) == len(words)) # build the word embeddings one by one - embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, tags, nes, headPositions) + embeddings = self.mkEmbeddings(words, constEmbeddingsExpressions, doDropout, tags, nes, headPositions) return embeddings def mkConstEmbeddings(self, words, constEmbeddings): - idxs = [constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words] - embeddings = self.constEmbeddings.emb(idxs) + idxs = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(idxs) return embeddings - def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPositions=None): + def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, headPositions=None): # # Learned word embeddings # These are initialized randomly, and updated during backprop @@ -107,14 +107,14 @@ def mkEmbeddings(self, words, constEmbeddings, tags=None, nes=None, headPosition wordPositions.append(i) id = self.w2i.get(word, 0) # 0 reserved for UNK in the vocab # sample uniformly with 
prob 0.5 from singletons; move all other singletons to UNK - if(self.doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 + if(doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 ids.append(id) learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) # # biLSTM over character embeddings # - charEmbedding = torch.stack([mkCharacterEmbedding(word, c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) + charEmbedding = torch.stack([mkCharacterEmbedding(word, self.c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) # # POS tag embedding diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 4c1a1889d..7e01c2594 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -48,8 +48,45 @@ def get_parameters(self): parameters += [p for p in self.finalLayer.parameters() if p.requires_grad] return parameters + def start_train(self): + if self.initialLayer is not None: + self.initialLayer.train() + for il in self.intermediateLayers: + il.train() + if self.finalLayer is not None: + self.finalLayer.train() + + def start_eval(self): + if self.initialLayer is not None: + self.initialLayer.eval() + for il in self.intermediateLayers: + il.eval() + if self.finalLayer is not None: + self.finalLayer.eval() + + def get_state_dict(self): + params = dict() + if self.initialLayer is not None: + params['initialLayer'] = self.initialLayer.state_dict() + if self.intermediateLayers: + params['intermediateLayers'] = list() + for il in self.intermediateLayers: + params['intermediateLayers'].append(il.state_dict()) + if self.finalLayer is not None: + params['finalLayer'] = self.finalLayer.state_dict() + return params + + def load_state_dict(self, params): + if self.initialLayer is not None: + self.initialLayer.load_state_dict(params['initialLayer']) + for i, il in enumerate(self.intermediateLayers): + 
il.load_state_dict(params['intermediateLayers'][i]) + if self.finalLayer is not None: + self.finalLayer.load_state_dict(params['finalLayer']) + + def forward(self, sentence, constEmbeddings, doDropout): - if self.initialLayer.isEmpty: + if self.initialLayer is None: raise RuntimeError(f"ERROR: you can't call forward() on a Layers object that does not have an initial layer: {self}!") states = self.initialLayer(sentence, constEmbeddings, doDropout) for intermediateLayer in self.intermediateLayers: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index c95e7747f..a2136529b 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -2,9 +2,12 @@ from collections import Counter from sequences.rowReaders import * from pytorch.layers import Layers +from pytorch.seqScorer import * from torch.optim import SGD, Adam +import json + class Metal(object): """docstring for Metal""" def __init__(self, taskManager, modelOpt): @@ -94,7 +97,212 @@ def train(self, modelNamePrefix): for epoch in range(0, self.taskManager.maxEpochs): if epochPatience <= 0: break - + # this fetches randomized training sentences from all tasks + sentenceIterator = self.taskManager.getSentences() + sentCount = 0 + + for layers in self.model: + layers.start_train() + trainer.zero_grad() + + batchLosses = list() + + # traverse all training sentences + for metaSentence in sentenceIterator: + taskId = metaSentence[0] + sentence = metaSentence[1] + + sentCount += 1 + + annotatedSentences = reader.toAnnotatedSentences(sentence) + assert(annotatedSentences is not None) + + unweightedLoss = sum([Layers.loss(self.model, taskId, a_sent[0], a_sent[1]) for a_sent in annotatedSentences]) + + loss = unweightedLoss * self.taskManager.tasks[taskId].taskWeight # Zheng: I don't think this is necessary: if self.taskManager.tasks[taskId].taskWeight!=1.0 else unweightedLoss + + batchLosses.append(loss) + + if len(batchLosses) >= batchSize: + 
batchLoss = sum(batchLosses) + cummulativeLoss = batchLoss.item() + batchLoss.backward() + trainer.step() + batchLosses = list() + + numTagged += len(sentence) + + if(sentCount % 1000 == 0): + print (f"Cumulative loss: {cummulativeLoss/numTagged} ({sentCount} {sentences})") + cummulativeLoss = 0.0 + numTagged = 0 + # we may have an incomplete batch here + if batchLosses: + batchLoss = sum(batchLosses) + cummulativeLoss = batchLoss.item() + batchLoss.backward() + trainer.step() + batchLosses = list() + + # check dev performance in this epoch, for all tasks + totalAcc = 0.0 + totalPrec = 0.0 + totalRec = 0.0 + totalF1 = 0.0 + for taskId in range(0, self.taskManager.taskCount): + taskName = self.taskManager.tasks[taskId].taskName + devSentences = self.taskManager.tasks[taskId].devSentences + + if devSentences: + acc, prec, rec, f1 = self.evaluate(taskId, taskName, devSentences, "development", epoch) + totalAcc += acc + totalPrec += prec + totalRec += rec + totalF1 += f1 + + avgAcc = totalAcc / taskManager.taskCount + avgPrec = totalPrec / taskManager.taskCount + avgRec = totalRec / taskManager.taskCount + avgF1 = totalF1 / taskManager.taskCount + + print (f"Average accuracy across {taskManager.taskCount} tasks in epoch {epoch}: {avgAcc}") + print (f"Average P/R/F1 across {taskManager.taskCount} tasks in epoch $epoch: {avgPrec} / {avgRec} / {avgF1}") + + allEpochScores.append((epoch, avgF1)) + + if avgF1 > maxAvgF1: + maxAvgF1 = avgF1 + maxAvgAcc = avgAcc + bestEpoch = epoch + epochPatience = self.taskManager.epochPatience + else: + epochPatience -= 1 + + self.save(f"{modelNamePrefix}-epoch{epoch}") + + allEpochScores.sort(key=lambda x: x[1]) + print ("Epochs in descending order of scores:") + for t in allEpochScores: + print (f"Epoch #{t[0]}: {t[1]}") + + def evaluate(self, taskId, taskName, sentences, name, epoch=-1): + scoreCountsByLabel = ScoreCountsByLabel() + taskNumber = taskId + 1 + sentCount = 0 + + print (f"Started evaluation on the {name} dataset for task 
{taskNumber} ({taskName})...") + + if epoch >= 0: + pw = open(f"task{taskNumber}.dev.output.{epoch}", "w") + else: + pw = open(f"task{taskNumber}.test.output", "w") + + reader = MetalRowReader() + + for sent in sentences: + sentCount += 1 + + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) + preds = Layers.predict(self.model, taskId, sentence, constEmbeddings) + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + printCoNLLOutput(pw, sentence.words, goldLabels, preds) + + pw.close() + + print (f"Accuracy on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.accuracy()}") + print (f"Precision on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.precision()}") + print (f"Recall on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.recall()}") + print (f"Micro F1 on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels: + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map(label).gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + + return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) + + def predictJointly(self, sentence, constEmbeddings): + return Layers.predictJointly(self.model, sentence, constEmbeddings) + + def predict(self, taskId, sentence, constEmbeddings): + return Layers.predict(self.model, taskId, sentence, constEmbeddings) + + def predictWithScores(self, taskId, sentence, constEmbeddings): + return Layers.predictWithScores(self.model, taskId, sentence, constEmbeddings) + + # Custom method for the parsing 
algorithm + # @param sentence Input sentence + # @param constEmbeddings Constant embeddings for this sentence + # @return Tuple of (head, label) for each word in the sentence + def parse(self, sentence, constEmbeddings): + Layers.parse(self.model, sentence, constEmbeddings) + + def test(self): + taskName = taskManager.tasks[taskId].taskName + testSentences = taskManager.tasks[taskId].testSentences + if testSentences: + self.evaluate(taskId, taskName, devSentences, "testing") + + def save(self, baseFilename): + + params = list() + for layers in self.model: + sd = layers.get_state_dict() + x2i = layers.saveX2i() + params.append({"model": sd, "x2i": x2i}) + + # torch pickle save + try: + torch.save(params, baseFilename) + print("model saved to {}".format(baseFilename+".torch")) + except BaseException: + print("[Warning: Saving failed... continuing anyway.]") + + # We can also save as text json file: + with open(baseFilename+".json") as f: + f.write(json.dumps(params)) + + + @classmethod + def load(cls, modelFilenamePrefix): + print (f"Loading MTL model from {modelFilenamePrefix}...") + layersSeq = list() + checkpoint = torch.load(modelFilenamePrefix+".torch") + for param in checkpoint: + layers = loadX2i(param['x2i']) + layers.load_state_dict(param['model']) + layersSeq.append(layers) + + print (f"Loading MTL model from {modelFilenamePrefix} complete.") + + return layersSeq + + @classmethod + def apply(cls, modelFilenamePrefix, taskManager=None): + model = Metal.load(modelFilenamePrefix) + return cls(taskManager, model) + + + + + + + + + + + + + + + + diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py new file mode 100644 index 000000000..144e5bfe9 --- /dev/null +++ b/main/src/main/python/pytorch/seqScorer.py @@ -0,0 +1,96 @@ +from dataclasses import dataclass +from collections import defaultdict + +OUTSIDE_LABEL = "O" + +@dataclass +class ScoreCounts: + correct: int + gold: int + predicted: int + +class SeqScorer: + + 
@staticmethod + def f1(golds, preds): + scoreCountsByLabel = scoreCountsByLabel() + + for e1, e2 in zip(preds, golds): + scoreCountsByLabel.total += 1 + if e1 == e2: + scoreCountsByLabel.correct += 1 + if e2 != OUTSIDE_LABEL: + scoreCountsByLabel.incGold() + scoreCountsByLabel.incGold(e2) + if e1 != OUTSIDE_LABEL: + scoreCountsByLabel.incPredicted() + scoreCountsByLabel.incPredicted(e1) + if e1 == e2: + scoreCountsByLabel.incCorrect() + scoreCountsByLabel.incCorrect(e1) + return scoreCountsByLabel + +class ScoreCountsByLabel: + + def __init__(self): + self.map = defaultdict(ScoreCounts) + self.total = 0 + self.correct = 0 + + def labels(self): + return self.map.keys() + + def incGold(self, label="*", value=1): + counts = self.map[label] + counts.gold += value + + def incPredicted(self, label="*", value=1): + counts = self.map[label] + counts.predicted += value + + def incCorrect(self, label="*", value=1): + counts = self.map[label] + counts.correct += value + + def incAll(self, counts): + correct += counts.correct + total += counts.total + + for label in counts.labels(): + c = counts.map[label] + incGold(label, c.gold) + incPredicted(label, c.predicted) + incCorrect(label, c.correct) + + def precision(self, label="*", decimals=2): + c = self.map[label].correct + p = self.map[label].predicted + + prec = c/p if p!=0 else 0 + + return round(prec, decimals) if decimals>0 else prec + + def recall(self, label="*", decimals=2): + c = self.map[label].correct + g = self.map[label].gold + + reca = c/g if p!=0 else 0 + + return round(reca, decimals) if decimals>0 else reca + + def f1(self, label="*", decimals=2): + p = self.precision(label, decimals=-1) + r = self.recall(label, decimals=-1) + + f1 = 2.0 * p * r / (p + r), decimals if (p!=0 and r!=0) else 0 + + return round(f1, decimals) if decimals>0 else f1 + + def accuracy(self, decimals=2): + a = self.correct / self.total + + return round(a, decimals) if decimals>0 else a + + + + diff --git 
a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 049ca8845..6d80e4169 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -46,7 +46,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) - _, result = transduce(charEmbeddings, charRnnBuilder, True) + _, result = transduce(charEmbeddings, charRnnBuilder, len(word)) return result.view(1, hidden_dim*2) def readString2Ids(s2iFilename): @@ -67,26 +67,26 @@ def readChar2Ids(s2iFilename): s2i[chr(int(k))] = int(v) return s2i -def transduce(embeddings, builder): +def transduce(embeddings, builder, l): hidden_dim = builder.hidden_size bi_direct = builder.bidirectional - mode = build.mode + mode = builder.mode if mode == 'LSTM': if bi_direct: (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) + output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) else: (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(len(word), 1, -1), (h, c)) + output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.zeros(2, 1, hidden_dim) - output, result = builder(embeddings.view(len(word), 1, -1), h) + output, result = builder(embeddings.view(l, 1, -1), h) else: h = torch.zeros(1, 1, hidden_dim) - output, result = builder(embeddings.view(len(word), 1, -1), h) + output, result = builder(embeddings.view(l, 1, -1), h) return output, result @@ -109,6 +109,15 @@ def emissionScoresToArrays(expressions): probs = expr.data.tolist() lattice += [probs] return lattice + +def printCoNLLOutput(pw, words, golds, preds): + + assert(len(words) == len(golds)) + assert(len(words) == 
len(preds)) + + for i in range(len(words)): + pw.write(f"{words[i]} {golds[i]} {preds[i]}\n") + pw.write("\n") From 470eca9594a3be5f50134207a19cea6767aa57ff Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 12:26:21 -0700 Subject: [PATCH 014/134] finally training... --- .../python/pytorch/constEmbeddingsGlove.py | 2 +- main/src/main/python/pytorch/forwardLayer.py | 6 +++--- main/src/main/python/pytorch/layers.py | 4 ++-- main/src/main/python/pytorch/metal.py | 2 +- main/src/main/python/pytorch/rnnLayer.py | 3 +-- main/src/main/python/pytorch/taskManager.py | 2 ++ main/src/main/python/pytorch/utils.py | 21 +++++++++++-------- 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index d2589f9d3..cf887909d 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -27,7 +27,7 @@ def mkConstLookupParams(self, words): for i,w in enumerate(words): weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict[""]) w2i[w] = i - emb = nn.Embedding.from_pretrained(torch.tensor(weights), freeze=True) + emb = nn.Embedding.from_pretrained(torch.FloatTensor(weights), freeze=True) return ConstEmbeddingParameters(emb ,w2i) ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 92b7a133d..492cb3cca 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -36,16 +36,16 @@ def pickSpan(self, v): vs.append(e) return torch.cat(vs) - def forward(inputExpressions, doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, doDropout, headPositionsOpt = None): emissionScores = list() if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
for i, e in enumerate(inputExpressions): argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) - if nonlinearity == NONLIN_TANH: + if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) - elif nonlinearity == NONLIN_RELU: + elif self.nonlinearity == NONLIN_RELU: l1 = F.relu(l1) emissionScores.append(l1) else: diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 7e01c2594..569addce3 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -92,7 +92,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, sentence.headPositions, doDropout) + states = self.finalLayer(states, doDropout, sentence.headPositions) return states @@ -103,7 +103,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, sentence.headPositions, doDropout) + states = self.finalLayer(states, doDropout, headPositions) return states diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index a2136529b..2ee0ac3e9 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -133,7 +133,7 @@ def train(self, modelNamePrefix): numTagged += len(sentence) if(sentCount % 1000 == 0): - print (f"Cumulative loss: {cummulativeLoss/numTagged} ({sentCount} {sentences})") + print (f"Cumulative loss: {cummulativeLoss/numTagged} ({sentCount} sentences)") cummulativeLoss = 0.0 numTagged = 0 # we may have an incomplete batch here diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 
ee1896f8e..633d6b65b 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,9 +29,8 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States, _ = transduce(inputExpressions, self.wordRnnBuilder) - if self.useHighwayConnections: - States = torch.cat([States, inputExpressions], dim=1) + States = torch.cat([States.squeeze(1), inputExpressions], dim=1) return States diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index 25e669eb0..5f6ecd76c 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -163,6 +163,8 @@ def __init__(self, # Current position in the training sentences when we iterate during training currentTrainingSentencePosition = 0 + self.taskWeight = taskWeight + print (f"============ starting task {taskNumber} ============") print (f"Read {len(self.trainSentences)} training sentences for task {taskNumber}, with shard size {self.shardSize}.") if(self.devSentences is not None): diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 6d80e4169..38a9b6186 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -46,8 +46,9 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) - _, result = transduce(charEmbeddings, charRnnBuilder, len(word)) - return result.view(1, hidden_dim*2) + _, result = transduce(charEmbeddings, charRnnBuilder) + # Zheng: Not sure if this is the right way to concatenate the two direction hidden states + return result.view(hidden_dim*2) def readString2Ids(s2iFilename): s2i = dict() @@ -67,26 +68,28 @@ def readChar2Ids(s2iFilename): s2i[chr(int(k))] = int(v) return s2i -def 
transduce(embeddings, builder, l): +def transduce(embeddings, builder): + + builder = builder.float() hidden_dim = builder.hidden_size bi_direct = builder.bidirectional mode = builder.mode - + if mode == 'LSTM': if bi_direct: (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) + output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) else: (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) - output, (result, c) = builder(embeddings.view(l, 1, -1), (h, c)) + output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.zeros(2, 1, hidden_dim) - output, result = builder(embeddings.view(l, 1, -1), h) + output, result = builder(embeddings.unsqueeze(1), h) else: h = torch.zeros(1, 1, hidden_dim) - output, result = builder(embeddings.view(l, 1, -1), h) + output, result = builder(embeddings.unsqueeze(1), h) return output, result @@ -98,7 +101,7 @@ def expressionDropout(expression, dropoutProb, doDropout): return expression def sentenceLossGreedy(emissionScoresForSeq, golds): - assert(emissionScoresForSeq.shape(0) == len(golds)) + assert(emissionScoresForSeq.size(0) == len(golds)) criterion = nn.CrossEntropyLoss() golds = Variable(torch.LongTensor(golds)) return criterion(emissionScoresForSeq, golds) From 2b91e036f47bedcb80796c4817931b170cffd589 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 30 Sep 2021 23:03:55 -0700 Subject: [PATCH 015/134] the training pipeline is working now --- .../main/python/pytorch/greedyForwardLayer.py | 3 +- main/src/main/python/pytorch/layers.py | 9 +++-- main/src/main/python/pytorch/metal.py | 35 ++++++++++--------- main/src/main/python/pytorch/rnnLayer.py | 2 +- main/src/main/python/pytorch/seqScorer.py | 22 ++++++------ main/src/main/python/pytorch/taskManager.py | 1 + main/src/main/python/pytorch/utils.py | 4 +-- 7 files changed, 42 insertions(+), 34 deletions(-) diff --git 
a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 2d9ddeeae..86ad8cbf1 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -26,8 +26,7 @@ def __str__(self): return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): - labelIds = np.argmax(lattice.data.numpy(), axis=1).tolist() - return [self.i2t[i] for i in labelIds] + return [self.i2t[np.argmax(es)] for es in emissionScores] def inferenceWithScores(self, emissionScores): return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 569addce3..f8f9ecfb4 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -66,15 +66,20 @@ def start_eval(self): def get_state_dict(self): params = dict() + j_params = dict() if self.initialLayer is not None: params['initialLayer'] = self.initialLayer.state_dict() + j_params['initialLayer'] = {k:v.data.tolist() for k, v in params['initialLayer'].items()} if self.intermediateLayers: params['intermediateLayers'] = list() + j_params['intermediateLayers'] = list() for il in self.intermediateLayers: params['intermediateLayers'].append(il.state_dict()) + j_params['intermediateLayers'].append({k:v.data.tolist() for k, v in params['intermediateLayers'][-1].items()}) if self.finalLayer is not None: params['finalLayer'] = self.finalLayer.state_dict() - return params + j_params['finalLayer'] = {k:v.data.tolist() for k, v in params['finalLayer'].items()} + return params, j_params def load_state_dict(self, params): if self.initialLayer is not None: @@ -114,7 +119,7 @@ def saveX2i(self): x2i['initialLayer'] = self.initialLayer.saveX2i() else: x2i['hasInitial'] = 0 - x2i['intermediateCount'] = len(intermediateLayers) + 
x2i['intermediateCount'] = len(self.intermediateLayers) x2i['intermediateLayers'] = list() for il in self.intermediateLayers: x2i['intermediateLayers'].append(il.saveX2i()) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 2ee0ac3e9..ccfdad2c6 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -3,6 +3,7 @@ from sequences.rowReaders import * from pytorch.layers import Layers from pytorch.seqScorer import * +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from torch.optim import SGD, Adam @@ -160,13 +161,13 @@ def train(self, modelNamePrefix): totalRec += rec totalF1 += f1 - avgAcc = totalAcc / taskManager.taskCount - avgPrec = totalPrec / taskManager.taskCount - avgRec = totalRec / taskManager.taskCount - avgF1 = totalF1 / taskManager.taskCount + avgAcc = totalAcc / self.taskManager.taskCount + avgPrec = totalPrec / self.taskManager.taskCount + avgRec = totalRec / self.taskManager.taskCount + avgF1 = totalF1 / self.taskManager.taskCount - print (f"Average accuracy across {taskManager.taskCount} tasks in epoch {epoch}: {avgAcc}") - print (f"Average P/R/F1 across {taskManager.taskCount} tasks in epoch $epoch: {avgPrec} / {avgRec} / {avgF1}") + print (f"Average accuracy across {self.taskManager.taskCount} tasks in epoch {epoch}: {avgAcc}") + print (f"Average P/R/F1 across {self.taskManager.taskCount} tasks in epoch $epoch: {avgPrec} / {avgRec} / {avgF1}") allEpochScores.append((epoch, avgF1)) @@ -218,12 +219,12 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): pw.close() - print (f"Accuracy on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.accuracy()}") - print (f"Precision on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.precision()}") - print (f"Recall on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): 
{scoreCountsByLabel.recall()}") - print (f"Micro F1 on {sentences.length} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.f1()}") - for label in scoreCountsByLabel.labels: - print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map(label).gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + print (f"Accuracy on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.accuracy()}") + print (f"Precision on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.precision()}") + print (f"Recall on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.recall()}") + print (f"Micro F1 on {len(sentences)} {name} sentences for task {taskNumber} ({taskName}): {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) @@ -252,21 +253,23 @@ def test(self): def save(self, baseFilename): params = list() + j_params = list() for layers in self.model: - sd = layers.get_state_dict() + sd, j_sd = layers.get_state_dict() x2i = layers.saveX2i() params.append({"model": sd, "x2i": x2i}) + j_params.append({"model": j_sd, "x2i": x2i}) # torch pickle save try: - torch.save(params, baseFilename) + torch.save(params, baseFilename+".torch") print("model saved to {}".format(baseFilename+".torch")) except BaseException: print("[Warning: Saving failed... 
continuing anyway.]") # We can also save as text json file: - with open(baseFilename+".json") as f: - f.write(json.dumps(params)) + with open(baseFilename+".json", "w") as f: + f.write(json.dumps(j_params)) @classmethod diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 633d6b65b..3384e89ce 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -39,7 +39,7 @@ def saveX2i(self): x2i['inputSize'] = self.inputSize x2i['numLayers'] = self.numLayers x2i['rnnStateSize'] = self.rnnStateSize - x2i['useHighwayConnections'] = 1 if useHighwayConnections else 0 + x2i['useHighwayConnections'] = 1 if self.useHighwayConnections else 0 x2i['rnnType'] = self.rnnType x2i['dropoutProb'] = self.dropoutProb return x2i diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 144e5bfe9..0855ff120 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -5,15 +5,15 @@ @dataclass class ScoreCounts: - correct: int - gold: int - predicted: int + correct: int = 0 + gold: int = 0 + predicted: int = 0 class SeqScorer: @staticmethod def f1(golds, preds): - scoreCountsByLabel = scoreCountsByLabel() + scoreCountsByLabel = ScoreCountsByLabel() for e1, e2 in zip(preds, golds): scoreCountsByLabel.total += 1 @@ -53,14 +53,14 @@ def incCorrect(self, label="*", value=1): counts.correct += value def incAll(self, counts): - correct += counts.correct - total += counts.total + self.correct += counts.correct + self.total += counts.total for label in counts.labels(): c = counts.map[label] - incGold(label, c.gold) - incPredicted(label, c.predicted) - incCorrect(label, c.correct) + self.incGold(label, c.gold) + self.incPredicted(label, c.predicted) + self.incCorrect(label, c.correct) def precision(self, label="*", decimals=2): c = self.map[label].correct @@ -74,7 +74,7 @@ def recall(self, label="*", decimals=2): c = 
self.map[label].correct g = self.map[label].gold - reca = c/g if p!=0 else 0 + reca = c/g if g!=0 else 0 return round(reca, decimals) if decimals>0 else reca @@ -82,7 +82,7 @@ def f1(self, label="*", decimals=2): p = self.precision(label, decimals=-1) r = self.recall(label, decimals=-1) - f1 = 2.0 * p * r / (p + r), decimals if (p!=0 and r!=0) else 0 + f1 = 2.0 * p * r / (p + r) if (p!=0 and r!=0) else 0 return round(f1, decimals) if decimals>0 else f1 diff --git a/main/src/main/python/pytorch/taskManager.py b/main/src/main/python/pytorch/taskManager.py index 5f6ecd76c..ba8eab3cd 100644 --- a/main/src/main/python/pytorch/taskManager.py +++ b/main/src/main/python/pytorch/taskManager.py @@ -164,6 +164,7 @@ def __init__(self, currentTrainingSentencePosition = 0 self.taskWeight = taskWeight + self.taskName = taskName print (f"============ starting task {taskNumber} ============") print (f"Read {len(self.trainSentences)} training sentences for task {taskNumber}, with shard size {self.shardSize}.") diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 38a9b6186..053339d1e 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -45,7 +45,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size - charEmbeddings = charLookupParameters(torch.LongTensor([c2i[c] for c in word])) + charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) _, result = transduce(charEmbeddings, charRnnBuilder) # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result.view(hidden_dim*2) @@ -109,7 +109,7 @@ def sentenceLossGreedy(emissionScoresForSeq, golds): def emissionScoresToArrays(expressions): lattice = list() for expr in expressions: - probs = expr.data.tolist() + probs = expr.data.numpy() lattice += [probs] return lattice From 
1892713b9a3ab93b9268982db682060c3acf23e5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Fri, 1 Oct 2021 00:19:09 -0700 Subject: [PATCH 016/134] fix some minor issues --- main/src/main/python/embeddings/wordEmbeddingMap.py | 6 ++---- main/src/main/python/pytorch/metal.py | 2 +- main/src/main/python/pytorch/utils.py | 6 +----- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 49ab541e9..95cac8d04 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -16,10 +16,8 @@ def load(config): delimiter = "\t" else: delimiter = " " - line_split = line.rstrip().split(delimiter) - # extract word and vector - word = line_split[0] - x = np.array([float(i) for i in line_split[1:]]) + word, *rest = line.rstrip().split(delimiter) + x = np.array(list(map(float, rest))) vector = (x /np.linalg.norm(x)) embedding_size = vector.shape[0] emb_dict[word] = vector diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index ccfdad2c6..74376cf7f 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -5,7 +5,7 @@ from pytorch.seqScorer import * from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove -from torch.optim import SGD, Adam +from torch.optim import SGD, Adam, RMSprop import json diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 053339d1e..cce14eb5f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -107,11 +107,7 @@ def sentenceLossGreedy(emissionScoresForSeq, golds): return criterion(emissionScoresForSeq, golds) def emissionScoresToArrays(expressions): - lattice = list() - for expr in expressions: - probs = expr.data.numpy() - lattice += [probs] - return lattice + return [expr.data.tolist() for expr in expressions] 
def printCoNLLOutput(pw, words, golds, preds): From 850e9d202797a601abe4b9f588d2e147a9a10986 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 02:22:26 -0700 Subject: [PATCH 017/134] make minor changes, implemented Viterbi decoder --- .../python/embeddings/wordEmbeddingMap.py | 17 +- .../python/pytorch/constEmbeddingsGlove.py | 13 +- main/src/main/python/pytorch/forwardLayer.py | 29 ++-- main/src/main/python/pytorch/layers.py | 5 +- main/src/main/python/pytorch/metal.py | 32 ++-- main/src/main/python/pytorch/utils.py | 6 + .../python/pytorch/viterbiForwardLayer.py | 153 +++++++++++++++--- 7 files changed, 201 insertions(+), 54 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 95cac8d04..f2766c9c8 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -1,15 +1,19 @@ import numpy as np import math +import torch.nn as nn +import torch class WordEmbeddingMap: def __init__(self, config): - self.emb_dict, self.dim = load(config) + self.emb_dict, self.dim, self.w2i, self.emb = load(config) def isOutOfVocabulary(self, word): - return word not in self.emb_dict + return word not in self.w2i def load(config): emb_dict = dict() + w2i = {"":0} + i = 1 for line in open(config.get_string("glove.matrixResourceName")): if not len(line.split()) == 2: if "\t" in line: @@ -17,10 +21,17 @@ def load(config): else: delimiter = " " word, *rest = line.rstrip().split(delimiter) + w2i[word] = i + i += 1 x = np.array(list(map(float, rest))) vector = (x /np.linalg.norm(x)) embedding_size = vector.shape[0] emb_dict[word] = vector base = math.sqrt(6/embedding_size) emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) - return emb_dict, embedding_size \ No newline at end of file + + weights = np.zeros((len(emb_dict), embedding_size)) + for w, i in w2i.items(): + weights[i] = emb_dict[w] + emb = 
nn.Embedding.from_pretrained(torch.FloatTensor(weights), freeze=True) + return emb_dict, embedding_size, w2i, emb \ No newline at end of file diff --git a/main/src/main/python/pytorch/constEmbeddingsGlove.py b/main/src/main/python/pytorch/constEmbeddingsGlove.py index cf887909d..24d298f0b 100644 --- a/main/src/main/python/pytorch/constEmbeddingsGlove.py +++ b/main/src/main/python/pytorch/constEmbeddingsGlove.py @@ -2,7 +2,6 @@ import torch.nn as nn from embeddings.wordEmbeddingMap import * from pyhocon import ConfigFactory -import numpy as np import torch @dataclass @@ -13,6 +12,7 @@ class ConstEmbeddingParameters: class _ConstEmbeddingsGlove: def __init__(self): self.SINGLETON_WORD_EMBEDDING_MAP = None + self.cep = None config = ConfigFactory.parse_file('../resources/org/clulab/glove.conf') self.load(config) self.dim = self.SINGLETON_WORD_EMBEDDING_MAP.dim @@ -20,14 +20,9 @@ def __init__(self): def load(self, config): if self.SINGLETON_WORD_EMBEDDING_MAP is None: self.SINGLETON_WORD_EMBEDDING_MAP = WordEmbeddingMap(config) + self.cep = ConstEmbeddingParameters(self.SINGLETON_WORD_EMBEDDING_MAP.emb, self.SINGLETON_WORD_EMBEDDING_MAP.w2i) - def mkConstLookupParams(self, words): - w2i = dict() - weights = np.zeros((len(words), self.dim)) - for i,w in enumerate(words): - weights[i] = self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict.get(w, self.SINGLETON_WORD_EMBEDDING_MAP.emb_dict[""]) - w2i[w] = i - emb = nn.Embedding.from_pretrained(torch.FloatTensor(weights), freeze=True) - return ConstEmbeddingParameters(emb ,w2i) + def get_ConstLookupParams(self): + return self.cep ConstEmbeddingsGlove = _ConstEmbeddingsGlove() diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 492cb3cca..3a162c525 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -32,23 +32,29 @@ def pickSpan(self, v): # Zheng: Will spans overlap? 
vs = list() for span in self.spans: - e = torch.index_select(v, 0, torch.tensor([span[0], span[1]])) + e = torch.index_select(v, 1, torch.tensor([span[0], span[1]])) vs.append(e) return torch.cat(vs) def forward(self, inputExpressions, doDropout, headPositionsOpt = None): - emissionScores = list() if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - for i, e in enumerate(inputExpressions): - argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) - l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) - if self.nonlinearity == NONLIN_TANH: - l1 = F.tanh(l1) - elif self.nonlinearity == NONLIN_RELU: - l1 = F.relu(l1) - emissionScores.append(l1) + argExp = expressionDropout(self.pickSpan(inputExpressions), self.dropoutProb, doDropout) + emissionScores = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + if self.nonlinearity == NONLIN_TANH: + emissionScores = F.tanh(emissionScores) + elif self.nonlinearity == NONLIN_RELU: + emissionScores = F.relu(emissionScores) + # for i, e in enumerate(inputExpressions): + # argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + # l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + # if self.nonlinearity == NONLIN_TANH: + # l1 = F.tanh(l1) + # elif self.nonlinearity == NONLIN_RELU: + # l1 = F.relu(l1) + # emissionScores.append(l1) else: + emissionScores = list() if headPositionsOpt is None: raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): @@ -68,7 +74,8 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): elif nonlinearity == NONLIN_RELU: l1 = F.relu(l1) emissionScores.append(l1) - return torch.stack(emissionScores) + emissionScores = torch.stack(emissionScores) + return emissionScores @staticmethod def load(x2i): diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 
f8f9ecfb4..178a76693 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -280,9 +280,10 @@ def parse(layers, sentence, constEmbeddings): @staticmethod def loss(layers, taskId, sentence, goldLabels): # Zheng: I am not sure this is the suitable way to load embeddings or not, need help... - constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=True) # use dropout during training! - return layers[taskId+1].finalLayer.loss(states, goldLabels) + loss = layers[taskId+1].finalLayer.loss(states, goldLabels) + return loss diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 74376cf7f..887e31e2c 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -106,7 +106,8 @@ def train(self, modelNamePrefix): layers.start_train() trainer.zero_grad() - batchLosses = list() + batchLoss = 0 + i = 0 # traverse all training sentences for metaSentence in sentenceIterator: @@ -118,18 +119,21 @@ def train(self, modelNamePrefix): annotatedSentences = reader.toAnnotatedSentences(sentence) assert(annotatedSentences is not None) - unweightedLoss = sum([Layers.loss(self.model, taskId, a_sent[0], a_sent[1]) for a_sent in annotatedSentences]) + unweightedLoss = 0 + for a_sent in annotatedSentences: + unweightedLoss += Layers.loss(self.model, taskId, a_sent[0], a_sent[1]) loss = unweightedLoss * self.taskManager.tasks[taskId].taskWeight # Zheng: I don't think this is necessary: if self.taskManager.tasks[taskId].taskWeight!=1.0 else unweightedLoss - batchLosses.append(loss) + batchLoss += loss + i += 1 - if len(batchLosses) >= batchSize: - batchLoss = sum(batchLosses) - cummulativeLoss = batchLoss.item() + if i >= batchSize: + cummulativeLoss += batchLoss.item() batchLoss.backward() trainer.step() - 
batchLosses = list() + batchLoss = 0 + i = 0 numTagged += len(sentence) @@ -138,12 +142,12 @@ def train(self, modelNamePrefix): cummulativeLoss = 0.0 numTagged = 0 # we may have an incomplete batch here - if batchLosses: - batchLoss = sum(batchLosses) + if batchLoss: cummulativeLoss = batchLoss.item() batchLoss.backward() trainer.step() - batchLosses = list() + batchLoss = 0 + i = 0 # check dev performance in this epoch, for all tasks totalAcc = 0.0 @@ -209,7 +213,7 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): sentence = asent[0] goldLabels = asent[1] - constEmbeddings = ConstEmbeddingsGlove.mkConstLookupParams(sentence.words) + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() preds = Layers.predict(self.model, taskId, sentence, constEmbeddings) sc = SeqScorer.f1(goldLabels, preds) @@ -229,12 +233,18 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) def predictJointly(self, sentence, constEmbeddings): + for layers in self.model: + layers.start_eval() return Layers.predictJointly(self.model, sentence, constEmbeddings) def predict(self, taskId, sentence, constEmbeddings): + for layers in self.model: + layers.start_eval() return Layers.predict(self.model, taskId, sentence, constEmbeddings) def predictWithScores(self, taskId, sentence, constEmbeddings): + for layers in self.model: + layers.start_eval() return Layers.predictWithScores(self.model, taskId, sentence, constEmbeddings) # Custom method for the parsing algorithm diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index cce14eb5f..2c6500352 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -117,6 +117,12 @@ def printCoNLLOutput(pw, words, golds, preds): for i in range(len(words)): pw.write(f"{words[i]} {golds[i]} {preds[i]}\n") pw.write("\n") + 
+def log_sum_exp(vec): + max_score = vec[0, argmax(vec)] + max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1]) + return max_score + \ + torch.log(torch.sum(torch.exp(vec - max_score_broadcast))) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 4b025293b..2d8911c98 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -5,28 +5,145 @@ class ViterbiForwardLayer(ForwardLayer): def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans = None): super().__init__(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, dropoutProb, spans) - def initializeTransitions(self): - #TODO - pass - def initTransitionsTo(self, dst, size, startTag, stopTag): - #TODO - pass + # Matrix of transition parameters. Entry i,j is the score of + # transitioning *to* i *from* j. + self.transitions = nn.Parameter( + torch.randn(self.outDim, self.outDim)) + + # These two statements enforce the constraint that we never transfer + # to the start tag and we never transfer from the stop tag + self.transitions.data[t2i[START_TAG], :] = -10000 + self.transitions.data[:, t2i[STOP_TAG]] = -10000 + + def _forward_alg(self, feats): + # Do the forward algorithm to compute the partition function + init_alphas = torch.full((1, self.tagset_size), -10000.) + # START_TAG has all of the score. + init_alphas[0][self.tag_to_ix[START_TAG]] = 0. 
+ + # Wrap in a variable so that we will get automatic backprop + forward_var = init_alphas + + # Iterate through the sentence + for feat in feats: + alphas_t = [] # The forward tensors at this timestep + for next_tag in range(self.tagset_size): + # broadcast the emission score: it is the same regardless of + # the previous tag + emit_score = feat[next_tag].view( + 1, -1).expand(1, self.tagset_size) + # the ith entry of trans_score is the score of transitioning to + # next_tag from i + trans_score = self.transitions[next_tag].view(1, -1) + # The ith entry of next_tag_var is the value for the + # edge (i -> next_tag) before we do log-sum-exp + next_tag_var = forward_var + trans_score + emit_score + # The forward variable for this tag is log-sum-exp of all the + # scores. + alphas_t.append(log_sum_exp(next_tag_var).view(1)) + forward_var = torch.cat(alphas_t).view(1, -1) + terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + alpha = log_sum_exp(terminal_var) + return alpha + + def _score_sentence(self, feats, tags): + # Gives the score of a provided tag sequence + score = torch.zeros(1) + tags = torch.cat([torch.tensor([self.tag_to_ix[START_TAG]], dtype=torch.long), tags]) + for i, feat in enumerate(feats): + score = score + \ + self.transitions[tags[i + 1], tags[i]] + feat[tags[i + 1]] + score = score + self.transitions[self.tag_to_ix[STOP_TAG], tags[-1]] + return score + + def _viterbi_decode(self, feats): + backpointers = [] + + # Initialize the viterbi variables in log space + init_vvars = torch.full((1, self.tagset_size), -10000.) 
+ init_vvars[0][self.tag_to_ix[START_TAG]] = 0 + + # forward_var at step i holds the viterbi variables for step i-1 + forward_var = init_vvars + for feat in feats: + bptrs_t = [] # holds the backpointers for this step + viterbivars_t = [] # holds the viterbi variables for this step + + for next_tag in range(self.tagset_size): + # next_tag_var[i] holds the viterbi variable for tag i at the + # previous step, plus the score of transitioning + # from tag i to next_tag. + # We don't include the emission scores here because the max + # does not depend on them (we add them in below) + next_tag_var = forward_var + self.transitions[next_tag] + best_tag_id = argmax(next_tag_var) + bptrs_t.append(best_tag_id) + viterbivars_t.append(next_tag_var[0][best_tag_id].view(1)) + # Now add in the emission scores, and assign forward_var to the set + # of viterbi variables we just computed + forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1) + backpointers.append(bptrs_t) + + # Transition to STOP_TAG + terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + best_tag_id = argmax(terminal_var) + path_score = terminal_var[0][best_tag_id] + + # Follow the back pointers to decode the best path. 
+ best_path = [best_tag_id] + for bptrs_t in reversed(backpointers): + best_tag_id = bptrs_t[best_tag_id] + best_path.append(best_tag_id) + # Pop off the start tag (we dont want to return that to the caller) + start = best_path.pop() + assert start == self.tag_to_ix[START_TAG] # Sanity check + best_path.reverse() + return path_score, best_path + def loss(self, finalStates, goldLabelStrings): - #TODO - pass + goldLabels = [self.t2i[gs] for gs in goldLabelStrings] + forward_score = self._forward_alg(finalStates) + gold_score = self._score_sentence(feats, goldLabels) + return forward_score - gold_score + def saveX2i(self): - #TODO - pass + x2i = dict() + x2i["inferenceType"] = TYPE_GREEDY + x2i["inputSize"] = self.inputSize + x2i["isDual"] = 1 if self.isDual else 0 + x2i["span"] = spanToString(span) if self.spans else "" + x2i["nonlinearity"] = self.nonlinearity + x2i["t2i"] = self.t2i + x2i["dropoutProb"] = self.dropoutProb + + return x2i + def __str__(self): - #TODO - pass + return f"ViterbiForwardLayer({self.inDim}, {self.outDim})" + def inference(emissionScores): - #TODO - pass + score, labelsIds = self._viterbi_decode(emissionScores) + return [self.i2t[i] for i in labelsIds] + def inferenceWithScores(emissionScores): - #TODO - pass + raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") + @classmethod def load(cls, x2i): - #TODO - pass \ No newline at end of file + inputSize = x2i["inputSize"] + isDual = x2i.get("isDual", DEFAULT_IS_DUAL) == 1 + sapnValue = x2i.get("span", "") + spans = None if sapnValue == "" else parseSpan(sapnValue, inputSize) + nonlinearity = x2i.get("nonlinearity", NONLIN_NONE) + t2i = x2i["t2i"] + i2t = {i:t for t, i in t2i.items()} + dropoutProb = x2i.get("dropoutProb", DEFAULT_DROPOUT_PROBABILITY) + + if spans: + l = spanLength(spans) + actualInputSize = 2*l if isDual else l + else: + actualInputSize = 2*inputSize if isDual else inputSize + + return cls(inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, 
dropoutProb, spans) + From b8e2d3c32c7613bc010719cf0cb0ce8916428f2b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 02:27:49 -0700 Subject: [PATCH 018/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 3a162c525..f066d1011 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -83,9 +83,7 @@ def load(x2i): from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] if inferenceType == TYPE_VITERBI: - pass - # TODO - # return ViterbiForwardLayer.load(x2i) + return ViterbiForwardLayer.load(x2i) elif inferenceType == TYPE_GREEDY: return GreedyForwardLayer.load(x2i) else: @@ -125,11 +123,9 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): if inferenceType == TYPE_GREEDY_STRING: return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) elif inferenceType == TYPE_VITERBI_STRING: - pass - # TODO - # layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) - # layer.initializeTransitions() - # return layer + layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) + layer.initializeTransitions() + return layer else: raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") From c5476bc227988ce674a35b12de7f944da26e983b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 02:30:47 -0700 Subject: [PATCH 019/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index f066d1011..c1910a119 100644 --- 
a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -124,7 +124,6 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): return GreedyForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) elif inferenceType == TYPE_VITERBI_STRING: layer = ViterbiForwardLayer(inputSize, isDual, t2i, i2t, actualInputSize, nonlin, dropoutProb, span) - layer.initializeTransitions() return layer else: raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") From 9199eed65694acab90cca9bad8572998eb590fbb Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 7 Oct 2021 13:57:48 -0700 Subject: [PATCH 020/134] fixed bugs in viterbi decoder --- .../main/python/pytorch/greedyForwardLayer.py | 2 ++ main/src/main/python/pytorch/layers.py | 18 ++++------- main/src/main/python/pytorch/utils.py | 6 +++- .../python/pytorch/viterbiForwardLayer.py | 30 +++++++++---------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 86ad8cbf1..e45116a67 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -26,9 +26,11 @@ def __str__(self): return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): + emissionScores = emissionScoresToArrays(states) return [self.i2t[np.argmax(es)] for es in emissionScores] def inferenceWithScores(self, emissionScores): + emissionScores = emissionScoresToArrays(states) return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] @classmethod diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 178a76693..6bca50dfc 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -194,15 +194,13 @@ def 
predictJointly(layers, sentence, constEmbeddings): sharedStates = layers[0].forward(sentence, constEmbeddings, doDropout=False) for i in range(1, len(layers)): states = layers[i].forwardFrom(sharedStates, sentence.headPositions, doDropout=False) - emissionScores = emissionScoresToArrays(states) - labels = layers[i].finalLayer.inference(emissionScores) + labels = layers[i].finalLayer.inference(states) labelsPerTask += [labels] # no shared layer else: for i in range(1, len(layers)): states = layers[i].forward(sentence, sentence.headPositions, doDropout=False) - emissionScores = emissionScoresToArrays(states) - labels = layers[i].finalLayer.inference(emissionScores) + labels = layers[i].finalLayer.inference(states) labelsPerTask += [labels] return labelsPerTask @@ -219,14 +217,12 @@ def forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout): @staticmethod def predict(layers, taskId, sentence, constEmbeddings): states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) - emissionScores = emissionScoresToArrays(states) - return layers[taskId+1].finalLayer.inference(emissionScores) + return layers[taskId+1].finalLayer.inference(states) @staticmethod def predictWithScores(layers, taskId, sentence, constEmbeddings): states = Layers.forwardForTask(layers, taskId, sentence, constEmbeddings, doDropout=False) - emissionScores = emissionScoresToArrays(states) - return layers[taskId+1].finalLayer.inferenceWithScores(emissionScores) + return layers[taskId+1].finalLayer.inferenceWithScores(states) @staticmethod def parse(layers, sentence, constEmbeddings): @@ -240,8 +236,7 @@ def parse(layers, sentence, constEmbeddings): # now predict the heads (first task) # headStates = layers[1].forwardFrom(sharedStates, None, doDropout=False) - headEmissionScores = emissionScoresToArrays(headStates) - headScores = layers[1].finalLayer.inference(headEmissionScores) + headScores = layers[1].finalLayer.inference(headStates) # store the head values 
here heads = list() @@ -271,8 +266,7 @@ def parse(layers, sentence, constEmbeddings): # next, predict the labels using the predicted heads # labelStates = layers[2].forwardFrom(sharedStates, heads, doDropout=False) - emissionScores = emissionScoresToArrays(labelStates) - labels = layers[2].finalLayer.inference(emissionScores) + labels = layers[2].finalLayer.inference(labelStates) assert(len(labels)==len(heads)) return zip(heads, labels) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 2c6500352..26ef279cd 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -117,7 +117,11 @@ def printCoNLLOutput(pw, words, golds, preds): for i in range(len(words)): pw.write(f"{words[i]} {golds[i]} {preds[i]}\n") pw.write("\n") - +def argmax(vec): + # return the argmax as a python int + _, idx = torch.max(vec, 1) + return idx.item() + def log_sum_exp(vec): max_score = vec[0, argmax(vec)] max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1]) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 2d8911c98..1952666b0 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -17,9 +17,9 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d def _forward_alg(self, feats): # Do the forward algorithm to compute the partition function - init_alphas = torch.full((1, self.tagset_size), -10000.) + init_alphas = torch.full((1, self.outDim), -10000.) # START_TAG has all of the score. - init_alphas[0][self.tag_to_ix[START_TAG]] = 0. + init_alphas[0][self.t2i[START_TAG]] = 0. 
# Wrap in a variable so that we will get automatic backprop forward_var = init_alphas @@ -27,11 +27,11 @@ def _forward_alg(self, feats): # Iterate through the sentence for feat in feats: alphas_t = [] # The forward tensors at this timestep - for next_tag in range(self.tagset_size): + for next_tag in range(self.outDim): # broadcast the emission score: it is the same regardless of # the previous tag emit_score = feat[next_tag].view( - 1, -1).expand(1, self.tagset_size) + 1, -1).expand(1, self.outDim) # the ith entry of trans_score is the score of transitioning to # next_tag from i trans_score = self.transitions[next_tag].view(1, -1) @@ -42,26 +42,26 @@ def _forward_alg(self, feats): # scores. alphas_t.append(log_sum_exp(next_tag_var).view(1)) forward_var = torch.cat(alphas_t).view(1, -1) - terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + terminal_var = forward_var + self.transitions[self.t2i[STOP_TAG]] alpha = log_sum_exp(terminal_var) return alpha def _score_sentence(self, feats, tags): # Gives the score of a provided tag sequence score = torch.zeros(1) - tags = torch.cat([torch.tensor([self.tag_to_ix[START_TAG]], dtype=torch.long), tags]) + tags = torch.cat([torch.tensor([self.t2i[START_TAG]], dtype=torch.long), tags]) for i, feat in enumerate(feats): score = score + \ self.transitions[tags[i + 1], tags[i]] + feat[tags[i + 1]] - score = score + self.transitions[self.tag_to_ix[STOP_TAG], tags[-1]] + score = score + self.transitions[self.t2i[STOP_TAG], tags[-1]] return score def _viterbi_decode(self, feats): backpointers = [] # Initialize the viterbi variables in log space - init_vvars = torch.full((1, self.tagset_size), -10000.) - init_vvars[0][self.tag_to_ix[START_TAG]] = 0 + init_vvars = torch.full((1, self.outDim), -10000.) 
+ init_vvars[0][self.t2i[START_TAG]] = 0 # forward_var at step i holds the viterbi variables for step i-1 forward_var = init_vvars @@ -69,7 +69,7 @@ def _viterbi_decode(self, feats): bptrs_t = [] # holds the backpointers for this step viterbivars_t = [] # holds the viterbi variables for this step - for next_tag in range(self.tagset_size): + for next_tag in range(self.outDim): # next_tag_var[i] holds the viterbi variable for tag i at the # previous step, plus the score of transitioning # from tag i to next_tag. @@ -85,7 +85,7 @@ def _viterbi_decode(self, feats): backpointers.append(bptrs_t) # Transition to STOP_TAG - terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]] + terminal_var = forward_var + self.transitions[self.t2i[STOP_TAG]] best_tag_id = argmax(terminal_var) path_score = terminal_var[0][best_tag_id] @@ -96,14 +96,14 @@ def _viterbi_decode(self, feats): best_path.append(best_tag_id) # Pop off the start tag (we dont want to return that to the caller) start = best_path.pop() - assert start == self.tag_to_ix[START_TAG] # Sanity check + assert start == self.t2i[START_TAG] # Sanity check best_path.reverse() return path_score, best_path def loss(self, finalStates, goldLabelStrings): - goldLabels = [self.t2i[gs] for gs in goldLabelStrings] + goldLabels = torch.tensor([self.t2i[gs] for gs in goldLabelStrings], dtype=torch.long) forward_score = self._forward_alg(finalStates) - gold_score = self._score_sentence(feats, goldLabels) + gold_score = self._score_sentence(finalStates, goldLabels) return forward_score - gold_score def saveX2i(self): @@ -121,7 +121,7 @@ def saveX2i(self): def __str__(self): return f"ViterbiForwardLayer({self.inDim}, {self.outDim})" - def inference(emissionScores): + def inference(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) return [self.i2t[i] for i in labelsIds] From fdaf8e48986d98448f236913a2694fb1c0c5a995 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Fri, 8 Oct 2021 17:39:20 -0700 
Subject: [PATCH 021/134] fixed some bugs, changed default learning rate --- .../src/main/python/pytorch/greedyForwardLayer.py | 4 ++-- main/src/main/python/pytorch/metal.py | 8 ++++---- main/src/main/python/pytorch/utils.py | 15 ++++++++------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index e45116a67..556559cf4 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -26,11 +26,11 @@ def __str__(self): return f"GreedyForwardLayer({self.inDim}, {self.outDim})" def inference(self, emissionScores): - emissionScores = emissionScoresToArrays(states) + emissionScores = emissionScoresToArrays(emissionScores) return [self.i2t[np.argmax(es)] for es in emissionScores] def inferenceWithScores(self, emissionScores): - emissionScores = emissionScoresToArrays(states) + emissionScores = emissionScoresToArrays(emissionScores) return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] @classmethod diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 887e31e2c..e31f464a2 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -65,7 +65,7 @@ def mkVocabularies(self): return words, labels def train(self, modelNamePrefix): - learningRate = self.taskManager.get_float("mtl.learningRate", 0.001) + learningRate = self.taskManager.get_float("mtl.learningRate", 1e-5) trainerType = self.taskManager.get_string("mtl.trainer", "adam") batchSize = self.taskManager.get_int("mtl.batchSize", 1) assert(batchSize>0) @@ -75,11 +75,11 @@ def train(self, modelNamePrefix): parameters += layers.get_parameters() if trainerType == "adam": - trainer = Adam(parameters, lr=learningRate) + trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) elif 
trainerType == "rmsprop": - trainer = RMSprop(parameters, lr=learningRate) + trainer = RMSprop(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) elif trainerType == "sgd": - trainer = SDG(parameters, lr=learningRate) + trainer = SDG(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) else: raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 26ef279cd..472eab18a 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -46,9 +46,10 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - _, result = transduce(charEmbeddings, charRnnBuilder) + output, _ = transduce(charEmbeddings, charRnnBuilder) + result = output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states - return result.view(hidden_dim*2) + return result def readString2Ids(s2iFilename): s2i = dict() @@ -78,17 +79,17 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: - (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) + (h, c) = (torch.rand(2, 1, hidden_dim), torch.rand(2, 1, hidden_dim)) output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) else: - (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) + (h, c) = (torch.rand(1, 1, hidden_dim), torch.rand(1, 1, hidden_dim)) output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: - h = torch.zeros(2, 1, hidden_dim) + h = torch.rand(2, 1, hidden_dim) output, result = builder(embeddings.unsqueeze(1), h) else: - h = torch.zeros(1, 1, hidden_dim) + h = torch.rand(1, 1, hidden_dim) output, result = builder(embeddings.unsqueeze(1), h) return output, result @@ 
-121,7 +122,7 @@ def argmax(vec): # return the argmax as a python int _, idx = torch.max(vec, 1) return idx.item() - + def log_sum_exp(vec): max_score = vec[0, argmax(vec)] max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1]) From c2b7193372944340e7fd8e8e05d62f1a09834323 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 14:08:33 -0700 Subject: [PATCH 022/134] add features and fixed bugs fixed bugs on UNK word embedding set dropout prob to 0.1 add clipping --- .../python/embeddings/wordEmbeddingMap.py | 9 ++++---- main/src/main/python/pytorch/metal.py | 2 ++ main/src/main/python/pytorch/rnnLayer.py | 2 +- main/src/main/python/pytorch/seqScorer.py | 13 ++++++++++++ main/src/main/python/pytorch/utils.py | 21 +++++++++---------- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index f2766c9c8..f2b820402 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -24,11 +24,12 @@ def load(config): w2i[word] = i i += 1 x = np.array(list(map(float, rest))) - vector = (x /np.linalg.norm(x)) + vector = x #(x /np.linalg.norm(x)) #normalized embedding_size = vector.shape[0] - emb_dict[word] = vector - base = math.sqrt(6/embedding_size) - emb_dict[""] = np.random.uniform(-base,base,(embedding_size)) + if word == "": + emb_dict[""] = vector + else: + emb_dict[word] = vector weights = np.zeros((len(emb_dict), embedding_size)) for w, i in w2i.items(): diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index e31f464a2..609292c64 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -74,6 +74,8 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() + torch.nn.utils.clip_grad_norm_(parameters, 1e-2) + if trainerType == "adam": trainer = 
Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) elif trainerType == "rmsprop": diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 3384e89ce..525f5747a 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -28,7 +28,7 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) - States, _ = transduce(inputExpressions, self.wordRnnBuilder) + States = transduce(inputExpressions, self.wordRnnBuilder) if self.useHighwayConnections: States = torch.cat([States.squeeze(1), inputExpressions], dim=1) diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 0855ff120..335339e05 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -91,6 +91,19 @@ def accuracy(self, decimals=2): return round(a, decimals) if decimals>0 else a +def round(d, decimals): + if(decimals < 0): + return d # do not round when decimals is set to a negative value + + zeros = 1 + i = 0 + while (i < decimals + 2): + zeros *= 10 + i += 1 + + v = (d * zeros) / 100 + return v + diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 472eab18a..f3e6571a3 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -14,14 +14,12 @@ START_TAG = "" STOP_TAG = "" -RANDOM_SEED = 2522620396 # used for both DyNet, and the JVM seed for shuffling data -WEIGHT_DECAY = 1e-5 +RANDOM_SEED = 2522620396 +WEIGHT_DECAY = 0.01 LOG_MIN_VALUE = -10000.0 -DEFAULT_DROPOUT_PROBABILITY = 0.0 # no dropout by default - -IS_DYNET_INITIALIZED = False +DEFAULT_DROPOUT_PROBABILITY = 0.1 # no dropout by default TYPE_VITERBI = 1 TYPE_GREEDY = 2 @@ -46,7 +44,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = 
charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - output, _ = transduce(charEmbeddings, charRnnBuilder) + output = transduce(charEmbeddings, charRnnBuilder) result = output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result @@ -79,20 +77,21 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: + # change 1 to the layers we need (h, c) = (torch.rand(2, 1, hidden_dim), torch.rand(2, 1, hidden_dim)) - output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) else: (h, c) = (torch.rand(1, 1, hidden_dim), torch.rand(1, 1, hidden_dim)) - output, (result, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: h = torch.rand(2, 1, hidden_dim) - output, result = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1), h) else: h = torch.rand(1, 1, hidden_dim) - output, result = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1), h) - return output, result + return output def expressionDropout(expression, dropoutProb, doDropout): if doDropout and dropoutProb > 0: From 66e64003496870a8a7b25618ad2fa3ee127aa16e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 14:10:17 -0700 Subject: [PATCH 023/134] Update wordEmbeddingMap.py --- main/src/main/python/embeddings/wordEmbeddingMap.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index f2b820402..7412db665 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -21,15 +21,13 @@ def load(config): else: delimiter = " " word, *rest = line.rstrip().split(delimiter) + word = "" if word == "" else word 
w2i[word] = i i += 1 x = np.array(list(map(float, rest))) vector = x #(x /np.linalg.norm(x)) #normalized embedding_size = vector.shape[0] - if word == "": - emb_dict[""] = vector - else: - emb_dict[word] = vector + emb_dict[word] = vector weights = np.zeros((len(emb_dict), embedding_size)) for w, i in w2i.items(): From 46bc27e9669d0ba90fe92e4afc0e7b3f68447f1e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 14:18:53 -0700 Subject: [PATCH 024/134] Update wordEmbeddingMap.py --- main/src/main/python/embeddings/wordEmbeddingMap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/embeddings/wordEmbeddingMap.py b/main/src/main/python/embeddings/wordEmbeddingMap.py index 7412db665..7ea6328cd 100644 --- a/main/src/main/python/embeddings/wordEmbeddingMap.py +++ b/main/src/main/python/embeddings/wordEmbeddingMap.py @@ -12,8 +12,8 @@ def isOutOfVocabulary(self, word): def load(config): emb_dict = dict() - w2i = {"":0} - i = 1 + w2i = {} + i = 0 for line in open(config.get_string("glove.matrixResourceName")): if not len(line.split()) == 2: if "\t" in line: From 21d861ff4c2324ea746feca881650418726aab9b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 19 Oct 2021 22:24:54 -0700 Subject: [PATCH 025/134] Update seqScorer.py --- main/src/main/python/pytorch/seqScorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 335339e05..1c626566d 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -101,7 +101,7 @@ def round(d, decimals): zeros *= 10 i += 1 - v = (d * zeros) / 100 + v = int(d * zeros) / 100.0 return v From a1a44653ceb32ad42f5add2d521217bb80c22df8 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 20 Oct 2021 14:45:23 -0700 Subject: [PATCH 026/134] Update seqScorer.py --- main/src/main/python/pytorch/seqScorer.py | 19 ++++--------------- 1 file 
changed, 4 insertions(+), 15 deletions(-) diff --git a/main/src/main/python/pytorch/seqScorer.py b/main/src/main/python/pytorch/seqScorer.py index 1c626566d..068815cff 100644 --- a/main/src/main/python/pytorch/seqScorer.py +++ b/main/src/main/python/pytorch/seqScorer.py @@ -68,7 +68,7 @@ def precision(self, label="*", decimals=2): prec = c/p if p!=0 else 0 - return round(prec, decimals) if decimals>0 else prec + return round(prec*100, decimals) if decimals>0 else prec def recall(self, label="*", decimals=2): c = self.map[label].correct @@ -76,7 +76,7 @@ def recall(self, label="*", decimals=2): reca = c/g if g!=0 else 0 - return round(reca, decimals) if decimals>0 else reca + return round(reca*100, decimals) if decimals>0 else reca def f1(self, label="*", decimals=2): p = self.precision(label, decimals=-1) @@ -84,25 +84,14 @@ def f1(self, label="*", decimals=2): f1 = 2.0 * p * r / (p + r) if (p!=0 and r!=0) else 0 - return round(f1, decimals) if decimals>0 else f1 + return round(f1*100, decimals) if decimals>0 else f1 def accuracy(self, decimals=2): a = self.correct / self.total - return round(a, decimals) if decimals>0 else a + return round(a*100, decimals) if decimals>0 else a -def round(d, decimals): - if(decimals < 0): - return d # do not round when decimals is set to a negative value - zeros = 1 - i = 0 - while (i < decimals + 2): - zeros *= 10 - i += 1 - - v = int(d * zeros) / 100.0 - return v From cfb54792294499f6179079661bf96eaa363b0e9a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 21 Oct 2021 14:34:02 -0700 Subject: [PATCH 027/134] fixed the eval() bug --- main/src/main/python/pytorch/layers.py | 4 ++-- main/src/main/python/pytorch/metal.py | 17 +++++++++-------- main/src/main/python/run.py | 8 ++++++-- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 6bca50dfc..7447c79e3 100644 --- a/main/src/main/python/pytorch/layers.py +++ 
b/main/src/main/python/pytorch/layers.py @@ -127,7 +127,7 @@ def saveX2i(self): x2i['hasFinal'] = 1 x2i['finalLayer'] = self.finalLayer.saveX2i() else: - x2i['finalLayer'] = 0 + x2i['hasFinal'] = 0 return x2i @@ -182,7 +182,7 @@ def loadX2i(cls, x2i): intermediateLayers.append(il) hasFinal = x2i['hasFinal'] - finalLayer = ForwardLayer.load(x2i['finalLayer']) if hasFinal == 1 else none + finalLayer = ForwardLayer.load(x2i['finalLayer']) if hasFinal == 1 else None return cls(initialLayer, intermediateLayers, finalLayer) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 609292c64..57ec114d2 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -74,7 +74,7 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() - torch.nn.utils.clip_grad_norm_(parameters, 1e-2) + torch.nn.utils.clip_grad_norm_(parameters, 5) if trainerType == "adam": trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) @@ -216,7 +216,7 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): goldLabels = asent[1] constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() - preds = Layers.predict(self.model, taskId, sentence, constEmbeddings) + preds = self.predict(taskId, sentence, constEmbeddings) sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) @@ -257,10 +257,11 @@ def parse(self, sentence, constEmbeddings): Layers.parse(self.model, sentence, constEmbeddings) def test(self): - taskName = taskManager.tasks[taskId].taskName - testSentences = taskManager.tasks[taskId].testSentences - if testSentences: - self.evaluate(taskId, taskName, devSentences, "testing") + for taskId in range(0, self.taskManager.taskCount): + taskName = self.taskManager.tasks[taskId].taskName + testSentences = self.taskManager.tasks[taskId].devSentences + if testSentences: + self.evaluate(taskId, taskName, testSentences, "testing") def save(self, 
baseFilename): @@ -270,7 +271,7 @@ def save(self, baseFilename): sd, j_sd = layers.get_state_dict() x2i = layers.saveX2i() params.append({"model": sd, "x2i": x2i}) - j_params.append({"model": j_sd, "x2i": x2i}) + j_params.append({"x2i": x2i}) # torch pickle save try: @@ -290,7 +291,7 @@ def load(cls, modelFilenamePrefix): layersSeq = list() checkpoint = torch.load(modelFilenamePrefix+".torch") for param in checkpoint: - layers = loadX2i(param['x2i']) + layers = Layers.loadX2i(param['x2i']) layers.load_state_dict(param['model']) layersSeq.append(layers) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index fc4e1385a..01c37404b 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -18,11 +18,15 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - print (taskManager.debugTraversal()) mtl = Metal(taskManager, None) mtl.train(modelName) elif args.test: - pass + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + modelName = args.model_file + model = Metal.load(modelName) + mtl = Metal(taskManager, model) + mtl.test() elif args.shell: pass \ No newline at end of file From 3ee92feb42d346941e6dd4402a5a3650e2de8523 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 26 Oct 2021 21:55:54 -0700 Subject: [PATCH 028/134] Controlling sources of randomness --- main/src/main/python/pytorch/metal.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 57ec114d2..3ee8e605b 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -65,16 +65,21 @@ def mkVocabularies(self): return words, labels def train(self, modelNamePrefix): + learningRate = self.taskManager.get_float("mtl.learningRate", 1e-5) trainerType = 
self.taskManager.get_string("mtl.trainer", "adam") batchSize = self.taskManager.get_int("mtl.batchSize", 1) + + torch.manual_seed(self.taskManager.random) + random.seed(self.taskManager.random) + assert(batchSize>0) parameters = list() for layers in self.model: parameters += layers.get_parameters() - torch.nn.utils.clip_grad_norm_(parameters, 5) + # torch.nn.utils.clip_grad_norm_(parameters, 5) if trainerType == "adam": trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) @@ -259,7 +264,7 @@ def parse(self, sentence, constEmbeddings): def test(self): for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName - testSentences = self.taskManager.tasks[taskId].devSentences + testSentences = self.taskManager.tasks[taskId].testSentences if testSentences: self.evaluate(taskId, taskName, testSentences, "testing") From a37ef474d48a8cb41ffe9f711628b1b5a8688167 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 00:34:35 -0700 Subject: [PATCH 029/134] missed import... 
--- main/src/main/python/pytorch/metal.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 3ee8e605b..340664028 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -8,6 +8,7 @@ from torch.optim import SGD, Adam, RMSprop import json +import random class Metal(object): """docstring for Metal""" From 5e9434bc0fd5fa3521d090a20f6f5370e1c56c05 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 16:44:33 -0700 Subject: [PATCH 030/134] debugged for parsing --- .../src/main/python/pytorch/embeddingLayer.py | 9 ++++----- main/src/main/python/pytorch/forwardLayer.py | 19 ++++++++++++------- main/src/main/python/pytorch/rnnLayer.py | 3 ++- main/src/main/python/pytorch/utils.py | 4 ++-- main/src/main/python/sequences/rowReaders.py | 6 +++++- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 15242ed7a..d348e3052 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -66,9 +66,8 @@ def __init__(self, w2i, # word to index posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 distanceDim = distanceWindowSize if distanceLookupParameters else 0 - positionDim = 1 if distanceLookupParameters and useIsPredicate else 0 - predicateDim = positionEmbeddingSize if positionLookupParameters else 0 - + positionDim = 1 if distanceLookupParameters else 0 + predicateDim = positionEmbeddingSize if positionLookupParameters and useIsPredicate else 0 self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim random.seed(RANDOM_SEED) @@ -119,14 +118,14 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, 
tags=None, nes=None, h # # POS tag embedding # - if tags: + if tags and self.posTagLookupParameters: posTagEmbed = self.posTagLookupParameters(torch.LongTensor([self.tag2i.get(tag, 0) for tag in tags])) else: posTagEmbed = None # # NE tag embedding # - if nes: + if nes and self.neTagLookupParameters: neTagEmbed = self.neTagLookupParameters(torch.LongTensor([self.ne2i.get(ne, 0) for ne in nes])) else: neTagEmbed = None diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index c1910a119..06c29d8ba 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -32,9 +32,9 @@ def pickSpan(self, v): # Zheng: Will spans overlap? vs = list() for span in self.spans: - e = torch.index_select(v, 1, torch.tensor([span[0], span[1]])) + e = torch.index_select(v, 1, torch.tensor(range(span[0], span[1]))) vs.append(e) - return torch.cat(vs) + return torch.cat(vs, dim=1) def forward(self, inputExpressions, doDropout, headPositionsOpt = None): if not self.isDual: @@ -65,7 +65,6 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): predExp = expressionDropout(pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) else: # the head is root. we used a dedicated Parameter for root - # Zheng: Why not add root node to the input sequence at the beginning? predExp = expressionDropout(pickSpan(self.pRoot), self.dropout, doDropout) ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) @@ -129,12 +128,18 @@ def initialize(config, paramPrefix, labelCounter, isDual, inputSize): raise RuntimeError(f"ERROR: unknown inference type {inferenceType}!") def spanLength(spans): - sum(end - start for start, end in spans) + return sum(end - start for start, end in spans) -def parseSpan(spanParam, inputSize): +def parseSpan(spanParam, inputSize=None): # Zheng: Why do we need inputSize here? 
- token1, token2 = map(int, spanParamToken.split('-')) - spans.append((token1, token2)) + spans = list() + spanParamTokens = spanParam.split(",") + for spanParamToken in spanParamTokens: + # spanTokens = spanParamToken.split('-') + # assert(len(spanTokens) == 2) + # spans.append((int(spanTokens[0]), int(spanTokens[1]))) + token1, token2 = map(int, spanParamToken.split('-')) + spans.append((token1, token2)) return spans def spanToString(spans): diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 525f5747a..5c8681269 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,8 +29,9 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States = transduce(inputExpressions, self.wordRnnBuilder) + States = States.squeeze(1) if self.useHighwayConnections: - States = torch.cat([States.squeeze(1), inputExpressions], dim=1) + States = torch.cat([States, inputExpressions], dim=1) return States diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index f3e6571a3..25457c88c 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -53,7 +53,7 @@ def readString2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: for line in f: - if not line.startswith("#"): + if not line.startswith("# ") and line.rstrip(): k, v = line.strip().split('\t') s2i[k] = int(v) return s2i @@ -62,7 +62,7 @@ def readChar2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: for line in f: - if not line.startswith("#") and line.rstrip(): + if not line.startswith("# ") and line.rstrip(): k, v = line.strip().split('\t') s2i[chr(int(k))] = int(v) return s2i diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index 58a15cb71..0bd68210f 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ 
b/main/src/main/python/sequences/rowReaders.py @@ -68,6 +68,10 @@ def parseFull(self, rows): assert(rows[0].length >= 5) numSent = (rows[0].length - 3) / 2 assert(numSent >= 1) + assert(numSent==int(numSent)) + numSent = int(numSent) + + words = list() posTags = list() @@ -85,7 +89,7 @@ def parseFull(self, rows): try: headPositions[j] += [int(row.get(self.LABEL_START_OFFSET + (j * 2) + 1))] except: - raise RuntimeError # not sure about this part + raise RuntimeError sentences = list() for i in range(numSent): From ce00bd7e3648cc7118c0eabb5e8867870e75695c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 20:12:33 -0700 Subject: [PATCH 031/134] fixed bugs for parsing --- main/src/main/python/pytorch/embeddingLayer.py | 10 +++++----- main/src/main/python/pytorch/forwardLayer.py | 6 +++--- main/src/main/python/sequences/rowReaders.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index d348e3052..14cca2d52 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -15,6 +15,7 @@ DEFAULT_POSITION_EMBEDDING_SIZE: int = -1 # no position embeddings by default DEFAULT_DISTANCE_WINDOW_SIZE: int = -1 DEFAULT_USE_IS_PREDICATE: int = -1 +random.seed(RANDOM_SEED) class EmbeddingLayer(InitialLayer): def __init__(self, w2i, # word to index @@ -66,10 +67,10 @@ def __init__(self, w2i, # word to index posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 distanceDim = distanceWindowSize if distanceLookupParameters else 0 - positionDim = 1 if distanceLookupParameters else 0 - predicateDim = positionEmbeddingSize if positionLookupParameters and useIsPredicate else 0 - self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim - 
random.seed(RANDOM_SEED) + predicateDim = 1 if distanceLookupParameters and useIsPredicate else 0 + positionDim = positionEmbeddingSize if positionLookupParameters else 0 + self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim + def forward(self, sentence, constEmbeddings, doDropout): @@ -276,7 +277,6 @@ def initialize(cls, config, paramPrefix, wordCounter): charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): - tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) else: diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 06c29d8ba..95cd36c5c 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -59,13 +59,13 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = expressionDropout(pickSpan(e), self.dropoutProb, doDropout) + argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) if headPosition >= 0: # there is an explicit head in the sentence - predExp = expressionDropout(pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) else: # the head is root. 
we used a dedicated Parameter for root - predExp = expressionDropout(pickSpan(self.pRoot), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(self.pRoot), self.dropout, doDropout) ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) if nonlinearity == NONLIN_TANH: diff --git a/main/src/main/python/sequences/rowReaders.py b/main/src/main/python/sequences/rowReaders.py index 0bd68210f..434469e92 100644 --- a/main/src/main/python/sequences/rowReaders.py +++ b/main/src/main/python/sequences/rowReaders.py @@ -61,7 +61,7 @@ def parseSimpleExtended(self, rows): neLabels += [row.get(self.NE_LABEL_POSITION)] labels += [row.get(self.LABEL_START_OFFSET)] - return [(AnnotatedSentence(words), posTags, neLabels, labels)] + return [(AnnotatedSentence(words, posTags, neLabels), labels)] # Parser for the full format: word, POS tag, NE label, (label head)+ def parseFull(self, rows): From d00c087fe90307dae77bce87bda88b8f3b063ed5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 21:44:47 -0700 Subject: [PATCH 032/134] export model to onnx --- main/src/main/python/pytorch/pytorch2onnx.py | 29 ++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 main/src/main/python/pytorch/pytorch2onnx.py diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py new file mode 100644 index 000000000..a1b620c03 --- /dev/null +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -0,0 +1,29 @@ +import torch + +from pytorch.metal import Metal + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + args = parser.parse_args() + modelName = args.model_file + model = Metal.load(modelName) + + input_names = [ "input" ] + output_names = [ "output" ] + + #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
+ for i, layers in enumerate(model): + if layers.initialLayer is not None: + #export the initial layer + dummy_input = (sentence, embeddings)# we need some toy sentence and embeddings here, not sure if onnx is happy with this input though... + torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) + dummy_input = layers.initialLayer(sentence, embeddings) + for j, il in enumerate(layers.intermediateLayers): + #export the intermediate layer layer + torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names, output_names=output_names) + dummy_input = il(dummy_input) + if layers.finalLayer is not None: + #export the final layer + torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) \ No newline at end of file From ee1c6ddbfe3382868bbcb76780879258cded9dfe Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 27 Oct 2021 21:53:01 -0700 Subject: [PATCH 033/134] specified input and output names --- main/src/main/python/pytorch/pytorch2onnx.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py index a1b620c03..6b8a1a8fd 100644 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -10,20 +10,21 @@ modelName = args.model_file model = Metal.load(modelName) - input_names = [ "input" ] - output_names = [ "output" ] - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
for i, layers in enumerate(model): if layers.initialLayer is not None: #export the initial layer + input_names_1 = ["sentence", "const embeddings"] + output_names_1 = [ "embeddings" ] dummy_input = (sentence, embeddings)# we need some toy sentence and embeddings here, not sure if onnx is happy with this input though... - torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) + torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_1, output_names=output_names_1) dummy_input = layers.initialLayer(sentence, embeddings) for j, il in enumerate(layers.intermediateLayers): #export the intermediate layer layer - torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names, output_names=output_names) + input_names_2 = ["input", "dropout"] + output_names_2 = [ "output" ] + torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names_2, output_names=output_names_2) dummy_input = il(dummy_input) if layers.finalLayer is not None: #export the final layer - torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names, output_names=output_names) \ No newline at end of file + torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_2, output_names=output_names_2) \ No newline at end of file From 3f89fa76ce2cf32d5b828081846926f978eedd0c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 28 Oct 2021 09:44:43 -0700 Subject: [PATCH 034/134] fixed bug in saving x2i --- main/src/main/python/pytorch/greedyForwardLayer.py | 2 +- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 556559cf4..b913c10d5 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -15,7 +15,7 @@ def saveX2i(self): x2i["inferenceType"] = TYPE_GREEDY x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 - x2i["span"] = spanToString(span) if self.spans else "" + x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i x2i["dropoutProb"] = self.dropoutProb diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 1952666b0..5aa9e6669 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -111,7 +111,7 @@ def saveX2i(self): x2i["inferenceType"] = TYPE_GREEDY x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 - x2i["span"] = spanToString(span) if self.spans else "" + x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i x2i["dropoutProb"] = self.dropoutProb From 96d7dcfa6e9b61fa09b53a95bbc816c75a9d3cd0 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 28 Oct 2021 21:03:26 -0700 Subject: [PATCH 035/134] fixed some bugs also trying the new weight decay strategy: (rule out biases and norms) --- main/src/main/python/pytorch/forwardLayer.py | 18 +++++++++--------- main/src/main/python/pytorch/layers.py | 16 ++++++++++++---- main/src/main/python/pytorch/metal.py | 8 ++++---- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 95cd36c5c..145a76d1a 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -25,21 +25,21 @@ def 
__init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.outDim = len(t2i) - def pickSpan(self, v): + def pickSpan(self, v, i): if self.spans is None: return v else: # Zheng: Will spans overlap? vs = list() for span in self.spans: - e = torch.index_select(v, 1, torch.tensor(range(span[0], span[1]))) + e = torch.index_select(v, i, torch.tensor(range(span[0], span[1]))) vs.append(e) - return torch.cat(vs, dim=1) + return torch.cat(vs, dim=i) def forward(self, inputExpressions, doDropout, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - argExp = expressionDropout(self.pickSpan(inputExpressions), self.dropoutProb, doDropout) + argExp = expressionDropout(self.pickSpan(inputExpressions, 1), self.dropoutProb, doDropout) emissionScores = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) @@ -59,18 +59,18 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) + argExp = expressionDropout(self.pickSpan(e, 0), self.dropoutProb, doDropout) if headPosition >= 0: # there is an explicit head in the sentence - predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition]), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition], 0), self.dropoutProb, doDropout) else: # the head is root. 
we used a dedicated Parameter for root - predExp = expressionDropout(self.pickSpan(self.pRoot), self.dropout, doDropout) + predExp = expressionDropout(self.pickSpan(self.pRoot, 0), self.dropoutProb, doDropout) ss = torch.cat([argExp, predExp]) l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) - if nonlinearity == NONLIN_TANH: + if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) - elif nonlinearity == NONLIN_RELU: + elif self.nonlinearity == NONLIN_RELU: l1 = F.relu(l1) emissionScores.append(l1) emissionScores = torch.stack(emissionScores) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 7447c79e3..ec79b7d02 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -41,12 +41,20 @@ def __str__(self): def get_parameters(self): parameters = list() if self.initialLayer is not None: - parameters += [p for p in self.initialLayer.parameters() if p.requires_grad] + parameters += [p for p in self.initialLayer.named_parameters() if p.requires_grad] for il in self.intermediateLayers: - parameters += [p for p in il.parameters() if p.requires_grad] + parameters += [p for p in il.named_parameters() if p.requires_grad] if self.finalLayer is not None: - parameters += [p for p in self.finalLayer.parameters() if p.requires_grad] - return parameters + parameters += [p for p in self.finalLayer.named_parameters() if p.requires_grad] + + no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] + optimizer_grouped_parameters = [ + {'params': [p for n, p in parameters + if not any(nd in n for nd in no_decay)], 'weight_decay': WEIGHT_DECAY}, + {'params': [p for n, p in parameters + if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} + ] + return optimizer_grouped_parameters def start_train(self): if self.initialLayer is not None: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 340664028..07350ba30 100644 --- 
a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -80,14 +80,14 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() - # torch.nn.utils.clip_grad_norm_(parameters, 5) + torch.nn.utils.clip_grad_norm_(parameters, 0.01) if trainerType == "adam": - trainer = Adam(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) + trainer = Adam(parameters, lr=learningRate) elif trainerType == "rmsprop": - trainer = RMSprop(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) + trainer = RMSprop(parameters, lr=learningRate) elif trainerType == "sgd": - trainer = SDG(parameters, lr=learningRate, weight_decay=WEIGHT_DECAY) + trainer = SDG(parameters, lr=learningRate) else: raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") From 6a37769c5ad76c547f0dc1a5c2796090a10dbd11 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 28 Oct 2021 21:38:18 -0700 Subject: [PATCH 036/134] remove clipping --- main/src/main/python/pytorch/layers.py | 10 +++++----- main/src/main/python/pytorch/metal.py | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index ec79b7d02..4acf2f2cf 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -41,18 +41,18 @@ def __str__(self): def get_parameters(self): parameters = list() if self.initialLayer is not None: - parameters += [p for p in self.initialLayer.named_parameters() if p.requires_grad] + parameters += [p for p in self.initialLayer.named_parameters()] for il in self.intermediateLayers: - parameters += [p for p in il.named_parameters() if p.requires_grad] + parameters += [p for p in il.named_parameters()] if self.finalLayer is not None: - parameters += [p for p in self.finalLayer.named_parameters() if p.requires_grad] + parameters += [p for p in self.finalLayer.named_parameters()] no_decay = ['bias', 'LayerNorm.bias', 
'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in parameters - if not any(nd in n for nd in no_decay)], 'weight_decay': WEIGHT_DECAY}, + if not any(nd in n for nd in no_decay) and p.requires_grad], 'weight_decay': WEIGHT_DECAY}, {'params': [p for n, p in parameters - if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} + if any(nd in n for nd in no_decay) and p.requires_grad], 'weight_decay': 0.0} ] return optimizer_grouped_parameters diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 07350ba30..309b13554 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -80,8 +80,6 @@ def train(self, modelNamePrefix): for layers in self.model: parameters += layers.get_parameters() - torch.nn.utils.clip_grad_norm_(parameters, 0.01) - if trainerType == "adam": trainer = Adam(parameters, lr=learningRate) elif trainerType == "rmsprop": From e577f3c8eba652906b921de9371024a13238a6cf Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 3 Nov 2021 15:27:48 -0700 Subject: [PATCH 037/134] add scheduler --- main/src/main/python/pytorch/metal.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 309b13554..127e94207 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -6,6 +6,7 @@ from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from torch.optim import SGD, Adam, RMSprop +from torch.optim.lr_scheduler import * import json import random @@ -89,6 +90,8 @@ def train(self, modelNamePrefix): else: raise RuntimeError(f"ERROR: unknown trainer {trainerType}!") + scheduler = ExponentialLR(trainer, gamma=0.9) + reader = MetalRowReader() cummulativeLoss = 0.0 @@ -154,6 +157,7 @@ def train(self, modelNamePrefix): trainer.step() batchLoss = 0 i = 0 + scheduler.step() # check dev performance in this epoch, for all tasks totalAcc = 0.0 
From c8ec489c602cc1b9188d85b9741e570c270263bf Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 16 Nov 2021 00:49:03 -0700 Subject: [PATCH 038/134] use xavier uniform to initialize weights --- main/src/main/python/pytorch/embeddingLayer.py | 16 ++++++++++++++-- main/src/main/python/pytorch/forwardLayer.py | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 14cca2d52..502bb21bb 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -269,16 +269,19 @@ def initialize(cls, config, paramPrefix, wordCounter): w2i = {w:i for i, w in enumerate(wordList)} wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) + nn.init.xavier_uniform_(wordLookupParameters.weight) c2iFilename = config.get_string(paramPrefix + ".c2i", "org/clulab/c2i-en.txt") c2i = readChar2Ids(c2iFilename) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) + nn.init.xavier_uniform_(charLookupParameters.weight) charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) + nn.init.xavier_uniform_(posTagLookupParameters.weight) else: tag2i = None posTagLookupParameters = None @@ -290,8 +293,17 @@ def initialize(cls, config, paramPrefix, wordCounter): ne2i = None neTagLookupParameters = None - distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) if distanceEmbeddingSize > 0 else None - positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) if positionEmbeddingSize > 0 else None + if distanceEmbeddingSize > 0: + distanceLookupParameters = nn.Embedding(distanceWindowSize * 2 + 3, distanceEmbeddingSize) + 
nn.init.xavier_uniform_(distanceLookupParameters.weight) + else: + distanceLookupParameters = None + + if positionEmbeddingSize > 0: + positionLookupParameters = nn.Embedding(101, positionEmbeddingSize) + nn.init.xavier_uniform_(positionLookupParameters.weight) + else: + positionLookupParameters = None return cls(w2i, wordCounter, c2i, tag2i, ne2i, learnedWordEmbeddingSize, diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 145a76d1a..2241db86a 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -1,5 +1,5 @@ import torch -import torch.nn +import torch.nn as nn from torch.autograd import Variable import torch.nn.functional as F @@ -18,6 +18,7 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.nonlinearity = nonlinearity self.pH = nn.Linear(actualInputSize, len(t2i)) + nn.init.xavier_uniform_(self.pH.weight) self.pRoot = Variable(torch.rand(inputSize)) #TODO: Not sure about the shape here self.dropoutProb = dropoutProb From 1a9d4bdab0276485f9ff64ac68e9af98961626b5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 16 Nov 2021 10:57:04 -0700 Subject: [PATCH 039/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 127e94207..298320d08 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -265,6 +265,10 @@ def parse(self, sentence, constEmbeddings): Layers.parse(self.model, sentence, constEmbeddings) def test(self): + + torch.manual_seed(self.taskManager.random) + random.seed(self.taskManager.random) + for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName testSentences = self.taskManager.tasks[taskId].testSentences From dc4e77b84595d2323838aceb2c43ae55f973bd4b Mon Sep 17 
00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 20:38:12 -0700 Subject: [PATCH 040/134] convert layers to a single NN module to save it to onnx --- main/src/main/python/pytorch/pytorch2onnx.py | 56 +++++++++++++------- main/src/main/python/pytorch/utils.py | 8 +++ 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py index 6b8a1a8fd..5b749ca7a 100644 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -1,6 +1,39 @@ import torch from pytorch.metal import Metal +from pytorch.utils import mkCharacterEmbedding2 + +class Saving_Model(nn.Module): + """docstring for Saving_Model""" + def __init__(self, model, constEmbeddings): + super().__init__() + self.model = model + for layers in model: + layers.start_eval() + self.constEmbeddings = constEmbeddings + self.initialLayers = [None for _ in range(len(model))] + for i, layers in enumerate(model): + if layers.initialLayer is not None: + self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, + "charLookupParameters":layers.initialLayer.charLookupParameters, + "charRnnBuilder":layers.initialLayer.charRnnBuilder} + + def forward(self, word_ids, char_ids_list): + + #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
+ for i, layers in enumerate(self.model): + if self.initialLayers[i]: + embeddings = constEmbeddings.emb(idxs) + learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + embed = torch.cat(embedParts, dim=1) + for j, il in enumerate(layers.intermediateLayers): + dummy_input = il(dummy_input) + if layers.finalLayer is not None: + output = layers.finalLayer(dummy_input) + return output if __name__ == '__main__': @@ -10,21 +43,8 @@ modelName = args.model_file model = Metal.load(modelName) - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... - for i, layers in enumerate(model): - if layers.initialLayer is not None: - #export the initial layer - input_names_1 = ["sentence", "const embeddings"] - output_names_1 = [ "embeddings" ] - dummy_input = (sentence, embeddings)# we need some toy sentence and embeddings here, not sure if onnx is happy with this input though... 
- torch.onnx.export(layers.initialLayer, dummy_input_1, "initialLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_1, output_names=output_names_1) - dummy_input = layers.initialLayer(sentence, embeddings) - for j, il in enumerate(layers.intermediateLayers): - #export the intermediate layer layer - input_names_2 = ["input", "dropout"] - output_names_2 = [ "output" ] - torch.onnx.export(il, dummy_input_2, "intermediateLayer_%d_inTask%d.onnx"%(i,j), verbose=True, input_names=input_names_2, output_names=output_names_2) - dummy_input = il(dummy_input) - if layers.finalLayer is not None: - #export the final layer - torch.onnx.export(layers.finalLayer, dummy_input, "finalLayer_inTask%d.onnx"%i, verbose=True, input_names=input_names_2, output_names=output_names_2) \ No newline at end of file + export_model = Saving_Model(model) + + torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names_2, output_names=output_names_2) + + \ No newline at end of file diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 25457c88c..b6218ba2f 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -49,6 +49,14 @@ def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result +def mkCharacterEmbedding2(char_ids, charLookupParameters, charRnnBuilder): + hidden_dim = charRnnBuilder.hidden_size + charEmbeddings = charLookupParameters(char_ids) + output = transduce(charEmbeddings, charRnnBuilder) + result = output.squeeze(1)[-1] + # Zheng: Not sure if this is the right way to concatenate the two direction hidden states + return result + def readString2Ids(s2iFilename): s2i = dict() with open(s2iFilename) as f: From 86f12505c049dcbaa183d3ec8120750912738d4f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 21:58:02 -0700 Subject: [PATCH 
041/134] get dummy input --- main/src/main/python/pytorch/pytorch2onnx.py | 46 +++++++++++++++++--- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py index 5b749ca7a..5a9cae82e 100644 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ b/main/src/main/python/pytorch/pytorch2onnx.py @@ -1,15 +1,15 @@ import torch from pytorch.metal import Metal -from pytorch.utils import mkCharacterEmbedding2 +from pytorch.utils import * +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove +from sequences.rowReaders import * class Saving_Model(nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): super().__init__() self.model = model - for layers in model: - layers.start_eval() self.constEmbeddings = constEmbeddings self.initialLayers = [None for _ in range(len(model))] for i, layers in enumerate(model): @@ -39,12 +39,48 @@ def forward(self, word_ids, char_ids_list): parser = argparse.ArgumentParser() parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) args = parser.parse_args() + + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) modelName = args.model_file model = Metal.load(modelName) + for layers in model: + layers.start_eval() + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_model = Saving_Model(model, constEmbeddings) + export_model.eval() + + torch.manual_seed(taskManager.random) + random.seed(taskManager.random) + + for i, layers in enumerate(model): + if layers.initialLayer is not None: + c2i = layers.initialLayer.c2i + + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + testSentences = 
taskManager.tasks[taskId].testSentences + if testSentences: + reader = MetalRowReader() + annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) + + asent = annotatedSentences[0] + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] + + dummy_input = [word_ids, char_ids_list] - export_model = Saving_Model(model) + input_names = [ "input1", "input2" ] + output_names = [ "output" ] - torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names_2, output_names=output_names_2) + torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) \ No newline at end of file From 3e1fd11779b55d4c055c625424a9e32f6ee91631 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 23:28:25 -0700 Subject: [PATCH 042/134] Create pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 90 ++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 main/src/main/python/pytorch2onnx.py diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py new file mode 100644 index 000000000..fe3426678 --- /dev/null +++ b/main/src/main/python/pytorch2onnx.py @@ -0,0 +1,90 @@ +import torch +import argparse +from pyhocon import ConfigFactory +import random + +from pytorch.taskManager import TaskManager +from pytorch.metal import Metal +from pytorch.utils import * +from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove +from sequences.rowReaders import * + +class Saving_Model(nn.Module): + """docstring for Saving_Model""" + def __init__(self, model, constEmbeddings): + super().__init__() + self.model = model + self.constEmbeddings = constEmbeddings + self.initialLayers = [None for _ in 
range(len(model))] + for i, layers in enumerate(model): + if layers.initialLayer is not None: + self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, + "charLookupParameters":layers.initialLayer.charLookupParameters, + "charRnnBuilder":layers.initialLayer.charRnnBuilder} + + def forward(self, word_ids, char_ids_list): + + #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... + for i, layers in enumerate(self.model): + if self.initialLayers[i]: + embeddings = constEmbeddings.emb(idxs) + learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + embed = torch.cat(embedParts, dim=1) + for j, il in enumerate(layers.intermediateLayers): + dummy_input = il(dummy_input) + if layers.finalLayer is not None: + output = layers.finalLayer(dummy_input) + return output + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) + args = parser.parse_args() + + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + modelName = args.model_file + model = Metal.load(modelName) + for layers in model: + layers.start_eval() + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_model = Saving_Model(model, constEmbeddings) + export_model.eval() + + torch.manual_seed(taskManager.random) + 
random.seed(taskManager.random) + + for i, layers in enumerate(model): + if layers.initialLayer is not None: + c2i = layers.initialLayer.c2i + + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + testSentences = taskManager.tasks[taskId].testSentences + if testSentences: + reader = MetalRowReader() + annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) + + asent = annotatedSentences[0] + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] + + dummy_input = [word_ids, char_ids_list] + + input_names = [ "input1", "input2" ] + output_names = [ "output" ] + + torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) + + From d9ed82c99d89a917d49e9adf5af7e11e42809589 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 23:28:54 -0700 Subject: [PATCH 043/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index fe3426678..880fafd82 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -78,7 +78,7 @@ def forward(self, word_ids, char_ids_list): words = sentence.words word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] + char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) for word in words] dummy_input = [word_ids, char_ids_list] From f29403e2ce6ab9ca805f4800447e03ef0bca5de6 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 1 Dec 2021 
23:59:31 -0700 Subject: [PATCH 044/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 30 +++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 880fafd82..49bb86aac 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -22,21 +22,21 @@ def __init__(self, model, constEmbeddings): "charLookupParameters":layers.initialLayer.charLookupParameters, "charRnnBuilder":layers.initialLayer.charRnnBuilder} - def forward(self, word_ids, char_ids_list): - + def forward(self, input_list): + word_ids, char_ids_list = input_list #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... for i, layers in enumerate(self.model): if self.initialLayers[i]: - embeddings = constEmbeddings.emb(idxs) - learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) + embeddings = constEmbeddings.emb(word_ids) + learnedWordEmbeddings = self.initialLayers[i]["wordLookupParameters"](word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i]["charLookupParameters"], self.initialLayers[i]["charRnnBuilder"]) for char_ids in char_ids_list]) embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] embedParts = [ep for ep in embedParts if ep is not None] embed = torch.cat(embedParts, dim=1) for j, il in enumerate(layers.intermediateLayers): - dummy_input = il(dummy_input) + output = il(embed, False) if layers.finalLayer is not None: - output = layers.finalLayer(dummy_input) + output = layers.finalLayer(output, False, None)#headPositions set to be None for now, we can 
add it in input list later return output if __name__ == '__main__': @@ -54,6 +54,7 @@ def forward(self, word_ids, char_ids_list): for layers in model: layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_model = Saving_Model(model, constEmbeddings) export_model.eval() @@ -82,9 +83,20 @@ def forward(self, word_ids, char_ids_list): dummy_input = [word_ids, char_ids_list] - input_names = [ "input1", "input2" ] + output = export_model(dummy_input) + + input_names = [ "input_list"] output_names = [ "output" ] - torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) + torch.onnx.export(export_model, # model being run + dummy_input, # model input (or a tuple for multiple inputs) + "model.onnx", # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=10, # the ONNX version to export the model to + do_constant_folding=True, # whether to execute constant folding for optimization + input_names = ['input'], # the model's input names + output_names = ['output'], # the model's output names + dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes + 'output' : {0 : 'batch_size'}}) From b36cbabaab153cbb7ba45157b7e3357526219079 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 2 Dec 2021 09:53:13 -0700 Subject: [PATCH 045/134] converted the list in the model to nnModuleList --- main/src/main/python/pytorch2onnx.py | 44 ++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 49bb86aac..ad8260449 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -9,34 +9,40 @@ from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from sequences.rowReaders import * -class Saving_Model(nn.Module): +class 
Saving_Model(torch.nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): super().__init__() - self.model = model + self.model_length = len(model) self.constEmbeddings = constEmbeddings - self.initialLayers = [None for _ in range(len(model))] + self.initialLayers = [None for _ in range(self.model_length)] + self.intermediateLayerss = [None for _ in range(self.model_length)] + self.finalLayers = [None for _ in range(self.model_length)] for i, layers in enumerate(model): if layers.initialLayer is not None: - self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, - "charLookupParameters":layers.initialLayer.charLookupParameters, - "charRnnBuilder":layers.initialLayer.charRnnBuilder} - + self.initialLayers[i] = nn.ModuleList([layers.initialLayer.wordLookupParameters, + layers.initialLayer.charLookupParameters, + layers.initialLayer.charRnnBuilder]) + self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) + self.finalLayers[i] = layers.finalLayer + self.initialLayers = nn.ModuleList(self.initialLayers) + self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) + self.finalLayers = nn.ModuleList(self.finalLayers) def forward(self, input_list): word_ids, char_ids_list = input_list #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... 
- for i, layers in enumerate(self.model): + for i in range(self.model_length): if self.initialLayers[i]: embeddings = constEmbeddings.emb(word_ids) - learnedWordEmbeddings = self.initialLayers[i]["wordLookupParameters"](word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i]["charLookupParameters"], self.initialLayers[i]["charRnnBuilder"]) for char_ids in char_ids_list]) + learnedWordEmbeddings = self.initialLayers[i][0](word_ids) + charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i][1], self.initialLayers[i][2]) for char_ids in char_ids_list]) embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] embedParts = [ep for ep in embedParts if ep is not None] embed = torch.cat(embedParts, dim=1) - for j, il in enumerate(layers.intermediateLayers): + for il in self.intermediateLayerss[i]: output = il(embed, False) - if layers.finalLayer is not None: - output = layers.finalLayer(output, False, None)#headPositions set to be None for now, we can add it in input list later + if self.finalLayers[i]: + output = self.finalLayers[i](output, False, None)#headPositions set to be None for now, we can add it in input list later return output if __name__ == '__main__': @@ -57,6 +63,8 @@ def forward(self, input_list): export_model = Saving_Model(model, constEmbeddings) export_model.eval() + for param in export_model.parameters(): + param.requires_grad = False torch.manual_seed(taskManager.random) random.seed(taskManager.random) @@ -78,14 +86,14 @@ def forward(self, input_list): words = sentence.words - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) for word in words] + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]).detach() + 
char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]).detach() for word in words] dummy_input = [word_ids, char_ids_list] output = export_model(dummy_input) - input_names = [ "input_list"] + input_names = ["input_list"] output_names = [ "output" ] torch.onnx.export(export_model, # model being run @@ -94,9 +102,9 @@ def forward(self, input_list): export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['input'], # the model's input names + input_names = ['input'], # the model's input names output_names = ['output'], # the model's output names - dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes + dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes 'output' : {0 : 'batch_size'}}) From 045c581a6db55f5c25d85ab1e3ed24d682ff6cfa Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 2 Dec 2021 09:54:34 -0700 Subject: [PATCH 046/134] remove the redundant --- main/src/main/python/pytorch/pytorch2onnx.py | 86 -------------------- 1 file changed, 86 deletions(-) delete mode 100644 main/src/main/python/pytorch/pytorch2onnx.py diff --git a/main/src/main/python/pytorch/pytorch2onnx.py b/main/src/main/python/pytorch/pytorch2onnx.py deleted file mode 100644 index 5a9cae82e..000000000 --- a/main/src/main/python/pytorch/pytorch2onnx.py +++ /dev/null @@ -1,86 +0,0 @@ -import torch - -from pytorch.metal import Metal -from pytorch.utils import * -from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove -from sequences.rowReaders import * - -class Saving_Model(nn.Module): - """docstring for Saving_Model""" - def __init__(self, model, constEmbeddings): - super().__init__() - self.model = model - self.constEmbeddings = constEmbeddings - self.initialLayers = [None for _ in range(len(model))] - for i, layers in enumerate(model): - if layers.initialLayer 
is not None: - self.initialLayers[i] = {"wordLookupParameters":layers.initialLayer.wordLookupParameters, - "charLookupParameters":layers.initialLayer.charLookupParameters, - "charRnnBuilder":layers.initialLayer.charRnnBuilder} - - def forward(self, word_ids, char_ids_list): - - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... - for i, layers in enumerate(self.model): - if self.initialLayers[i]: - embeddings = constEmbeddings.emb(idxs) - learnedWordEmbeddings = self.initialLayers[i].wordLookupParameters(word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i].charLookupParameters, self.initialLayers[i].charRnnBuilder) for char_ids in char_ids_list]) - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] - embedParts = [ep for ep in embedParts if ep is not None] - embed = torch.cat(embedParts, dim=1) - for j, il in enumerate(layers.intermediateLayers): - dummy_input = il(dummy_input) - if layers.finalLayer is not None: - output = layers.finalLayer(dummy_input) - return output - -if __name__ == '__main__': - - parser = argparse.ArgumentParser() - parser.add_argument('--model_file', type=str, help='Filename of the model.') - parser.add_argument('--config', type=str, help='Filename of the configuration.') - parser.add_argument('--seed', type=int, default=1234) - args = parser.parse_args() - - config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') - taskManager = TaskManager(config, args.seed) - modelName = args.model_file - model = Metal.load(modelName) - for layers in model: - layers.start_eval() - constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() - export_model = Saving_Model(model, constEmbeddings) - export_model.eval() - - torch.manual_seed(taskManager.random) - random.seed(taskManager.random) - - for i, layers in enumerate(model): - if 
layers.initialLayer is not None: - c2i = layers.initialLayer.c2i - - for taskId in range(0, taskManager.taskCount): - taskName = taskManager.tasks[taskId].taskName - testSentences = taskManager.tasks[taskId].testSentences - if testSentences: - reader = MetalRowReader() - annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) - - asent = annotatedSentences[0] - sentence = asent[0] - goldLabels = asent[1] - - words = sentence.words - - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in w]) for word in words] - - dummy_input = [word_ids, char_ids_list] - - input_names = [ "input1", "input2" ] - output_names = [ "output" ] - - torch.onnx.export(export_model, dummy_input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names) - - \ No newline at end of file From de010c3bfb39215c9c40000e0ce757d8b395664c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 12:32:58 -0700 Subject: [PATCH 047/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index ad8260449..d73bb19e9 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -9,6 +9,9 @@ from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from sequences.rowReaders import * +import onnx +import onnxruntime + class Saving_Model(torch.nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): @@ -107,4 +110,21 @@ def forward(self, input_list): dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes 'output' : {0 : 'batch_size'}}) + onnx_model = onnx.load("model.onnx") + onnx.checker.check_model(onnx_model) + + ort_session = onnxruntime.InferenceSession("model.onnx") + + def to_numpy(tensor): + 
return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + + # compute ONNX Runtime output prediction + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} + ort_outs = ort_session.run(None, ort_inputs) + + # compare ONNX Runtime and PyTorch results + np.testing.assert_allclose(to_numpy(output), ort_outs[0], rtol=1e-03, atol=1e-05) + + print("Exported model has been tested with ONNXRuntime, and the result looks good!") + From 78c2f2743016b61aea5bda25c29982f6f9d10b28 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 14:20:52 -0700 Subject: [PATCH 048/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index d73bb19e9..122555729 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -119,7 +119,7 @@ def to_numpy(tensor): return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() # compute ONNX Runtime output prediction - ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} + ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) # compare ONNX Runtime and PyTorch results From 2347df5a11cde43623855767bf494048a8a15b13 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 15:28:40 -0700 Subject: [PATCH 049/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 122555729..fa47c190b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -31,8 +31,7 @@ def __init__(self, model, constEmbeddings): self.initialLayers = nn.ModuleList(self.initialLayers) self.intermediateLayerss = 
nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, input_list): - word_ids, char_ids_list = input_list + def forward(self, word_ids, char_ids_list): #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... for i in range(self.model_length): if self.initialLayers[i]: @@ -90,14 +89,11 @@ def forward(self, input_list): words = sentence.words word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]).detach() - char_ids_list = [torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]).detach() for word in words] + char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]).detach() - dummy_input = [word_ids, char_ids_list] + dummy_input = (word_ids, char_ids_list) - output = export_model(dummy_input) - - input_names = ["input_list"] - output_names = [ "output" ] + output = export_model(word_ids, char_ids_list) torch.onnx.export(export_model, # model being run dummy_input, # model input (or a tuple for multiple inputs) @@ -105,7 +101,7 @@ def forward(self, input_list): export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['input'], # the model's input names + input_names = ['words', 'chars'], # the model's input names output_names = ['output'], # the model's output names dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes 'output' : {0 : 'batch_size'}}) @@ -116,10 +112,11 @@ def forward(self, input_list): ort_session = onnxruntime.InferenceSession("model.onnx") def to_numpy(tensor): - return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + return tensor.cpu().numpy() # compute ONNX Runtime output prediction - ort_inputs = 
{ort_session.get_inputs()[0].name: to_numpy(dummy_input)} + print ([i.name for i in ort_session.get_inputs()]) + ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) # compare ONNX Runtime and PyTorch results From 70a9370245b57e4c69c244e06734566c756d857f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Mon, 6 Dec 2021 19:20:43 -0700 Subject: [PATCH 050/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index fa47c190b..0e3a1f08d 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -88,8 +88,8 @@ def forward(self, word_ids, char_ids_list): words = sentence.words - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]).detach() - char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]).detach() + word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]) dummy_input = (word_ids, char_ids_list) @@ -112,7 +112,7 @@ def forward(self, word_ids, char_ids_list): ort_session = onnxruntime.InferenceSession("model.onnx") def to_numpy(tensor): - return tensor.cpu().numpy() + return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() # compute ONNX Runtime output prediction print ([i.name for i in ort_session.get_inputs()]) From d22925996409c2e049011dcb97fccb2c029f6c43 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 9 Dec 2021 00:47:24 -0700 Subject: [PATCH 051/134] test the onnx model I have to save the character RNN in separate model, since the sentence length and word length are all varied and onnx is not 
happy with these kind of input... --- main/src/main/python/pytorch2onnx.py | 114 ++++++++++++++++++--------- main/src/main/python/test_onnx.py | 81 +++++++++++++++++++ 2 files changed, 157 insertions(+), 38 deletions(-) create mode 100644 main/src/main/python/test_onnx.py diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 0e3a1f08d..5fc059a31 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -12,40 +12,52 @@ import onnx import onnxruntime +import json + +def to_numpy(tensor): + return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + +class Char_RNN(torch.nn.Module): + + def __init__(self, model): + super().__init__() + for i, layers in enumerate(model): + if layers.initialLayer is not None: + self.char_lookup = layers.initialLayer.charLookupParameters + self.char_rnn = layers.initialLayer.charRnnBuilder + + def forward(self, char_ids): + charEmbedding = mkCharacterEmbedding2(char_ids, self.char_lookup, self.char_rnn) + return charEmbedding + class Saving_Model(torch.nn.Module): """docstring for Saving_Model""" def __init__(self, model, constEmbeddings): super().__init__() self.model_length = len(model) self.constEmbeddings = constEmbeddings - self.initialLayers = [None for _ in range(self.model_length)] self.intermediateLayerss = [None for _ in range(self.model_length)] self.finalLayers = [None for _ in range(self.model_length)] for i, layers in enumerate(model): if layers.initialLayer is not None: - self.initialLayers[i] = nn.ModuleList([layers.initialLayer.wordLookupParameters, - layers.initialLayer.charLookupParameters, - layers.initialLayer.charRnnBuilder]) + self.word_lookup = layers.initialLayer.wordLookupParameters self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) self.finalLayers[i] = layers.finalLayer - self.initialLayers = nn.ModuleList(self.initialLayers) self.intermediateLayerss = 
nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, word_ids, char_ids_list): - #In current setting, each layer is a nn.Moudle and we need to export each of them. This is not very elegant... + def forward(self, embed_ids, word_ids, charEmbedding): + # Can I assuem there is only one initial layer? + embeddings = constEmbeddings.emb(embed_ids) + learnedWordEmbeddings = self.word_lookup(word_ids) + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [ep for ep in embedParts if ep is not None] + state = torch.cat(embedParts, dim=1) for i in range(self.model_length): - if self.initialLayers[i]: - embeddings = constEmbeddings.emb(word_ids) - learnedWordEmbeddings = self.initialLayers[i][0](word_ids) - charEmbedding = torch.stack([mkCharacterEmbedding2(char_ids, self.initialLayers[i][1], self.initialLayers[i][2]) for char_ids in char_ids_list]) - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] - embedParts = [ep for ep in embedParts if ep is not None] - embed = torch.cat(embedParts, dim=1) for il in self.intermediateLayerss[i]: - output = il(embed, False) + state = il(state, False) if self.finalLayers[i]: - output = self.finalLayers[i](output, False, None)#headPositions set to be None for now, we can add it in input list later - return output + state = self.finalLayers[i](state, False, None)#headPositions set to be None for now, we can add it in input list later + return state if __name__ == '__main__': @@ -63,37 +75,55 @@ def forward(self, word_ids, char_ids_list): layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + export_char = Char_RNN(model) export_model = Saving_Model(model, constEmbeddings) export_model.eval() + export_char.eval() for param in export_model.parameters(): 
param.requires_grad = False + for param in export_char.parameters(): + param.requires_grad = False torch.manual_seed(taskManager.random) random.seed(taskManager.random) - for i, layers in enumerate(model): - if layers.initialLayer is not None: - c2i = layers.initialLayer.c2i + x2i = json.load(open(args.model_file+".json")) + + c2i = x2i[0]['x2i']['initialLayer']['c2i'] + w2i = x2i[0]['x2i']['initialLayer']['w2i'] for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences if testSentences: reader = MetalRowReader() - annotatedSentences = reader.toAnnotatedSentences(testSentences[0]) + annotatedSentences = reader.toAnnotatedSentences(testSentences[1]) asent = annotatedSentences[0] sentence = asent[0] goldLabels = asent[1] words = sentence.words - - word_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - char_ids_list = torch.LongTensor([[c2i.get(c, UNK_EMBEDDING) for c in word] for word in words]) - - dummy_input = (word_ids, char_ids_list) - - output = export_model(word_ids, char_ids_list) + char_embs = [] + for word in words: + char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) + char_out = export_char(char_ids) + char_embs.append(char_out) + char_embs = torch.stack(char_embs) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) + output = export_model(embed_ids, word_ids, char_embs) + + dummy_input = (embed_ids, word_ids, char_embs) + + torch.onnx.export(export_char, + char_ids, + "char.onnx", + export_params=True, + do_constant_folding=True, + input_names = ['char_ids'], + output_names = ['chars'], + dynamic_axes = {"char_ids": {0: 'word length'}}) torch.onnx.export(export_model, # model being run dummy_input, # model input (or a tuple for multiple 
inputs) @@ -101,26 +131,34 @@ def forward(self, word_ids, char_ids_list): export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['words', 'chars'], # the model's input names + input_names = ['embed', 'words', 'chars'], # the model's input names output_names = ['output'], # the model's output names - dynamic_axes = {'input' : {0 : 'batch_size'}, # variable length axes - 'output' : {0 : 'batch_size'}}) + dynamic_axes = {'embed' : {0 : 'sentence length'}, + 'words' : {0 : 'sentence length'}, + 'chars' : {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") onnx.checker.check_model(onnx_model) + char_model = onnx.load("char.onnx") + onnx.checker.check_model(char_model) ort_session = onnxruntime.InferenceSession("model.onnx") + ort_char = onnxruntime.InferenceSession("char.onnx") + # compute ONNX Runtime output prediction - def to_numpy(tensor): - return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() + ort_inputs = {ort_char.get_inputs()[i].name: to_numpy(x) for i, x in enumerate([char_ids])} + ort_outs = ort_char.run(None, ort_inputs) + try: + np.testing.assert_allclose(to_numpy(char_out), ort_outs[0], rtol=1e-03, atol=1e-05) + except AssertionError as e: + print (e) - # compute ONNX Runtime output prediction - print ([i.name for i in ort_session.get_inputs()]) ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) - - # compare ONNX Runtime and PyTorch results - np.testing.assert_allclose(to_numpy(output), ort_outs[0], rtol=1e-03, atol=1e-05) + try: + np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + except AssertionError as e: + print (e) print("Exported model has been tested with ONNXRuntime, and the result looks 
good!") diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py new file mode 100644 index 000000000..670278c91 --- /dev/null +++ b/main/src/main/python/test_onnx.py @@ -0,0 +1,81 @@ +from pytorch2onnx import * +import json +import numpy as np +from pytorch.seqScorer import * + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--config', type=str, help='Filename of the configuration.') + parser.add_argument('--seed', type=int, default=1234) + args = parser.parse_args() + + config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') + taskManager = TaskManager(config, args.seed) + constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() + + x2i = json.load(open(args.model_file+".json")) + + c2i = x2i[0]['x2i']['initialLayer']['c2i'] + w2i = x2i[0]['x2i']['initialLayer']['w2i'] + t2i = x2i[1]['x2i']['finalLayer']["t2i"] + i2t = {i:t for t, i in t2i.items()} + + torch.manual_seed(taskManager.random) + random.seed(taskManager.random) + + onnx_model = onnx.load("model.onnx") + onnx.checker.check_model(onnx_model) + char_model = onnx.load("char.onnx") + onnx.checker.check_model(char_model) + + ort_session = onnxruntime.InferenceSession("model.onnx") + ort_char = onnxruntime.InferenceSession("char.onnx") + + scoreCountsByLabel = ScoreCountsByLabel() + + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + sentences = taskManager.tasks[taskId].testSentences + if sentences: + reader = MetalRowReader() + for sent in sentences: + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + char_embs = [] + for word in words: + char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + ort_inputs = {ort_char.get_inputs()[i].name: x for i, x in 
enumerate([char_ids])} + ort_outs = ort_char.run(None, ort_inputs) + char_embs.append(ort_outs[0]) + char_embs = np.stack(char_embs) + embed_ids = np.array([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) + + dummy_input = (embed_ids, word_ids, char_embs) + + ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} + ort_outs = ort_session.run(None, ort_inputs) + + emissionScores = ort_outs[0] + preds = [i2t[np.argmax(es)] for es in emissionScores] + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + + print (f"Accuracy : {scoreCountsByLabel.accuracy()}") + print (f"Precision : {scoreCountsByLabel.precision()}") + print (f"Recall on : {scoreCountsByLabel.recall()}") + print (f"Micro F1 : {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + + \ No newline at end of file From 5f328f0eb0cb178ac4ffed28e4dd52ed1fe69d89 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Tue, 14 Dec 2021 23:35:27 -0700 Subject: [PATCH 052/134] Create test_onnx.scala --- main/src/main/scala/org/clulab/dynet/test_onnx.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 main/src/main/scala/org/clulab/dynet/test_onnx.scala diff --git a/main/src/main/scala/org/clulab/dynet/test_onnx.scala b/main/src/main/scala/org/clulab/dynet/test_onnx.scala new file mode 100644 index 000000000..3a337f4b7 --- /dev/null +++ b/main/src/main/scala/org/clulab/dynet/test_onnx.scala @@ -0,0 +1,12 @@ +package org.clulab.processors.clu + +import org.clulab.dynet.ConstEmbeddingsGlove +import org.clulab.embeddings.WordEmbeddingMapPool + +object GetWordEmbeddings extends App { + val constEmbeddingsGlove = ConstEmbeddingsGlove // Make 
sure that the embeddings have been loaded. + val wordEmbeddingMap = WordEmbeddingMapPool.get("glove.840B.300d.10f", compact = true).get + val embedding = wordEmbeddingMap.get("this").get + + println(embedding.mkString(" ")) +} \ No newline at end of file From 424e67a706b5f6bf4a45ea41a00df5b8e132ee91 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 02:26:52 -0700 Subject: [PATCH 053/134] Delete test_onnx.scala --- main/src/main/scala/org/clulab/dynet/test_onnx.scala | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 main/src/main/scala/org/clulab/dynet/test_onnx.scala diff --git a/main/src/main/scala/org/clulab/dynet/test_onnx.scala b/main/src/main/scala/org/clulab/dynet/test_onnx.scala deleted file mode 100644 index 3a337f4b7..000000000 --- a/main/src/main/scala/org/clulab/dynet/test_onnx.scala +++ /dev/null @@ -1,12 +0,0 @@ -package org.clulab.processors.clu - -import org.clulab.dynet.ConstEmbeddingsGlove -import org.clulab.embeddings.WordEmbeddingMapPool - -object GetWordEmbeddings extends App { - val constEmbeddingsGlove = ConstEmbeddingsGlove // Make sure that the embeddings have been loaded. 
- val wordEmbeddingMap = WordEmbeddingMapPool.get("glove.840B.300d.10f", compact = true).get - val embedding = wordEmbeddingMap.get("this").get - - println(embedding.mkString(" ")) -} \ No newline at end of file From 80aae2d507ecfa5a0887aaeda6bca3375bfec244 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 16:03:55 -0700 Subject: [PATCH 054/134] change the onnx model to fit scala code --- main/src/main/python/pytorch2onnx.py | 13 ++++++------- main/src/main/python/test_onnx.py | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 5fc059a31..ec30bfe6c 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -32,10 +32,9 @@ def forward(self, char_ids): class Saving_Model(torch.nn.Module): """docstring for Saving_Model""" - def __init__(self, model, constEmbeddings): + def __init__(self, model): super().__init__() self.model_length = len(model) - self.constEmbeddings = constEmbeddings self.intermediateLayerss = [None for _ in range(self.model_length)] self.finalLayers = [None for _ in range(self.model_length)] for i, layers in enumerate(model): @@ -45,9 +44,8 @@ def __init__(self, model, constEmbeddings): self.finalLayers[i] = layers.finalLayer self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, embed_ids, word_ids, charEmbedding): + def forward(self, embeddings, word_ids, charEmbedding): # Can I assuem there is only one initial layer? 
- embeddings = constEmbeddings.emb(embed_ids) learnedWordEmbeddings = self.word_lookup(word_ids) embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] embedParts = [ep for ep in embedParts if ep is not None] @@ -76,7 +74,7 @@ def forward(self, embed_ids, word_ids, charEmbedding): constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() export_char = Char_RNN(model) - export_model = Saving_Model(model, constEmbeddings) + export_model = Saving_Model(model) export_model.eval() export_char.eval() for param in export_model.parameters(): @@ -111,10 +109,11 @@ def forward(self, embed_ids, word_ids, charEmbedding): char_embs.append(char_out) char_embs = torch.stack(char_embs) embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - output = export_model(embed_ids, word_ids, char_embs) + output = export_model(embeddings, word_ids, char_embs) - dummy_input = (embed_ids, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs) torch.onnx.export(export_char, char_ids, diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index 670278c91..da191f7b3 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -57,9 +57,10 @@ char_embs.append(ort_outs[0]) char_embs = np.stack(char_embs) embed_ids = np.array([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids) word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) - dummy_input = (embed_ids, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs) ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) 
From 3c902277539010d69a2bb55f1e0d461cfe4618fb Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 16:28:44 -0700 Subject: [PATCH 055/134] Update test_onnx.py --- main/src/main/python/test_onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index da191f7b3..5c0231992 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -56,8 +56,8 @@ ort_outs = ort_char.run(None, ort_inputs) char_embs.append(ort_outs[0]) char_embs = np.stack(char_embs) - embed_ids = np.array([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) dummy_input = (embeddings, word_ids, char_embs) From 90079833e454a2987658c1b52bbb13066d263f76 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 15 Dec 2021 22:00:57 -0700 Subject: [PATCH 056/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index ec30bfe6c..f3d3b58c3 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -134,7 +134,8 @@ def forward(self, embeddings, word_ids, charEmbedding): output_names = ['output'], # the model's output names dynamic_axes = {'embed' : {0 : 'sentence length'}, 'words' : {0 : 'sentence length'}, - 'chars' : {0 : 'sentence length'}}) + 'chars' : {0 : 'sentence length'}, + 'output': {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") onnx.checker.check_model(onnx_model) From 82ba7006a9eaeca1f94ca9b421734999eb7952ae Mon Sep 17 00:00:00 2001 
From: Zheng Tang Date: Wed, 15 Dec 2021 23:27:41 -0700 Subject: [PATCH 057/134] Update test_onnx.py --- main/src/main/python/test_onnx.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index 5c0231992..dc5d7ae1e 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -2,6 +2,7 @@ import json import numpy as np from pytorch.seqScorer import * +import time if __name__ == '__main__': @@ -34,7 +35,7 @@ ort_char = onnxruntime.InferenceSession("char.onnx") scoreCountsByLabel = ScoreCountsByLabel() - + start_time = time.time() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName sentences = taskManager.tasks[taskId].testSentences @@ -78,5 +79,6 @@ print (f"Micro F1 : {scoreCountsByLabel.f1()}") for label in scoreCountsByLabel.labels(): print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - + duration = time.time() - start_time + print (duration) \ No newline at end of file From 46fb8ad0b3aeefa1e46407d6afb49becbdf8a990 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:12:39 -0700 Subject: [PATCH 058/134] set random seed for onnx --- main/src/main/python/test_onnx.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index dc5d7ae1e..1f2082ad4 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -3,9 +3,12 @@ import numpy as np from pytorch.seqScorer import * import time +import random if __name__ == '__main__': + random.seed(100) + parser = argparse.ArgumentParser() parser.add_argument('--model_file', type=str, help='Filename of the model.') parser.add_argument('--config', type=str, help='Filename of the configuration.') From 
a734a8924d0034652313490c866161db4e8a532d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:15:17 -0700 Subject: [PATCH 059/134] Update test_onnx.py --- main/src/main/python/test_onnx.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index 1f2082ad4..dc5d7ae1e 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -3,12 +3,9 @@ import numpy as np from pytorch.seqScorer import * import time -import random if __name__ == '__main__': - random.seed(100) - parser = argparse.ArgumentParser() parser.add_argument('--model_file', type=str, help='Filename of the model.') parser.add_argument('--config', type=str, help='Filename of the configuration.') From c38dc95da7074570dabc3026a17f63c6409dbe7d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:23:52 -0700 Subject: [PATCH 060/134] paths to data and embeddings --- main/src/main/resources/org/clulab/glove.conf | 2 +- main/src/main/resources/org/clulab/mtl-en-ner.conf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/main/src/main/resources/org/clulab/glove.conf b/main/src/main/resources/org/clulab/glove.conf index 22f1e4b36..6b8bd08b0 100644 --- a/main/src/main/resources/org/clulab/glove.conf +++ b/main/src/main/resources/org/clulab/glove.conf @@ -1,5 +1,5 @@ glove { - matrixResourceName = "/org/clulab/glove/glove.840B.300d.10f" + matrixResourceName = "glove.840B.300d.10f.txt" isResource = true } \ No newline at end of file diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index 6cd5eecc2..9c07bdd75 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -20,9 +20,9 @@ mtl { task1 { name = "En NER" - train = "dynet/en/ner/train.txt" - dev = "dynet/en/ner/dev.txt" - test = "dynet/en/ner/test.txt" + train = "ner/train.txt" + dev = 
"ner/dev.txt" + test = "ner/test.txt" layers { final { From 753c20e3c5a7fc6ef2a057e5462ca62fe0dd53f1 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:34:35 -0700 Subject: [PATCH 061/134] debug the randomness --- main/src/main/python/pytorch/metal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 298320d08..175ee040e 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,7 +266,7 @@ def parse(self, sentence, constEmbeddings): def test(self): - torch.manual_seed(self.taskManager.random) + # torch.manual_seed(self.taskManager.random) random.seed(self.taskManager.random) for taskId in range(0, self.taskManager.taskCount): From dd6517b3e3bacf24baa1504d36085d3660635f87 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 14:39:38 -0700 Subject: [PATCH 062/134] debug randomness --- main/src/main/python/pytorch/metal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 175ee040e..588d40272 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,8 @@ def parse(self, sentence, constEmbeddings): def test(self): - # torch.manual_seed(self.taskManager.random) - random.seed(self.taskManager.random) + torch.manual_seed(self.taskManager.random) + # random.seed(self.taskManager.random) for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName From fea59e0c7f8ecdbc6c872a1668d1bf59abc331a0 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:25:46 -0700 Subject: [PATCH 063/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 
588d40272..298320d08 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -267,7 +267,7 @@ def parse(self, sentence, constEmbeddings): def test(self): torch.manual_seed(self.taskManager.random) - # random.seed(self.taskManager.random) + random.seed(self.taskManager.random) for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName From 6d8446ae46281fb64a44c788eda9d2cc1430966f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:30:31 -0700 Subject: [PATCH 064/134] debug randomness --- main/src/main/python/pytorch/metal.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 298320d08..c605c42c9 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -243,18 +243,18 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): return ( scoreCountsByLabel.accuracy(), scoreCountsByLabel.precision(), scoreCountsByLabel.recall(), scoreCountsByLabel.f1() ) def predictJointly(self, sentence, constEmbeddings): - for layers in self.model: - layers.start_eval() + # for layers in self.model: + # layers.start_eval() return Layers.predictJointly(self.model, sentence, constEmbeddings) def predict(self, taskId, sentence, constEmbeddings): - for layers in self.model: - layers.start_eval() + # for layers in self.model: + # layers.start_eval() return Layers.predict(self.model, taskId, sentence, constEmbeddings) def predictWithScores(self, taskId, sentence, constEmbeddings): - for layers in self.model: - layers.start_eval() + # for layers in self.model: + # layers.start_eval() return Layers.predictWithScores(self.model, taskId, sentence, constEmbeddings) # Custom method for the parsing algorithm @@ -268,7 +268,8 @@ def test(self): torch.manual_seed(self.taskManager.random) random.seed(self.taskManager.random) - + for layers 
in self.model: + layers.start_eval() for taskId in range(0, self.taskManager.taskCount): taskName = self.taskManager.tasks[taskId].taskName testSentences = self.taskManager.tasks[taskId].testSentences From a4a1db634e51979774e70f9f674afeee5869e1dd Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:30:48 -0700 Subject: [PATCH 065/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index c605c42c9..312d762e8 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,8 @@ def parse(self, sentence, constEmbeddings): def test(self): - torch.manual_seed(self.taskManager.random) - random.seed(self.taskManager.random) + # torch.manual_seed(self.taskManager.random) + # random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() for taskId in range(0, self.taskManager.taskCount): From 4889201dec3bb5763139cc090431e0f61b7ae261 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:44:45 -0700 Subject: [PATCH 066/134] move dropout inside model --- main/src/main/python/pytorch/forwardLayer.py | 20 ++++++++++---------- main/src/main/python/pytorch/utils.py | 7 ------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 2241db86a..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -20,7 +20,7 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.pH = nn.Linear(actualInputSize, len(t2i)) nn.init.xavier_uniform_(self.pH.weight) self.pRoot = Variable(torch.rand(inputSize)) #TODO: Not sure about the shape here - self.dropoutProb = dropoutProb + self.dropout = nn.Dropout(dropoutProb) self.inDim = spanLength(spans) if 
spans is not None else inputSize self.outDim = len(t2i) @@ -37,18 +37,18 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - argExp = expressionDropout(self.pickSpan(inputExpressions, 1), self.dropoutProb, doDropout) - emissionScores = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: emissionScores = F.relu(emissionScores) # for i, e in enumerate(inputExpressions): - # argExp = expressionDropout(self.pickSpan(e), self.dropoutProb, doDropout) - # l1 = expressionDropout(self.pH(argExp), self.dropoutProb, doDropout) + # argExp = self.dropout(self.pickSpan(e)) + # l1 = self.dropout(self.pH(argExp)) # if self.nonlinearity == NONLIN_TANH: # l1 = F.tanh(l1) # elif self.nonlinearity == NONLIN_RELU: @@ -60,15 +60,15 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = expressionDropout(self.pickSpan(e, 0), self.dropoutProb, doDropout) + argExp = self.dropout(self.pickSpan(e, 0)) if headPosition >= 0: # there is an explicit head in the sentence - predExp = expressionDropout(self.pickSpan(inputExpressions[headPosition], 0), self.dropoutProb, doDropout) + predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) else: # the head is root. 
we used a dedicated Parameter for root - predExp = expressionDropout(self.pickSpan(self.pRoot, 0), self.dropoutProb, doDropout) + predExp = self.dropout(self.pickSpan(self.pRoot, 0)) ss = torch.cat([argExp, predExp]) - l1 = expressionDropout(self.pH(ss), self.dropoutProb, doDropout) + l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) elif self.nonlinearity == NONLIN_RELU: diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index b6218ba2f..d996deb02 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -101,13 +101,6 @@ def transduce(embeddings, builder): return output -def expressionDropout(expression, dropoutProb, doDropout): - if doDropout and dropoutProb > 0: - dropout = nn.Dropout(dropoutProb) - return dropout(expression) - else: - return expression - def sentenceLossGreedy(emissionScoresForSeq, golds): assert(emissionScoresForSeq.size(0) == len(golds)) criterion = nn.CrossEntropyLoss() From 31810313e6a21cdcdc6fb652d8987fed266be715 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 15:49:58 -0700 Subject: [PATCH 067/134] Update layers.py --- main/src/main/python/pytorch/layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 4acf2f2cf..cd1616e56 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -105,7 +105,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, sentence.headPositions) + states = self.finalLayer(states, sentence.headPositions) return states @@ -116,7 +116,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for intermediateLayer in self.intermediateLayers: states = 
intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, headPositions) + states = self.finalLayer(states, headPositions) return states From 593bc7f7390113b431d20695d0cb8a94b0210a87 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:08:44 -0700 Subject: [PATCH 068/134] move RNNs inside model... --- main/src/main/python/pytorch/embeddingLayer.py | 6 +++--- main/src/main/python/pytorch/rnnLayer.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 502bb21bb..85dc6ffb0 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -57,7 +57,7 @@ def __init__(self, w2i, # word to index self.useIsPredicate = useIsPredicate self.wordLookupParameters = wordLookupParameters self.charLookupParameters = charLookupParameters - self.charRnnBuilder = charRnnBuilder + self.charRnnBuilder = nn.LSTM(*charRnnBuilder) self.posTagLookupParameters = posTagLookupParameters self.neTagLookupParameters = neTagLookupParameters self.distanceLookupParameters = distanceLookupParameters @@ -222,7 +222,7 @@ def load(cls, x2i): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) - charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) if x2i['hasNe2i'] == 1 else None @@ -276,7 +276,7 @@ def initialize(cls, config, paramPrefix, wordCounter): charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) 
nn.init.xavier_uniform_(charLookupParameters.weight) - charRnnBuilder = nn.LSTM(charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) if(posTagEmbeddingSize > 0): tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index 5c8681269..f83fb2420 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -18,7 +18,7 @@ def __init__(self, self.rnnStateSize = rnnStateSize self.useHighwayConnections = useHighwayConnections self.rnnType = rnnType - self.wordRnnBuilder = wordRnnBuilder + self.wordRnnBuilder = mkBuilder(*wordRnnBuilder) self.dropoutProb = dropoutProb highwaySize = inputSize if useHighwayConnections else 0 @@ -57,7 +57,7 @@ def load(cls, x2i): useHighwayConnections = x2i['useHighwayConnections'] == 1 dropoutProb = x2i['dropoutProb'] - builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) + builder = (rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) return cls(inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) @@ -73,7 +73,7 @@ def initialize(cls, config, paramPrefix, inputSize): rnnType = config.get_string(paramPrefix + ".type", "lstm") dropoutProb = config.get_float(paramPrefix + ".dropoutProb", DEFAULT_DROPOUT_PROBABILITY) - builder = mkBuilder(rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) + builder = (rnnType, numLayers, inputSize, rnnStateSize, dropoutProb) return cls(inputSize, numLayers, rnnStateSize, useHighwayConnections, rnnType, builder, dropoutProb) From df5cc6ab5249894a3055f7b016e38193b545f908 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:14:23 -0700 Subject: [PATCH 069/134] Update embeddingLayer.py --- 
main/src/main/python/pytorch/embeddingLayer.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 85dc6ffb0..821147075 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -57,7 +57,7 @@ def __init__(self, w2i, # word to index self.useIsPredicate = useIsPredicate self.wordLookupParameters = wordLookupParameters self.charLookupParameters = charLookupParameters - self.charRnnBuilder = nn.LSTM(*charRnnBuilder) + self.charRnnBuilder = mkBuilder(*charRnnBuilder) self.posTagLookupParameters = posTagLookupParameters self.neTagLookupParameters = neTagLookupParameters self.distanceLookupParameters = distanceLookupParameters @@ -222,7 +222,7 @@ def load(cls, x2i): wordLookupParameters = nn.Embedding(len(w2i), learnedWordEmbeddingSize) charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) - charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, True, dropoutProb) posTagLookupParameters = nn.Embedding(len(tag2i), posTagEmbeddingSize) if x2i['hasTag2i'] == 1 else None neTagLookupParameters = nn.Embedding(len(ne2i), neTagEmbeddingSize) if x2i['hasNe2i'] == 1 else None @@ -276,7 +276,7 @@ def initialize(cls, config, paramPrefix, wordCounter): charLookupParameters = nn.Embedding(len(c2i), charEmbeddingSize) nn.init.xavier_uniform_(charLookupParameters.weight) - charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, bidirectional=True, dropout=dropoutProb) + charRnnBuilder = (charEmbeddingSize, charRnnStateSize, 1, True, dropoutProb) if(posTagEmbeddingSize > 0): tag2i = readString2Ids(config.get_string(paramPrefix + ".tag2i", "../resources/org/clulab/tag2i-en.txt")) @@ -324,11 +324,9 @@ def initialize(cls, config, paramPrefix, wordCounter): positionLookupParameters, 
dropoutProb) - - - - - +def mkBuilder(inputSize, rnnStateSize, numLayers, bi, dropoutProb): + return nn.LSTM(inputSize, rnnStateSize, numLayers, bidirectional=bi, dropout=dropoutProb) + From cd5368a93d4117ce381f151e518c91c9f55e1abe Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:29:50 -0700 Subject: [PATCH 070/134] debug randomness --- main/src/main/python/pytorch/forwardLayer.py | 19 ++++++++++++------- main/src/main/python/pytorch/layers.py | 4 ++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..af7455438 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -37,11 +37,12 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, headPositionsOpt = None): + def forward(self, inputExpressions, , doDropout, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
- argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - emissionScores = self.dropout(self.pH(argExp)) + if doDropout: + argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: @@ -60,15 +61,19 @@ def forward(self, inputExpressions, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = self.dropout(self.pickSpan(e, 0)) + if doDropout: + argExp = self.dropout(self.pickSpan(e, 0)) if headPosition >= 0: # there is an explicit head in the sentence - predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) + if doDropout: + predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) else: # the head is root. we used a dedicated Parameter for root - predExp = self.dropout(self.pickSpan(self.pRoot, 0)) + if doDropout: + predExp = self.dropout(self.pickSpan(self.pRoot, 0)) ss = torch.cat([argExp, predExp]) - l1 = self.dropout(self.pH(ss)) + if doDropout: + l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) elif self.nonlinearity == NONLIN_RELU: diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index cd1616e56..4acf2f2cf 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -105,7 +105,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, sentence.headPositions) + states = self.finalLayer(states, doDropout, sentence.headPositions) return states @@ -116,7 +116,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for intermediateLayer in 
self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, headPositions) + states = self.finalLayer(states, doDropout, headPositions) return states From f19eaf14339b28cc83d7d807db2fa8cc72cd07ad Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:33:34 -0700 Subject: [PATCH 071/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index af7455438..7a901bbe9 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -37,7 +37,7 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, , doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, doDropout, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? if doDropout: From 31201f0ccc4552bb9b605483bd9cbced06be1334 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:41:46 -0700 Subject: [PATCH 072/134] dropout --- main/src/main/python/pytorch/forwardLayer.py | 19 +++++++------------ main/src/main/python/pytorch/layers.py | 4 ++-- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 7a901bbe9..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -37,12 +37,11 @@ def pickSpan(self, v, i): vs.append(e) return torch.cat(vs, dim=i) - def forward(self, inputExpressions, doDropout, headPositionsOpt = None): + def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
- if doDropout: - argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - emissionScores = self.dropout(self.pH(argExp)) + argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: @@ -61,19 +60,15 @@ def forward(self, inputExpressions, doDropout, headPositionsOpt = None): raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - if doDropout: - argExp = self.dropout(self.pickSpan(e, 0)) + argExp = self.dropout(self.pickSpan(e, 0)) if headPosition >= 0: # there is an explicit head in the sentence - if doDropout: - predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) + predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) else: # the head is root. we used a dedicated Parameter for root - if doDropout: - predExp = self.dropout(self.pickSpan(self.pRoot, 0)) + predExp = self.dropout(self.pickSpan(self.pRoot, 0)) ss = torch.cat([argExp, predExp]) - if doDropout: - l1 = self.dropout(self.pH(ss)) + l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: l1 = F.tanh(l1) elif self.nonlinearity == NONLIN_RELU: diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index 4acf2f2cf..cd1616e56 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -105,7 +105,7 @@ def forward(self, sentence, constEmbeddings, doDropout): for intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, sentence.headPositions) + states = self.finalLayer(states, sentence.headPositions) return states @@ -116,7 +116,7 @@ def forwardFrom(self, inStates, headPositions, doDropout): for 
intermediateLayer in self.intermediateLayers: states = intermediateLayer(states, doDropout) if self.finalLayer is not None: - states = self.finalLayer(states, doDropout, headPositions) + states = self.finalLayer(states, headPositions) return states From f00163edb0c22d6b1429f0cb7e4a1e131cf576f4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:43:51 -0700 Subject: [PATCH 073/134] debug dropout --- main/src/main/python/pytorch/forwardLayer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..22fde017e 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -41,7 +41,13 @@ def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - emissionScores = self.dropout(self.pH(argExp)) + temp = self.pH(argExp) + emissionScores = self.dropout(temp) + + try: + np.testing.assert_allclose(temp.cpu().numpy(), emissionScores.cpu().numpy(), rtol=1e-03, atol=1e-05) + except AssertionError as e: + print (e) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: From 4a194a7dad18cdf017d75308190f8d21cc533ce4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 16:45:46 -0700 Subject: [PATCH 074/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 22fde017e..f4dd4c702 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -45,7 +45,7 @@ def forward(self, inputExpressions, headPositionsOpt = None): emissionScores = 
self.dropout(temp) try: - np.testing.assert_allclose(temp.cpu().numpy(), emissionScores.cpu().numpy(), rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) if self.nonlinearity == NONLIN_TANH: From 0d3f146fc17caaae691087b2e497de0c75fe3c0d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:39:43 -0700 Subject: [PATCH 075/134] average models --- main/src/main/python/pytorch/layers.py | 28 ++++++++++++++++++++++++++ main/src/main/python/pytorch/metal.py | 19 +++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index cd1616e56..b93d61edc 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -97,6 +97,34 @@ def load_state_dict(self, params): if self.finalLayer is not None: self.finalLayer.load_state_dict(params['finalLayer']) + def add_state_dict(self, layers): + if self.initialLayer is not None: + for key in self.initialLayer.state_dict(): + if self.initialLayer.state_dict()[key].data.dtype == torch.float32: + self.initialLayer.state_dict()[key].data += layers.initialLayer.state_dict()[key].data.clone() + for i, il in enumerate(self.intermediateLayers): + for key in il.state_dict(): + if il.state_dict()[key].data.dtype == torch.float32: + il.state_dict()[key].data += layers.intermediateLayers[i].state_dict()[key].data.clone() + if self.finalLayer is not None: + for key in self.finalLayer.state_dict(): + if self.finalLayer.state_dict()[key].data.dtype == torch.float32: + self.finalLayer.state_dict()[key].data += layers.finalLayer.state_dict()[key].data.clone() + + def avg_state_dict(self, num_models): + if self.initialLayer is not None: + for key in self.initialLayer.state_dict(): + if self.initialLayer.state_dict()[key].data.dtype == torch.float32: + 
self.initialLayer.state_dict()[key].data /= num_models + for i, il in enumerate(self.intermediateLayers): + for key in il.state_dict(): + if il.state_dict()[key].data.dtype == torch.float32: + il.state_dict()[key].data /= num_models + if self.finalLayer is not None: + for key in self.finalLayer.state_dict(): + if self.finalLayer.state_dict()[key].data.dtype == torch.float32: + self.finalLayer.state_dict()[key].data /= num_models + def forward(self, sentence, constEmbeddings, doDropout): if self.initialLayer is None: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 312d762e8..b7eee5892 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -312,6 +312,25 @@ def load(cls, modelFilenamePrefix): return layersSeq + @classmethod + def load_multi(cls, models): + print (f"Loading MTL models from {models}...") + + layersSeq = list() + for model in models: + checkpoint = torch.load(model+".torch") + for i, param in enumerate(checkpoint): + layers = Layers.loadX2i(param['x2i']) + layers.load_state_dict(param['model']) + if len(layersSeq) Date: Wed, 26 Jan 2022 17:41:32 -0700 Subject: [PATCH 076/134] Update run.py --- main/src/main/python/run.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 01c37404b..b9808ce56 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -25,7 +25,10 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - model = Metal.load(modelName) + if len(modelName.split())==1: + model = Metal.load(modelName) + else: + model = Metal.load_multi(modelName.split()) mtl = Metal(taskManager, model) mtl.test() elif args.shell: From b9a8ced1065c2f5b6e1e51a76a29c128447b3881 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:42:05 -0700 Subject: [PATCH 
077/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index b7eee5892..22e204c2d 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,8 @@ def parse(self, sentence, constEmbeddings): def test(self): - # torch.manual_seed(self.taskManager.random) - # random.seed(self.taskManager.random) + torch.manual_seed(self.taskManager.random) + random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() for taskId in range(0, self.taskManager.taskCount): From f4c9f93571e7bbe871519104c9dda580dd9f7f76 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:54:22 -0700 Subject: [PATCH 078/134] Update run.py --- main/src/main/python/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index b9808ce56..2bec47674 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -6,7 +6,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--model_file', type=str, help='Filename of the model.', nargs='+') parser.add_argument('--train', action='store_true', help='Set the code to training purpose.') parser.add_argument('--test', action='store_true', help='Set the code to testing purpose.') parser.add_argument('--shell', action='store_true', help='Set the code to shell mode.') From 36adfb645d885975fbb070724f8a793b18ea8693 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 17:57:51 -0700 Subject: [PATCH 079/134] Update run.py --- main/src/main/python/run.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 2bec47674..2301d856d 100644 --- 
a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -25,10 +25,11 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - if len(modelName.split())==1: - model = Metal.load(modelName) + print (modelName) + if len(modelName)==1: + model = Metal.load(modelName[0]) else: - model = Metal.load_multi(modelName.split()) + model = Metal.load_multi(modelName) mtl = Metal(taskManager, model) mtl.test() elif args.shell: From aa491ffa74d9e41b7ce9165f4a96bdfea62e2884 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 18:00:16 -0700 Subject: [PATCH 080/134] fixed typo --- main/src/main/python/pytorch/metal.py | 2 +- main/src/main/python/run.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 22e204c2d..70ed4c3d2 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -328,7 +328,7 @@ def load_multi(cls, models): layersSeq[i].add_state_dict(layers) for layers in layersSeq: layers.avg_state_dict(len(models)) - rint (f"Loading MTL models from {models} complete.") + print (f"Loading MTL models from {models} complete.") return layersSeq @classmethod diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 2301d856d..4cea28453 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -25,7 +25,6 @@ config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - print (modelName) if len(modelName)==1: model = Metal.load(modelName[0]) else: From dd0dd2a1c8c71c7df1b616911316747dbb3c8b5b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 20:25:30 -0700 Subject: [PATCH 081/134] debug randomness --- main/src/main/python/pytorch/embeddingLayer.py | 5 +++-- 
main/src/main/python/pytorch/forwardLayer.py | 3 ++- main/src/main/python/pytorch/metal.py | 4 ++-- main/src/main/python/pytorch/rnnLayer.py | 1 + main/src/main/python/pytorch/utils.py | 1 + 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 821147075..7ba4c7272 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -81,6 +81,7 @@ def forward(self, sentence, constEmbeddings, doDropout): # const word embeddings such as GloVe constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) + print (constEmbeddingsExpressions) assert(constEmbeddingsExpressions.size(0) == len(words)) if(tags): assert(len(tags) == len(words)) if(nes): assert(len(nes) == len(words)) @@ -110,12 +111,12 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h if(doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 ids.append(id) learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) - + print ("learnedWordEmbeddings", learnedWordEmbeddings) # # biLSTM over character embeddings # charEmbedding = torch.stack([mkCharacterEmbedding(word, self.c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) - + print ("charEmbedding", charEmbedding) # # POS tag embedding # diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index f4dd4c702..2ca54d132 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -43,7 +43,8 @@ def forward(self, inputExpressions, headPositionsOpt = None): argExp = self.dropout(self.pickSpan(inputExpressions, 1)) temp = self.pH(argExp) emissionScores = self.dropout(temp) - + print ("final, argExp",argExp) + print ("final, emissionScores",emissionScores) try: 
np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) except AssertionError as e: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 70ed4c3d2..113b8442a 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -219,7 +219,7 @@ def evaluate(self, taskId, taskName, sentences, name, epoch=-1): annotatedSentences = reader.toAnnotatedSentences(sent) - for asent in annotatedSentences: + for asent in annotatedSentences[:1]: sentence = asent[0] goldLabels = asent[1] @@ -266,7 +266,7 @@ def parse(self, sentence, constEmbeddings): def test(self): - torch.manual_seed(self.taskManager.random) + # torch.manual_seed(self.taskManager.random) random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index f83fb2420..b257c4b16 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,6 +29,7 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States = transduce(inputExpressions, self.wordRnnBuilder) + print ("Intermediate, States",States) States = States.squeeze(1) if self.useHighwayConnections: States = torch.cat([States, inputExpressions], dim=1) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index d996deb02..b99ba767b 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -44,6 +44,7 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) + print ("charEmbeddings",charEmbeddings) output = transduce(charEmbeddings, charRnnBuilder) result = 
output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states From eeab1b08da5d9818b0bcf7c67c16a6627ef111c2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 20:28:37 -0700 Subject: [PATCH 082/134] Update mtl-en-ner.conf --- main/src/main/resources/org/clulab/mtl-en-ner.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index 9c07bdd75..f97791325 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -22,7 +22,7 @@ mtl { name = "En NER" train = "ner/train.txt" dev = "ner/dev.txt" - test = "ner/test.txt" + test = "ner/test2.txt" layers { final { From 6cda51e331e62f5f9fd4146ba67c468329d916a4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:39:02 -0700 Subject: [PATCH 083/134] Update utils.py --- main/src/main/python/pytorch/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index b99ba767b..a6168b9aa 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -87,17 +87,17 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: # change 1 to the layers we need - (h, c) = (torch.rand(2, 1, hidden_dim), torch.rand(2, 1, hidden_dim)) + (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) else: - (h, c) = (torch.rand(1, 1, hidden_dim), torch.rand(1, 1, hidden_dim)) + (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) elif mode == 'GRU': if bi_direct: - h = torch.rand(2, 1, hidden_dim) + h = torch.zeros(2, 1, hidden_dim) output, h = builder(embeddings.unsqueeze(1), h) else: - h = 
torch.rand(1, 1, hidden_dim) + h = torch.zeros(1, 1, hidden_dim) output, h = builder(embeddings.unsqueeze(1), h) return output From d8e6734a18cc18674aef25e0c99dbc4166da1344 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:43:56 -0700 Subject: [PATCH 084/134] Update utils.py --- main/src/main/python/pytorch/utils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index a6168b9aa..e5ba2ac8e 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -87,18 +87,14 @@ def transduce(embeddings, builder): if mode == 'LSTM': if bi_direct: # change 1 to the layers we need - (h, c) = (torch.zeros(2, 1, hidden_dim), torch.zeros(2, 1, hidden_dim)) - output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1)) else: - (h, c) = (torch.zeros(1, 1, hidden_dim), torch.zeros(1, 1, hidden_dim)) - output, (h, c) = builder(embeddings.unsqueeze(1), (h, c)) + output, (h, c) = builder(embeddings.unsqueeze(1)) elif mode == 'GRU': if bi_direct: - h = torch.zeros(2, 1, hidden_dim) - output, h = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1)) else: - h = torch.zeros(1, 1, hidden_dim) - output, h = builder(embeddings.unsqueeze(1), h) + output, h = builder(embeddings.unsqueeze(1)) return output From 2b8a78c5cfb1be632c1188f2b4eff514262e2c9e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:50:55 -0700 Subject: [PATCH 085/134] solve the randomness --- main/src/main/python/pytorch/embeddingLayer.py | 2 -- main/src/main/python/pytorch/forwardLayer.py | 2 -- main/src/main/python/pytorch/rnnLayer.py | 1 - main/src/main/python/pytorch/utils.py | 1 - main/src/main/resources/org/clulab/mtl-en-ner.conf | 2 +- 5 files changed, 1 insertion(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py 
b/main/src/main/python/pytorch/embeddingLayer.py index 7ba4c7272..2177f502e 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -111,12 +111,10 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h if(doDropout and id > 0 and self.w2f[word] == 1 and random.random() < 0.5): id = 0 ids.append(id) learnedWordEmbeddings = self.wordLookupParameters(torch.LongTensor(ids)) - print ("learnedWordEmbeddings", learnedWordEmbeddings) # # biLSTM over character embeddings # charEmbedding = torch.stack([mkCharacterEmbedding(word, self.c2i, self.charLookupParameters, self.charRnnBuilder) for word in words]) - print ("charEmbedding", charEmbedding) # # POS tag embedding # diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 2ca54d132..825e2e3a5 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -43,8 +43,6 @@ def forward(self, inputExpressions, headPositionsOpt = None): argExp = self.dropout(self.pickSpan(inputExpressions, 1)) temp = self.pH(argExp) emissionScores = self.dropout(temp) - print ("final, argExp",argExp) - print ("final, emissionScores",emissionScores) try: np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) except AssertionError as e: diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index b257c4b16..f83fb2420 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -29,7 +29,6 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) States = transduce(inputExpressions, self.wordRnnBuilder) - print ("Intermediate, States",States) States = States.squeeze(1) if self.useHighwayConnections: States = torch.cat([States, inputExpressions], dim=1) diff --git 
a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index e5ba2ac8e..2d9775eae 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -44,7 +44,6 @@ def save(file, values, comment): def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - print ("charEmbeddings",charEmbeddings) output = transduce(charEmbeddings, charRnnBuilder) result = output.squeeze(1)[-1] # Zheng: Not sure if this is the right way to concatenate the two direction hidden states diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index f97791325..9c07bdd75 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -22,7 +22,7 @@ mtl { name = "En NER" train = "ner/train.txt" dev = "ner/dev.txt" - test = "ner/test2.txt" + test = "ner/test.txt" layers { final { From 7c83bfe166b5a60a2eadfa38938b315df82c8077 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 21:53:10 -0700 Subject: [PATCH 086/134] Update embeddingLayer.py --- main/src/main/python/pytorch/embeddingLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 2177f502e..21e199116 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -81,7 +81,6 @@ def forward(self, sentence, constEmbeddings, doDropout): # const word embeddings such as GloVe constEmbeddingsExpressions = self.mkConstEmbeddings(words, constEmbeddings) - print (constEmbeddingsExpressions) assert(constEmbeddingsExpressions.size(0) == len(words)) if(tags): assert(len(tags) == len(words)) if(nes): assert(len(nes) == len(words)) From 
f44f00dc2e9fb1097b3c1b84c8279b168771ad7b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 22:09:16 -0700 Subject: [PATCH 087/134] Update mtl-en-ner.conf --- main/src/main/resources/org/clulab/mtl-en-ner.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-ner.conf b/main/src/main/resources/org/clulab/mtl-en-ner.conf index 9c07bdd75..aacc06054 100644 --- a/main/src/main/resources/org/clulab/mtl-en-ner.conf +++ b/main/src/main/resources/org/clulab/mtl-en-ner.conf @@ -8,7 +8,7 @@ mtl { learnedWordEmbeddingSize = 128 charEmbeddingSize = 32 charRnnStateSize = 16 - c2i = "org/clulab/c2i-en.txt" + c2i = "../resources/org/clulab/c2i-en.txt" } intermediate1 { From 14bede9e79491f0e0a56fb6e9f87f3e07c17dda0 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 22:11:51 -0700 Subject: [PATCH 088/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 825e2e3a5..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -41,12 +41,7 @@ def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? 
argExp = self.dropout(self.pickSpan(inputExpressions, 1)) - temp = self.pH(argExp) - emissionScores = self.dropout(temp) - try: - np.testing.assert_allclose(temp.detach().cpu().numpy(), emissionScores.detach().cpu().numpy(), rtol=1e-03, atol=1e-05) - except AssertionError as e: - print (e) + emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: From 382958b55a19491ac990a5bc5701976b7fdc6feb Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 22:12:08 -0700 Subject: [PATCH 089/134] Update metal.py --- main/src/main/python/pytorch/metal.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 113b8442a..59b009f59 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -266,8 +266,6 @@ def parse(self, sentence, constEmbeddings): def test(self): - # torch.manual_seed(self.taskManager.random) - random.seed(self.taskManager.random) for layers in self.model: layers.start_eval() for taskId in range(0, self.taskManager.taskCount): From a67bc71d1bd361b45ba857ebbab1596b70efc4a4 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 26 Jan 2022 23:40:34 -0700 Subject: [PATCH 090/134] fix bugs --- main/src/main/python/pytorch/greedyForwardLayer.py | 1 - main/src/main/python/pytorch/viterbiForwardLayer.py | 1 - 2 files changed, 2 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index b913c10d5..f74829577 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -18,7 +18,6 @@ def saveX2i(self): x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i - x2i["dropoutProb"] = self.dropoutProb return x2i diff --git 
a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 5aa9e6669..06aa72728 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -114,7 +114,6 @@ def saveX2i(self): x2i["span"] = spanToString(self.spans) if self.spans else "" x2i["nonlinearity"] = self.nonlinearity x2i["t2i"] = self.t2i - x2i["dropoutProb"] = self.dropoutProb return x2i From 89104bbbfcf71874d91aa34e6a9ac4bd4f0d4b74 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 00:59:40 -0700 Subject: [PATCH 091/134] Update run.py --- main/src/main/python/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/run.py b/main/src/main/python/run.py index 4cea28453..7d22e56fe 100644 --- a/main/src/main/python/run.py +++ b/main/src/main/python/run.py @@ -17,7 +17,7 @@ if args.train: config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) - modelName = args.model_file + modelName = args.model_file[0] mtl = Metal(taskManager, None) mtl.train(modelName) From 8d31a31baa801dcb9229290e3cbf9b5e8fc3363f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:18:03 -0700 Subject: [PATCH 092/134] fix bug --- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 06aa72728..45dafb44b 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -108,7 +108,7 @@ def loss(self, finalStates, goldLabelStrings): def saveX2i(self): x2i = dict() - x2i["inferenceType"] = TYPE_GREEDY + x2i["inferenceType"] = TYPE_VITERBI_STRING x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 x2i["span"] = spanToString(self.spans) 
if self.spans else "" From 9435f6da7ff389dabf5995137e44e06d4f2977c2 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:18:36 -0700 Subject: [PATCH 093/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..a783154c7 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,10 +82,11 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - if inferenceType == TYPE_VITERBI: - return ViterbiForwardLayer.load(x2i) - elif inferenceType == TYPE_GREEDY: - return GreedyForwardLayer.load(x2i) + return ViterbiForwardLayer.load(x2i) + # if inferenceType == TYPE_VITERBI: + # return ViterbiForwardLayer.load(x2i) + # elif inferenceType == TYPE_GREEDY: + # return GreedyForwardLayer.load(x2i) else: raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") From 1752b2637b90c9d81c22f811b29d2a188f0336d3 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:20:44 -0700 Subject: [PATCH 094/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index a783154c7..0ff193ff8 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -87,8 +87,8 @@ def load(x2i): # return ViterbiForwardLayer.load(x2i) # elif inferenceType == TYPE_GREEDY: # return GreedyForwardLayer.load(x2i) - else: - raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") + # else: + # raise RuntimeError(f"ERROR: unknown forward layer type 
{inferenceType}!") @staticmethod def initialize(config, paramPrefix, labelCounter, isDual, inputSize): From c6d8fcc122f2d3f809cc7de552ed6f56e4002325 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 27 Jan 2022 10:22:37 -0700 Subject: [PATCH 095/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 0ff193ff8..d1bebbdab 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,13 +82,12 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - return ViterbiForwardLayer.load(x2i) - # if inferenceType == TYPE_VITERBI: - # return ViterbiForwardLayer.load(x2i) - # elif inferenceType == TYPE_GREEDY: - # return GreedyForwardLayer.load(x2i) - # else: - # raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") + if inferenceType == TYPE_VITERBI: + return ViterbiForwardLayer.load(x2i) + elif inferenceType == TYPE_GREEDY: + return GreedyForwardLayer.load(x2i) + else: + raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") @staticmethod def initialize(config, paramPrefix, labelCounter, isDual, inputSize): From 249afc96b3b84d11e18518fcdc7f67480240782f Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:25:06 -0700 Subject: [PATCH 096/134] Update forwardLayer.py --- main/src/main/python/pytorch/forwardLayer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index d1bebbdab..3d945837d 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,6 +82,7 @@ def load(x2i): from pytorch.greedyForwardLayer import 
GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] + print (inferenceType) if inferenceType == TYPE_VITERBI: return ViterbiForwardLayer.load(x2i) elif inferenceType == TYPE_GREEDY: From fe4367b4b0cbeeeb184ab6870062e1f82d554f09 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:31:30 -0700 Subject: [PATCH 097/134] fix bug --- main/src/main/python/pytorch/forwardLayer.py | 6 +++--- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 3d945837d..3a310aa84 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,10 +82,10 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - print (inferenceType) - if inferenceType == TYPE_VITERBI: + print (inferenceType, TYPE_VITERBI) + if inferenceType == TYPE_VITERBI or inferenceType == TYPE_VITERBI_STRING:#this is a temporary solution to handle a typo in viterbi forward layer... 
return ViterbiForwardLayer.load(x2i) - elif inferenceType == TYPE_GREEDY: + elif inferenceType == TYPE_GREEDY or inferenceType == TYPE_GREEDY_STRING: return GreedyForwardLayer.load(x2i) else: raise RuntimeError(f"ERROR: unknown forward layer type {inferenceType}!") diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 45dafb44b..77c0514f9 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -108,7 +108,7 @@ def loss(self, finalStates, goldLabelStrings): def saveX2i(self): x2i = dict() - x2i["inferenceType"] = TYPE_VITERBI_STRING + x2i["inferenceType"] = TYPE_VITERBI x2i["inputSize"] = self.inputSize x2i["isDual"] = 1 if self.isDual else 0 x2i["span"] = spanToString(self.spans) if self.spans else "" From 10301770a30513ac71a1c3e424c4e1b81fa77501 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:31:49 -0700 Subject: [PATCH 098/134] remove debug print --- main/src/main/python/pytorch/forwardLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 3a310aa84..33040cf95 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -82,7 +82,6 @@ def load(x2i): from pytorch.greedyForwardLayer import GreedyForwardLayer from pytorch.viterbiForwardLayer import ViterbiForwardLayer inferenceType = x2i["inferenceType"] - print (inferenceType, TYPE_VITERBI) if inferenceType == TYPE_VITERBI or inferenceType == TYPE_VITERBI_STRING:#this is a temporary solution to handle a typo in viterbi forward layer... 
return ViterbiForwardLayer.load(x2i) elif inferenceType == TYPE_GREEDY or inferenceType == TYPE_GREEDY_STRING: From 0b57ad8db080deabdc5ab7b193e3c4a14ef59d47 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:51:28 -0700 Subject: [PATCH 099/134] add averaging models feature --- main/src/main/python/pytorch2onnx.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index f3d3b58c3..5fa59992a 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -60,7 +60,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--model_file', type=str, help='Filename of the model.') + parser.add_argument('--model_file', type=str, help='Filename of the model.', nargs='+') parser.add_argument('--config', type=str, help='Filename of the configuration.') parser.add_argument('--seed', type=int, default=1234) args = parser.parse_args() @@ -68,7 +68,10 @@ def forward(self, embeddings, word_ids, charEmbedding): config = ConfigFactory.parse_file(f'../resources/org/clulab/{args.config}.conf') taskManager = TaskManager(config, args.seed) modelName = args.model_file - model = Metal.load(modelName) + if len(modelName)==1: + model = Metal.load(modelName[0]) + else: + model = Metal.load_multi(modelName) for layers in model: layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() From bab975ef44f11d207e303685c51fc61089eca6fc Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:53:31 -0700 Subject: [PATCH 100/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 5fa59992a..2df5bd32b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ 
b/main/src/main/python/pytorch2onnx.py @@ -54,7 +54,7 @@ def forward(self, embeddings, word_ids, charEmbedding): for il in self.intermediateLayerss[i]: state = il(state, False) if self.finalLayers[i]: - state = self.finalLayers[i](state, False, None)#headPositions set to be None for now, we can add it in input list later + state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later return state if __name__ == '__main__': From 50c112740d0a0bab0e6e8ee788725e6417bcdd06 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 20:56:03 -0700 Subject: [PATCH 101/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 2df5bd32b..926ed1000 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -88,7 +88,7 @@ def forward(self, embeddings, word_ids, charEmbedding): torch.manual_seed(taskManager.random) random.seed(taskManager.random) - x2i = json.load(open(args.model_file+".json")) + x2i = json.load(open(args.model_file[0]+".json")) c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] From 05c090a68ab9b419abd8c29243e249b74a4b7ef5 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:15:20 -0700 Subject: [PATCH 102/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 926ed1000..c0b6cdea8 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -72,6 +72,10 @@ def forward(self, embeddings, word_ids, charEmbedding): model = Metal.load(modelName[0]) else: model = Metal.load_multi(modelName) + + mtl = Metal(taskManager, model) + mtl.test() + for layers in model: layers.start_eval() 
constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() From 08fd0d04b25f3001b877768d662312c768fc006a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:31:26 -0700 Subject: [PATCH 103/134] debug performance difference between torch and onnx --- main/src/main/python/pytorch2onnx.py | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index c0b6cdea8..ea55627a3 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -97,6 +97,45 @@ def forward(self, embeddings, word_ids, charEmbedding): c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + sentences = taskManager.tasks[taskId].testSentences + if sentences: + reader = MetalRowReader() + for sent in sentences: + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + char_embs = [] + for word in words: + char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + char_out = export_char(char_ids) + char_embs.append(char_out) + char_embs = np.stack(char_embs) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() + word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) + + emissionScores = export_model(embeddings, word_ids, char_embs) + + preds = [i2t[np.argmax(es)] for es in emissionScores] + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + + print (f"Accuracy : {scoreCountsByLabel.accuracy()}") + print (f"Precision : {scoreCountsByLabel.precision()}") + print (f"Recall on : {scoreCountsByLabel.recall()}") + print (f"Micro F1 : 
{scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences From f56722000e272eb7d3aa2cce7d172d22b3fc0511 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:38:36 -0700 Subject: [PATCH 104/134] debug performance difference between torch and onnx --- main/src/main/python/pytorch2onnx.py | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index ea55627a3..1b46d2349 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -192,6 +192,52 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_char = onnxruntime.InferenceSession("char.onnx") # compute ONNX Runtime output prediction + for taskId in range(0, taskManager.taskCount): + taskName = taskManager.tasks[taskId].taskName + sentences = taskManager.tasks[taskId].testSentences + if sentences: + reader = MetalRowReader() + for sent in sentences: + annotatedSentences = reader.toAnnotatedSentences(sent) + + for asent in annotatedSentences: + sentence = asent[0] + goldLabels = asent[1] + + words = sentence.words + + char_embs = [] + for word in words: + char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + ort_inputs = {ort_char.get_inputs()[i].name: x for i, x in enumerate([char_ids])} + ort_outs = ort_char.run(None, ort_inputs) + char_embs.append(ort_outs[0]) + char_embs = np.stack(char_embs) + embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() + word_ids = 
np.array([w2i[word] if word in w2i else 0 for word in words]) + + dummy_input = (embeddings, word_ids, char_embs) + + ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} + ort_outs = ort_session.run(None, ort_inputs) + + emissionScores = ort_outs[0] + preds = [i2t[np.argmax(es)] for es in emissionScores] + + sc = SeqScorer.f1(goldLabels, preds) + scoreCountsByLabel.incAll(sc) + + + print (f"Accuracy : {scoreCountsByLabel.accuracy()}") + print (f"Precision : {scoreCountsByLabel.precision()}") + print (f"Recall on : {scoreCountsByLabel.recall()}") + print (f"Micro F1 : {scoreCountsByLabel.f1()}") + for label in scoreCountsByLabel.labels(): + print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + duration = time.time() - start_time + print (duration) + ort_inputs = {ort_char.get_inputs()[i].name: to_numpy(x) for i, x in enumerate([char_ids])} ort_outs = ort_char.run(None, ort_inputs) try: From f8f1ca17da5a3732b7b32e5f47de42219ab93f50 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:46:43 -0700 Subject: [PATCH 105/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 1b46d2349..009f5d273 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -113,7 +113,7 @@ def forward(self, embeddings, word_ids, charEmbedding): char_embs = [] for word in words: - char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) + char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) char_out = export_char(char_ids) char_embs.append(char_out) char_embs = np.stack(char_embs) From 298bdc48a242316f3e62d5a97599f1bc40828455 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 
21:53:06 -0700 Subject: [PATCH 106/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 009f5d273..aedbc905a 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -118,8 +118,8 @@ def forward(self, embeddings, word_ids, charEmbedding): char_embs.append(char_out) char_embs = np.stack(char_embs) embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() - word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) + embeddings = constEmbeddings.emb(embed_ids) + word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) emissionScores = export_model(embeddings, word_ids, char_embs) From 949522ca4322b48f57a2c6c681c9c33d38ef1f9b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 21:59:20 -0700 Subject: [PATCH 107/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index aedbc905a..b617b6c35 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -116,7 +116,7 @@ def forward(self, embeddings, word_ids, charEmbedding): char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) char_out = export_char(char_ids) char_embs.append(char_out) - char_embs = np.stack(char_embs) + char_embs = torch.stack(char_embs) embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) From d05d81b927b3c9d6bd29e843b30f4ee887c50a27 Mon 
Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:05:27 -0700 Subject: [PATCH 108/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index b617b6c35..d1b93ccda 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -97,6 +97,9 @@ def forward(self, embeddings, word_ids, charEmbedding): c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] + t2i = x2i[1]['x2i']['finalLayer']["t2i"] + i2t = {i:t for t, i in t2i.items()} + for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName sentences = taskManager.tasks[taskId].testSentences From 9bc0ac8291054c85b6579a80ffb1b103711bac37 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:12:06 -0700 Subject: [PATCH 109/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index d1b93ccda..4bcd3f836 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -124,7 +124,7 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - emissionScores = export_model(embeddings, word_ids, char_embs) + emissionScores = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() preds = [i2t[np.argmax(es)] for es in emissionScores] From ebbfdcf1081140090f59d895543e30658c18ec30 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:18:06 -0700 Subject: [PATCH 110/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch2onnx.py 
b/main/src/main/python/pytorch2onnx.py index 4bcd3f836..3675d9d48 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -8,6 +8,7 @@ from pytorch.utils import * from pytorch.constEmbeddingsGlove import ConstEmbeddingsGlove from sequences.rowReaders import * +from pytorch.seqScorer import * import onnx import onnxruntime From 12a777dbd09057f11c5dd02150b16174c27a8d00 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:45:46 -0700 Subject: [PATCH 111/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 3675d9d48..28c548b60 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -100,7 +100,7 @@ def forward(self, embeddings, word_ids, charEmbedding): t2i = x2i[1]['x2i']['finalLayer']["t2i"] i2t = {i:t for t, i in t2i.items()} - + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName sentences = taskManager.tasks[taskId].testSentences @@ -139,7 +139,7 @@ def forward(self, embeddings, word_ids, charEmbedding): print (f"Micro F1 : {scoreCountsByLabel.f1()}") for label in scoreCountsByLabel.labels(): print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences From 84d551786cf151b657f07b00539d15403a8c1d4c Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 22:59:56 -0700 Subject: [PATCH 112/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 47 +--------------------------- 1 file changed, 
1 insertion(+), 46 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 28c548b60..7b4b65eb2 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -56,6 +56,7 @@ def forward(self, embeddings, word_ids, charEmbedding): state = il(state, False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later + state = self.finalLayers[i].inference(state) return state if __name__ == '__main__': @@ -196,52 +197,6 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_char = onnxruntime.InferenceSession("char.onnx") # compute ONNX Runtime output prediction - for taskId in range(0, taskManager.taskCount): - taskName = taskManager.tasks[taskId].taskName - sentences = taskManager.tasks[taskId].testSentences - if sentences: - reader = MetalRowReader() - for sent in sentences: - annotatedSentences = reader.toAnnotatedSentences(sent) - - for asent in annotatedSentences: - sentence = asent[0] - goldLabels = asent[1] - - words = sentence.words - - char_embs = [] - for word in words: - char_ids = np.array([c2i.get(c, UNK_EMBEDDING) for c in word]) - ort_inputs = {ort_char.get_inputs()[i].name: x for i, x in enumerate([char_ids])} - ort_outs = ort_char.run(None, ort_inputs) - char_embs.append(ort_outs[0]) - char_embs = np.stack(char_embs) - embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids).detach().cpu().numpy() - word_ids = np.array([w2i[word] if word in w2i else 0 for word in words]) - - dummy_input = (embeddings, word_ids, char_embs) - - ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} - ort_outs = ort_session.run(None, ort_inputs) - - emissionScores = ort_outs[0] - preds = [i2t[np.argmax(es)] for es in emissionScores] - - sc = SeqScorer.f1(goldLabels, preds) 
- scoreCountsByLabel.incAll(sc) - - - print (f"Accuracy : {scoreCountsByLabel.accuracy()}") - print (f"Precision : {scoreCountsByLabel.precision()}") - print (f"Recall on : {scoreCountsByLabel.recall()}") - print (f"Micro F1 : {scoreCountsByLabel.f1()}") - for label in scoreCountsByLabel.labels(): - print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - duration = time.time() - start_time - print (duration) - ort_inputs = {ort_char.get_inputs()[i].name: to_numpy(x) for i, x in enumerate([char_ids])} ort_outs = ort_char.run(None, ort_inputs) try: From a2bfc832fea368b87111259a8fdf34de6fbdcc19 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:22:42 -0700 Subject: [PATCH 113/134] fix bug in viterbi decoding --- main/src/main/python/pytorch/viterbiForwardLayer.py | 4 ++++ main/src/main/python/pytorch2onnx.py | 12 +++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 77c0514f9..c96ac2caa 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -124,6 +124,10 @@ def inference(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) return [self.i2t[i] for i in labelsIds] + def inference2(self, emissionScores): + score, labelsIds = self._viterbi_decode(emissionScores) + return labelsIds + def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 7b4b65eb2..c70e3603b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -56,8 +56,8 @@ def forward(self, embeddings, word_ids, charEmbedding): state = il(state, 
False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later - state = self.finalLayers[i].inference(state) - return state + ids = self.finalLayers[i].inference2(state) + return ids if __name__ == '__main__': @@ -101,6 +101,7 @@ def forward(self, embeddings, word_ids, charEmbedding): t2i = x2i[1]['x2i']['finalLayer']["t2i"] i2t = {i:t for t, i in t2i.items()} + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -126,9 +127,9 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - emissionScores = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() + ids = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() - preds = [i2t[np.argmax(es)] for es in emissionScores] + preds = [i2t[i] for i in ids] sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) @@ -140,6 +141,7 @@ def forward(self, embeddings, word_ids, charEmbedding): print (f"Micro F1 : {scoreCountsByLabel.f1()}") for label in scoreCountsByLabel.labels(): print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") + scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -207,7 +209,7 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(np.array(output), ort_outs[0], rtol=1e-03, atol=1e-05) 
except AssertionError as e: print (e) From 3269cd4c41eedf285178b21528a2b8fadd7d6505 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:28:30 -0700 Subject: [PATCH 114/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index c70e3603b..acc24c121 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -127,7 +127,7 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - ids = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() + ids = export_model(embeddings, word_ids, char_embs) preds = [i2t[i] for i in ids] From c90015c7ae7c24426426a49e176562b356474328 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:38:12 -0700 Subject: [PATCH 115/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index acc24c121..9a869860b 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,7 +57,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later ids = self.finalLayers[i].inference2(state) - return ids + return torch.LongTensor(ids) if __name__ == '__main__': @@ -127,7 +127,7 @@ def forward(self, embeddings, word_ids, charEmbedding): embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - ids = export_model(embeddings, word_ids, char_embs) + ids = export_model(embeddings, word_ids, 
char_embs).detach().cpu().numpy() preds = [i2t[i] for i in ids] From 81c1bc51d51afe0748f3f2de26039cc454c9fd9a Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:49:18 -0700 Subject: [PATCH 116/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 9a869860b..5eb5ef047 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -205,11 +205,11 @@ def forward(self, embeddings, word_ids, charEmbedding): np.testing.assert_allclose(to_numpy(char_out), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) - + print (ort_session.get_inputs()) ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(np.array(output), ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) From 91911ae227758c0e1561116284d766728d350cef Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Feb 2022 23:49:48 -0700 Subject: [PATCH 117/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 44 ---------------------------- 1 file changed, 44 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 5eb5ef047..54645bd94 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -99,50 +99,6 @@ def forward(self, embeddings, word_ids, charEmbedding): c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] - t2i = x2i[1]['x2i']['finalLayer']["t2i"] - i2t = {i:t for t, i in t2i.items()} - - scoreCountsByLabel = ScoreCountsByLabel() - for taskId in range(0, taskManager.taskCount): - taskName = 
taskManager.tasks[taskId].taskName - sentences = taskManager.tasks[taskId].testSentences - if sentences: - reader = MetalRowReader() - for sent in sentences: - annotatedSentences = reader.toAnnotatedSentences(sent) - - for asent in annotatedSentences: - sentence = asent[0] - goldLabels = asent[1] - - words = sentence.words - - char_embs = [] - for word in words: - char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) - char_out = export_char(char_ids) - char_embs.append(char_out) - char_embs = torch.stack(char_embs) - embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) - embeddings = constEmbeddings.emb(embed_ids) - word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - - ids = export_model(embeddings, word_ids, char_embs).detach().cpu().numpy() - - preds = [i2t[i] for i in ids] - - sc = SeqScorer.f1(goldLabels, preds) - scoreCountsByLabel.incAll(sc) - - - print (f"Accuracy : {scoreCountsByLabel.accuracy()}") - print (f"Precision : {scoreCountsByLabel.precision()}") - print (f"Recall on : {scoreCountsByLabel.recall()}") - print (f"Micro F1 : {scoreCountsByLabel.f1()}") - for label in scoreCountsByLabel.labels(): - print (f"\tP/R/F1 for label {label} ({scoreCountsByLabel.map[label].gold}): {scoreCountsByLabel.precision(label)} / {scoreCountsByLabel.recall(label)} / {scoreCountsByLabel.f1(label)}") - - scoreCountsByLabel = ScoreCountsByLabel() for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName testSentences = taskManager.tasks[taskId].testSentences From b1be4b57b78e82b529484c84dd2e3edf01f78a4d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:03:13 -0700 Subject: [PATCH 118/134] debug decoder --- main/src/main/python/pytorch/greedyForwardLayer.py | 4 ++++ main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- main/src/main/python/pytorch2onnx.py | 7 ++----- main/src/main/python/test_onnx.py | 3 +-- 4 
files changed, 8 insertions(+), 8 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index f74829577..56d668c02 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -28,6 +28,10 @@ def inference(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) return [self.i2t[np.argmax(es)] for es in emissionScores] + def inference2(self, emissionScores): + emissionScores = emissionScoresToArrays(emissionScores) + return torch.LongTensor([np.argmax(es) for es in emissionScores]) + def inferenceWithScores(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) return [sorted([(i, s) for i, s in enumerate(scoresForPosition)], key=lambda x: x[1]) for scoresForPosition in emissionScores] diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index c96ac2caa..a4be12236 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -126,7 +126,7 @@ def inference(self, emissionScores): def inference2(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) - return labelsIds + return torch.LongTensor(labelsIds) def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 54645bd94..112401178 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -56,8 +56,8 @@ def forward(self, embeddings, word_ids, charEmbedding): state = il(state, False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later - ids = self.finalLayers[i].inference2(state) - return torch.LongTensor(ids) 
+ ids = self.finalLayers[-1].inference2(state) + return ids if __name__ == '__main__': @@ -75,9 +75,6 @@ def forward(self, embeddings, word_ids, charEmbedding): else: model = Metal.load_multi(modelName) - mtl = Metal(taskManager, model) - mtl.test() - for layers in model: layers.start_eval() constEmbeddings = ConstEmbeddingsGlove.get_ConstLookupParams() diff --git a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index dc5d7ae1e..fb8e76757 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -66,8 +66,7 @@ ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) - emissionScores = ort_outs[0] - preds = [i2t[np.argmax(es)] for es in emissionScores] + preds = [i2t[i] for i in ort_outs[0]] sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) From 40654a6e0a63365711ddeefd646280cd86bd41d9 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:07:40 -0700 Subject: [PATCH 119/134] decoder error... 
--- main/src/main/python/pytorch/greedyForwardLayer.py | 2 +- main/src/main/python/pytorch/viterbiForwardLayer.py | 2 +- main/src/main/python/pytorch2onnx.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 56d668c02..19d788977 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -30,7 +30,7 @@ def inference(self, emissionScores): def inference2(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) - return torch.LongTensor([np.argmax(es) for es in emissionScores]) + return [np.argmax(es) for es in emissionScores] def inferenceWithScores(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index a4be12236..c96ac2caa 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -126,7 +126,7 @@ def inference(self, emissionScores): def inference2(self, emissionScores): score, labelsIds = self._viterbi_decode(emissionScores) - return torch.LongTensor(labelsIds) + return labelsIds def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 112401178..58439d0cc 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -162,7 +162,7 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + 
np.testing.assert_allclose(output, ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) From 7cda4dd57a6896438aca13ca1d22dd9eb4d1f341 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:23:31 -0700 Subject: [PATCH 120/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 58439d0cc..4abc154d7 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,7 +57,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later ids = self.finalLayers[-1].inference2(state) - return ids + return [ids] if __name__ == '__main__': From 7e673e74078d52e7364c6d3fc70ff53fa24caacc Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:29:24 -0700 Subject: [PATCH 121/134] Update viterbiForwardLayer.py --- main/src/main/python/pytorch/viterbiForwardLayer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index c96ac2caa..4b2e14060 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -125,6 +125,7 @@ def inference(self, emissionScores): return [self.i2t[i] for i in labelsIds] def inference2(self, emissionScores): + print (self.transitions) score, labelsIds = self._viterbi_decode(emissionScores) return labelsIds From 1857a5ffc1f14c364235b217e1ee674b0dcabb23 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:42:33 -0700 Subject: [PATCH 122/134] trying to fix the viterbi decoder --- main/src/main/python/pytorch/greedyForwardLayer.py | 3 +-- main/src/main/python/pytorch/viterbiForwardLayer.py | 5 ++--- 
main/src/main/python/pytorch2onnx.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/main/src/main/python/pytorch/greedyForwardLayer.py b/main/src/main/python/pytorch/greedyForwardLayer.py index 19d788977..145878c91 100644 --- a/main/src/main/python/pytorch/greedyForwardLayer.py +++ b/main/src/main/python/pytorch/greedyForwardLayer.py @@ -29,8 +29,7 @@ def inference(self, emissionScores): return [self.i2t[np.argmax(es)] for es in emissionScores] def inference2(self, emissionScores): - emissionScores = emissionScoresToArrays(emissionScores) - return [np.argmax(es) for es in emissionScores] + return torch.argmax(emissionScores, dim=1) def inferenceWithScores(self, emissionScores): emissionScores = emissionScoresToArrays(emissionScores) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index 4b2e14060..f3edbeb4f 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -125,9 +125,8 @@ def inference(self, emissionScores): return [self.i2t[i] for i in labelsIds] def inference2(self, emissionScores): - print (self.transitions) - score, labelsIds = self._viterbi_decode(emissionScores) - return labelsIds + def inference2(self, emissionScores): + return torch.argmax(emissionScores, dim=1) def inferenceWithScores(emissionScores): raise RuntimeError("ERROR: inferenceWithScores not supported for ViterbiLayer!") diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 4abc154d7..112401178 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,7 +57,7 @@ def forward(self, embeddings, word_ids, charEmbedding): if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later ids = self.finalLayers[-1].inference2(state) - return [ids] + return ids if __name__ == 
'__main__': @@ -162,7 +162,7 @@ def forward(self, embeddings, word_ids, charEmbedding): ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) try: - np.testing.assert_allclose(output, ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) From 14beb420178e4f892a9ba7855ae19ac250723c2e Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Feb 2022 00:42:57 -0700 Subject: [PATCH 123/134] Update viterbiForwardLayer.py --- main/src/main/python/pytorch/viterbiForwardLayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main/src/main/python/pytorch/viterbiForwardLayer.py b/main/src/main/python/pytorch/viterbiForwardLayer.py index f3edbeb4f..77025fd70 100644 --- a/main/src/main/python/pytorch/viterbiForwardLayer.py +++ b/main/src/main/python/pytorch/viterbiForwardLayer.py @@ -125,7 +125,6 @@ def inference(self, emissionScores): return [self.i2t[i] for i in labelsIds] def inference2(self, emissionScores): - def inference2(self, emissionScores): return torch.argmax(emissionScores, dim=1) def inferenceWithScores(emissionScores): From 3aee7e7333c33eb3656b9f71f111ddd3214f29cc Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Mar 2022 22:57:23 -0700 Subject: [PATCH 124/134] add other embeddings to onnx model --- main/src/main/python/pytorch2onnx.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 112401178..2533ea19d 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -41,14 +41,37 @@ def __init__(self, model): for i, layers in enumerate(model): if layers.initialLayer is not None: self.word_lookup = layers.initialLayer.wordLookupParameters + self.postag_lookup = 
layers.initialLayer.posTagLookupParameters + self.netag_lookup = layers.initialLayer.neTagLookupParameters + self.dist_lookup = layers.initialLayer.distanceLookupParameters + self.pos_lookup = layers.initialLayer.positionLookupParameters + self.useIsPredicate = layers.initialLayer.useIsPredicate self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) self.finalLayers[i] = layers.finalLayer self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, embeddings, word_ids, charEmbedding): + def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, headPositions=None): # Can I assuem there is only one initial layer? learnedWordEmbeddings = self.word_lookup(word_ids) - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding]#, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] + posTagEmbed = self.postag_lookup(tags) if tags and self.postag_lookup else None + neTagEmbed = self.netag_lookup(nes) if nes and self.netag_lookup else None + predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) if headPositions and self.useIsPredicate else None + if headPositions and self.dist_lookup: + dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] + for i in range(dists): + if dists[i] < -self.distanceWindowSize: + dists[i] = self.distanceWindowSize-1 + if dists[i] > self.distanceWindowSize: + dist[i] = self.distanceWindowSize+1 + distanceEmbedding = self.dist_lookup(torch.LongTensor(dists)) + else: + distanceEmbedding = None + if self.pos_lookup: + values = [i if i<100 else 100 for i, wid in enumerate(word_ids)] + positionEmbedding = self.pos_lookup(torch.LongTensor(values)) + else: + positionEmbedding = None + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding, posTagEmbed, neTagEmbed, distanceEmbedding, positionEmbedding, predEmbed] 
embedParts = [ep for ep in embedParts if ep is not None] state = torch.cat(embedParts, dim=1) for i in range(self.model_length): From cd46faa68e10e4f9ff95697cec2cb87bb33a4a27 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 2 Mar 2022 23:49:43 -0700 Subject: [PATCH 125/134] Update embeddingLayer.py --- main/src/main/python/pytorch/embeddingLayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 21e199116..ce8dffef3 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -142,7 +142,7 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # if headPositions and self.distanceLookupParameters: dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] - for i in range(dists): + for i in range(len(dists)): if dists[i] < -self.distanceWindowSize: dists[i] = self.distanceWindowSize-1 if dists[i] > self.distanceWindowSize: From fd514a22e8a25e0a0f61833631bb707e0393e039 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Mar 2022 00:05:09 -0700 Subject: [PATCH 126/134] fix bug in distance embeddings --- main/src/main/python/pytorch/embeddingLayer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index ce8dffef3..6239e8746 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -141,12 +141,12 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # We cut the distance down to values inside the window [-distanceWindowSize, +distanceWindowSize] # if headPositions and self.distanceLookupParameters: - dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] + dists = [i-predicatePosition+51 for i, 
predicatePosition in enumerate(headPositions)] for i in range(len(dists)): - if dists[i] < -self.distanceWindowSize: - dists[i] = self.distanceWindowSize-1 - if dists[i] > self.distanceWindowSize: - dist[i] = self.distanceWindowSize+1 + if dists[i] < 1: + dists[i] = 0 + if dists[i] > self.distanceWindowSize + 51: + dists[i] = self.distanceWindowSize + 52 distanceEmbedding = self.distanceLookupParameters(torch.LongTensor(dists)) else: distanceEmbedding = None From 278697147ea515e8299e4162c4282a5f18e7f1a9 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Mar 2022 00:16:11 -0700 Subject: [PATCH 127/134] Update embeddingLayer.py --- main/src/main/python/pytorch/embeddingLayer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 6239e8746..86c8b8414 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -141,12 +141,7 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # We cut the distance down to values inside the window [-distanceWindowSize, +distanceWindowSize] # if headPositions and self.distanceLookupParameters: - dists = [i-predicatePosition+51 for i, predicatePosition in enumerate(headPositions)] - for i in range(len(dists)): - if dists[i] < 1: - dists[i] = 0 - if dists[i] > self.distanceWindowSize + 51: - dists[i] = self.distanceWindowSize + 52 + dists = [max(i-predicatePosition+self.distanceWindowSize+1, 0) if i-predicatePosition <= self.distanceWindowSize else 2 * self.distanceWindowSize + 2 for i, predicatePosition in enumerate(headPositions)] distanceEmbedding = self.distanceLookupParameters(torch.LongTensor(dists)) else: distanceEmbedding = None From 9b97c68773717a2ff669b970102967a19fa4acbd Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 3 Mar 2022 00:40:02 -0700 Subject: [PATCH 128/134] Update embeddingLayer.py --- 
main/src/main/python/pytorch/embeddingLayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/python/pytorch/embeddingLayer.py b/main/src/main/python/pytorch/embeddingLayer.py index 86c8b8414..e3479119a 100644 --- a/main/src/main/python/pytorch/embeddingLayer.py +++ b/main/src/main/python/pytorch/embeddingLayer.py @@ -66,7 +66,7 @@ def __init__(self, w2i, # word to index posTagDim = posTagEmbeddingSize if posTagLookupParameters else 0 neTagDim = neTagEmbeddingSize if neTagLookupParameters else 0 - distanceDim = distanceWindowSize if distanceLookupParameters else 0 + distanceDim = distanceEmbeddingSize if distanceLookupParameters else 0 predicateDim = 1 if distanceLookupParameters and useIsPredicate else 0 positionDim = positionEmbeddingSize if positionLookupParameters else 0 self.outDim = ConstEmbeddingsGlove.dim + learnedWordEmbeddingSize + charRnnStateSize * 2 + posTagDim + neTagDim + distanceDim + positionDim + predicateDim @@ -132,7 +132,7 @@ def mkEmbeddings(self, words, constEmbeddings, doDropout, tags=None, nes=None, h # 1 if this word is the predicate # if headPositions and self.useIsPredicate: - predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) + predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]).unsqueeze(1) else: predEmbed = None From a20b2c58c789cda22784a3ee6dd055e2cf06e329 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 9 Mar 2022 15:38:35 -0700 Subject: [PATCH 129/134] Update pytorch2onnx.py --- main/src/main/python/pytorch2onnx.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 2533ea19d..12a2c9a80 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -57,12 +57,7 @@ def forward(self, embeddings, word_ids, 
charEmbedding, tags=None, nes=None, head neTagEmbed = self.netag_lookup(nes) if nes and self.netag_lookup else None predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) if headPositions and self.useIsPredicate else None if headPositions and self.dist_lookup: - dists = [i-predicatePosition for i, predicatePosition in enumerate(headPositions)] - for i in range(dists): - if dists[i] < -self.distanceWindowSize: - dists[i] = self.distanceWindowSize-1 - if dists[i] > self.distanceWindowSize: - dist[i] = self.distanceWindowSize+1 + dists = [max(i-predicatePosition+self.distanceWindowSize+1, 0) if i-predicatePosition <= self.distanceWindowSize else 2 * self.distanceWindowSize + 2 for i, predicatePosition in enumerate(headPositions)] distanceEmbedding = self.dist_lookup(torch.LongTensor(dists)) else: distanceEmbedding = None @@ -118,6 +113,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] + t2i = x2i[0]['x2i']['initialLayer']['tag2i'] + n2i = x2i[0]['x2i']['initialLayer']['ne2i'] for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -131,6 +128,9 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head goldLabels = asent[1] words = sentence.words + tags = sentence.posTags + nes = sentence.neTags + headPositions = sentence.headPositions char_embs = [] for word in words: char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) @@ -140,9 +140,11 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - output = export_model(embeddings, word_ids, 
char_embs) + tags_ids = torch.LongTensor([t2i[tag] if tag in t2i else 0 for tag in tags]) + nes_ids = torch.LongTensor([n2i[ne] if ne in n2i else 0 for ne in nes]) + output = export_model(embeddings, word_ids, char_embs, tags_ids, nes_ids, headPositions) - dummy_input = (embeddings, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs, tags_ids, nes_ids, headPositions) torch.onnx.export(export_char, char_ids, @@ -159,11 +161,14 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['embed', 'words', 'chars'], # the model's input names + input_names = ['embed', 'words', 'chars', 'tags_ids', 'nes_ids', 'headPositions'], # the model's input names output_names = ['output'], # the model's output names dynamic_axes = {'embed' : {0 : 'sentence length'}, 'words' : {0 : 'sentence length'}, 'chars' : {0 : 'sentence length'}, + 'tags_ids' : {0 : 'sentence length'}, + 'nes_ids' : {0 : 'sentence length'}, + 'headPositions' : {0 : 'sentence length'}, 'output': {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") From a0022227c6e20eb9bca650eacff28d3be0ed588d Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Wed, 9 Mar 2022 23:49:13 -0700 Subject: [PATCH 130/134] implement viterbi decoding --- main/src/main/python/pytorch2onnx.py | 56 ++++++++-------------------- main/src/main/python/test_onnx.py | 48 +++++++++++++++++++++++- 2 files changed, 63 insertions(+), 41 deletions(-) diff --git a/main/src/main/python/pytorch2onnx.py b/main/src/main/python/pytorch2onnx.py index 12a2c9a80..080b0efd8 100644 --- a/main/src/main/python/pytorch2onnx.py +++ b/main/src/main/python/pytorch2onnx.py @@ -15,6 +15,8 @@ import json +import numpy as np + def to_numpy(tensor): return tensor.detach().cpu().numpy() if 
tensor.requires_grad else tensor.cpu().numpy() @@ -41,32 +43,14 @@ def __init__(self, model): for i, layers in enumerate(model): if layers.initialLayer is not None: self.word_lookup = layers.initialLayer.wordLookupParameters - self.postag_lookup = layers.initialLayer.posTagLookupParameters - self.netag_lookup = layers.initialLayer.neTagLookupParameters - self.dist_lookup = layers.initialLayer.distanceLookupParameters - self.pos_lookup = layers.initialLayer.positionLookupParameters - self.useIsPredicate = layers.initialLayer.useIsPredicate self.intermediateLayerss[i] = nn.ModuleList(layers.intermediateLayers) self.finalLayers[i] = layers.finalLayer self.intermediateLayerss = nn.ModuleList(self.intermediateLayerss) self.finalLayers = nn.ModuleList(self.finalLayers) - def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, headPositions=None): + def forward(self, embeddings, word_ids, charEmbedding): # Can I assuem there is only one initial layer? learnedWordEmbeddings = self.word_lookup(word_ids) - posTagEmbed = self.postag_lookup(tags) if tags and self.postag_lookup else None - neTagEmbed = self.netag_lookup(nes) if nes and self.netag_lookup else None - predEmbed = torch.FloatTensor([1 if i==predicatePosition else 0 for i, predicatePosition in enumerate(headPositions)]) if headPositions and self.useIsPredicate else None - if headPositions and self.dist_lookup: - dists = [max(i-predicatePosition+self.distanceWindowSize+1, 0) if i-predicatePosition <= self.distanceWindowSize else 2 * self.distanceWindowSize + 2 for i, predicatePosition in enumerate(headPositions)] - distanceEmbedding = self.dist_lookup(torch.LongTensor(dists)) - else: - distanceEmbedding = None - if self.pos_lookup: - values = [i if i<100 else 100 for i, wid in enumerate(word_ids)] - positionEmbedding = self.pos_lookup(torch.LongTensor(values)) - else: - positionEmbedding = None - embedParts = [embeddings, learnedWordEmbeddings, charEmbedding, posTagEmbed, neTagEmbed, 
distanceEmbedding, positionEmbedding, predEmbed] + embedParts = [embeddings, learnedWordEmbeddings, charEmbedding] embedParts = [ep for ep in embedParts if ep is not None] state = torch.cat(embedParts, dim=1) for i in range(self.model_length): @@ -74,8 +58,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head state = il(state, False) if self.finalLayers[i]: state = self.finalLayers[i](state, None)#headPositions set to be None for now, we can add it in input list later - ids = self.finalLayers[-1].inference2(state) - return ids + transitions = self.finalLayers[-1].transitions + return state, transitions if __name__ == '__main__': @@ -113,8 +97,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head c2i = x2i[0]['x2i']['initialLayer']['c2i'] w2i = x2i[0]['x2i']['initialLayer']['w2i'] - t2i = x2i[0]['x2i']['initialLayer']['tag2i'] - n2i = x2i[0]['x2i']['initialLayer']['ne2i'] + t2i = x2i[1]['x2i']['finalLayer']["t2i"] + i2t = {i:t for t, i in t2i.items()} for taskId in range(0, taskManager.taskCount): taskName = taskManager.tasks[taskId].taskName @@ -128,9 +112,7 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head goldLabels = asent[1] words = sentence.words - tags = sentence.posTags - nes = sentence.neTags - headPositions = sentence.headPositions + char_embs = [] for word in words: char_ids = torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word]) @@ -140,11 +122,8 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head embed_ids = torch.LongTensor([constEmbeddings.w2i[word] if word in constEmbeddings.w2i else 0 for word in words]) embeddings = constEmbeddings.emb(embed_ids) word_ids = torch.LongTensor([w2i[word] if word in w2i else 0 for word in words]) - tags_ids = torch.LongTensor([t2i[tag] if tag in t2i else 0 for tag in tags]) - nes_ids = torch.LongTensor([n2i[ne] if ne in n2i else 0 for ne in nes]) - output = export_model(embeddings, 
word_ids, char_embs, tags_ids, nes_ids, headPositions) - - dummy_input = (embeddings, word_ids, char_embs, tags_ids, nes_ids, headPositions) + state, transitions = export_model(embeddings, word_ids, char_embs) + dummy_input = (embeddings, word_ids, char_embs) torch.onnx.export(export_char, char_ids, @@ -161,15 +140,12 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head export_params=True, # store the trained parameter weights inside the model file opset_version=10, # the ONNX version to export the model to do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['embed', 'words', 'chars', 'tags_ids', 'nes_ids', 'headPositions'], # the model's input names - output_names = ['output'], # the model's output names + input_names = ['embed', 'words', 'chars'], # the model's input names + output_names = ['state', 'transitions'], # the model's output names dynamic_axes = {'embed' : {0 : 'sentence length'}, 'words' : {0 : 'sentence length'}, 'chars' : {0 : 'sentence length'}, - 'tags_ids' : {0 : 'sentence length'}, - 'nes_ids' : {0 : 'sentence length'}, - 'headPositions' : {0 : 'sentence length'}, - 'output': {0 : 'sentence length'}}) + 'state': {0 : 'sentence length'}}) onnx_model = onnx.load("model.onnx") onnx.checker.check_model(onnx_model) @@ -186,11 +162,11 @@ def forward(self, embeddings, word_ids, charEmbedding, tags=None, nes=None, head np.testing.assert_allclose(to_numpy(char_out), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) - print (ort_session.get_inputs()) ort_inputs = {ort_session.get_inputs()[i].name: to_numpy(x) for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) + try: - np.testing.assert_allclose(output.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) + np.testing.assert_allclose(state.detach().cpu().numpy(), ort_outs[0], rtol=1e-03, atol=1e-05) except AssertionError as e: print (e) diff --git 
a/main/src/main/python/test_onnx.py b/main/src/main/python/test_onnx.py index fb8e76757..7c33d4894 100644 --- a/main/src/main/python/test_onnx.py +++ b/main/src/main/python/test_onnx.py @@ -4,6 +4,50 @@ from pytorch.seqScorer import * import time +def viterbi_decode(feats, transitions, t2i): + backpointers = [] + + # Initialize the viterbi variables in log space + init_vvars = np.full((1, len(t2i)), -10000.) + init_vvars[0][t2i[START_TAG]] = 0 + + # forward_var at step i holds the viterbi variables for step i-1 + forward_var = init_vvars + for feat in feats: + bptrs_t = [] # holds the backpointers for this step + viterbivars_t = [] # holds the viterbi variables for this step + + for next_tag in range(len(t2i)): + # next_tag_var[i] holds the viterbi variable for tag i at the + # previous step, plus the score of transitioning + # from tag i to next_tag. + # We don't include the emission scores here because the max + # does not depend on them (we add them in below) + next_tag_var = forward_var + transitions[next_tag] + best_tag_id = np.argmax(next_tag_var, 1)[0] + bptrs_t.append(best_tag_id) + viterbivars_t.append(next_tag_var[0][best_tag_id].reshape(1)) + # Now add in the emission scores, and assign forward_var to the set + # of viterbi variables we just computed + forward_var = (np.concatenate(viterbivars_t) + feat).reshape(1, -1) + backpointers.append(bptrs_t) + + # Transition to STOP_TAG + terminal_var = forward_var + transitions[t2i[STOP_TAG]] + best_tag_id = np.argmax(terminal_var, 1)[0] + path_score = terminal_var[0][best_tag_id] + + # Follow the back pointers to decode the best path. 
+ best_path = [best_tag_id] + for bptrs_t in reversed(backpointers): + best_tag_id = bptrs_t[best_tag_id] + best_path.append(best_tag_id) + # Pop off the start tag (we dont want to return that to the caller) + start = best_path.pop() + assert start == t2i[START_TAG] # Sanity check + best_path.reverse() + return path_score, best_path + if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -66,7 +110,9 @@ ort_inputs = {ort_session.get_inputs()[i].name: x for i, x in enumerate(dummy_input)} ort_outs = ort_session.run(None, ort_inputs) - preds = [i2t[i] for i in ort_outs[0]] + _, ids = viterbi_decode(ort_outs[0], ort_outs[1], t2i) + + preds = [i2t[i] for i in ids] sc = SeqScorer.f1(goldLabels, preds) scoreCountsByLabel.incAll(sc) From 743bbc532f162b522ef00911f9b1975451e6a43b Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 10 Mar 2022 11:02:21 -0700 Subject: [PATCH 131/134] remove pick span and transduce to simplify the model --- main/src/main/python/pytorch/forwardLayer.py | 38 ++++++++------------ main/src/main/python/pytorch/rnnLayer.py | 2 +- main/src/main/python/pytorch/utils.py | 30 ++-------------- 3 files changed, 18 insertions(+), 52 deletions(-) diff --git a/main/src/main/python/pytorch/forwardLayer.py b/main/src/main/python/pytorch/forwardLayer.py index 33040cf95..d071c66a8 100644 --- a/main/src/main/python/pytorch/forwardLayer.py +++ b/main/src/main/python/pytorch/forwardLayer.py @@ -25,48 +25,40 @@ def __init__(self, inputSize, isDual, t2i, i2t, actualInputSize, nonlinearity, d self.inDim = spanLength(spans) if spans is not None else inputSize self.outDim = len(t2i) - - def pickSpan(self, v, i): - if self.spans is None: - return v - else: - # Zheng: Will spans overlap? 
- vs = list() - for span in self.spans: - e = torch.index_select(v, i, torch.tensor(range(span[0], span[1]))) - vs.append(e) - return torch.cat(vs, dim=i) + # remove pick span part to simplify the ONNX converting + # def pickSpan(self, v, i): + # if self.spans is None: + # return v + # else: + # # Zheng: Will spans overlap? + # vs = list() + # for span in self.spans: + # e = torch.index_select(v, i, torch.tensor(range(span[0], span[1]))) + # vs.append(e) + # return torch.cat(vs, dim=i) def forward(self, inputExpressions, headPositionsOpt = None): if not self.isDual: # Zheng: Why the for loop here? Can we just use matrix manipulation? - argExp = self.dropout(self.pickSpan(inputExpressions, 1)) + argExp = self.dropout(inputExpressions) emissionScores = self.dropout(self.pH(argExp)) if self.nonlinearity == NONLIN_TANH: emissionScores = F.tanh(emissionScores) elif self.nonlinearity == NONLIN_RELU: emissionScores = F.relu(emissionScores) - # for i, e in enumerate(inputExpressions): - # argExp = self.dropout(self.pickSpan(e)) - # l1 = self.dropout(self.pH(argExp)) - # if self.nonlinearity == NONLIN_TANH: - # l1 = F.tanh(l1) - # elif self.nonlinearity == NONLIN_RELU: - # l1 = F.relu(l1) - # emissionScores.append(l1) else: emissionScores = list() if headPositionsOpt is None: raise RuntimeError("ERROR: dual task without information about head positions!") for i, e in enumerate(inputExpressions): headPosition = headPositionsOpt[i] - argExp = self.dropout(self.pickSpan(e, 0)) + argExp = self.dropout(e) if headPosition >= 0: # there is an explicit head in the sentence - predExp = self.dropout(self.pickSpan(inputExpressions[headPosition], 0)) + predExp = self.dropout(inputExpressions[headPosition]) else: # the head is root. 
we used a dedicated Parameter for root - predExp = self.dropout(self.pickSpan(self.pRoot, 0)) + predExp = self.dropout(self.pRoot) ss = torch.cat([argExp, predExp]) l1 = self.dropout(self.pH(ss)) if self.nonlinearity == NONLIN_TANH: diff --git a/main/src/main/python/pytorch/rnnLayer.py b/main/src/main/python/pytorch/rnnLayer.py index f83fb2420..32cb6edb1 100644 --- a/main/src/main/python/pytorch/rnnLayer.py +++ b/main/src/main/python/pytorch/rnnLayer.py @@ -28,7 +28,7 @@ def forward(self, inputExpressions, dropout): assert(inputExpressions is not None) - States = transduce(inputExpressions, self.wordRnnBuilder) + States, _ = self.wordRnnBuilder(inputExpressions.unsqueeze(1)) States = States.squeeze(1) if self.useHighwayConnections: States = torch.cat([States, inputExpressions], dim=1) diff --git a/main/src/main/python/pytorch/utils.py b/main/src/main/python/pytorch/utils.py index 2d9775eae..abcb1e6f9 100644 --- a/main/src/main/python/pytorch/utils.py +++ b/main/src/main/python/pytorch/utils.py @@ -42,19 +42,15 @@ def save(file, values, comment): file.write("\n") def mkCharacterEmbedding(word, c2i, charLookupParameters, charRnnBuilder): - hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(torch.LongTensor([c2i.get(c, UNK_EMBEDDING) for c in word])) - output = transduce(charEmbeddings, charRnnBuilder) + output, _ = charRnnBuilder(charEmbeddings.unsqueeze(1)) result = output.squeeze(1)[-1] - # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result def mkCharacterEmbedding2(char_ids, charLookupParameters, charRnnBuilder): - hidden_dim = charRnnBuilder.hidden_size charEmbeddings = charLookupParameters(char_ids) - output = transduce(charEmbeddings, charRnnBuilder) + output, _ = charRnnBuilder(charEmbeddings.unsqueeze(1)) result = output.squeeze(1)[-1] - # Zheng: Not sure if this is the right way to concatenate the two direction hidden states return result def readString2Ids(s2iFilename): @@ -75,28 
+71,6 @@ def readChar2Ids(s2iFilename): s2i[chr(int(k))] = int(v) return s2i -def transduce(embeddings, builder): - - builder = builder.float() - - hidden_dim = builder.hidden_size - bi_direct = builder.bidirectional - mode = builder.mode - - if mode == 'LSTM': - if bi_direct: - # change 1 to the layers we need - output, (h, c) = builder(embeddings.unsqueeze(1)) - else: - output, (h, c) = builder(embeddings.unsqueeze(1)) - elif mode == 'GRU': - if bi_direct: - output, h = builder(embeddings.unsqueeze(1)) - else: - output, h = builder(embeddings.unsqueeze(1)) - - return output - def sentenceLossGreedy(emissionScoresForSeq, golds): assert(emissionScoresForSeq.size(0) == len(golds)) criterion = nn.CrossEntropyLoss() From ca34b1c0a2310fb2a391a18a1494748ad5fbdedd Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Thu, 10 Mar 2022 11:06:26 -0700 Subject: [PATCH 132/134] Update mtl-en-pos-chunk-srlp.conf --- .../org/clulab/mtl-en-pos-chunk-srlp.conf | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf b/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf index 828fd973d..b23692fff 100644 --- a/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf +++ b/main/src/main/resources/org/clulab/mtl-en-pos-chunk-srlp.conf @@ -9,7 +9,7 @@ mtl { learnedWordEmbeddingSize = 128 charEmbeddingSize = 32 charRnnStateSize = 16 - c2i = "org/clulab/c2i-en.txt" + c2i = "../resources/org/clulab/c2i-en.txt" } intermediate1 { @@ -21,9 +21,9 @@ mtl { task1 { name = "En POS tagging" - train = "dynet/en/pos/train.txt" - dev = "dynet/en/pos/dev.txt" - test = "dynet/en/pos/test.txt" + train = "/data/nlp/corpora/processors-dynet/en/pos/train.txt" + dev = "/data/nlp/corpora/processors-dynet/en/pos/dev.txt" + test = "/data/nlp/corpora/processors-dynet/en/pos/test.txt" layers { final { @@ -34,9 +34,9 @@ mtl { task2 { name = "En chunking" - train = "dynet/en/chunking/train.txt" - dev = 
"dynet/en/chunking/test.txt" - test = "dynet/en/chunking/test.txt" + train = "/data/nlp/corpora/processors-dynet/en/chunking/train.txt" + dev = "/data/nlp/corpora/processors-dynet/en/chunking/test.txt" + test = "/data/nlp/corpora/processors-dynet/en/chunking/test.txt" layers { final { @@ -47,9 +47,9 @@ mtl { task3 { name = "En SRL predicates" - train = "dynet/en/srl/train.preds" - dev = "dynet/en/srl/dev.preds" - test = "dynet/en/srl/test-wsj.preds" + train = "/data/nlp/corpora/processors-dynet/en/srl/train.preds" + dev = "/data/nlp/corpora/processors-dynet/en/srl/dev.preds" + test = "/data/nlp/corpora/processors-dynet/en/srl/test-wsj.preds" layers { final { From 52d903c04bf8d2f7f0241209fcacc7b544507951 Mon Sep 17 00:00:00 2001 From: Zheng Tang Date: Sun, 20 Mar 2022 22:10:30 -0700 Subject: [PATCH 133/134] save the json only once to save memory and space --- main/src/main/python/pytorch/layers.py | 7 +----- main/src/main/python/pytorch/metal.py | 31 ++++++++++++++++---------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/main/src/main/python/pytorch/layers.py b/main/src/main/python/pytorch/layers.py index b93d61edc..926bc6606 100644 --- a/main/src/main/python/pytorch/layers.py +++ b/main/src/main/python/pytorch/layers.py @@ -74,20 +74,15 @@ def start_eval(self): def get_state_dict(self): params = dict() - j_params = dict() if self.initialLayer is not None: params['initialLayer'] = self.initialLayer.state_dict() - j_params['initialLayer'] = {k:v.data.tolist() for k, v in params['initialLayer'].items()} if self.intermediateLayers: params['intermediateLayers'] = list() - j_params['intermediateLayers'] = list() for il in self.intermediateLayers: params['intermediateLayers'].append(il.state_dict()) - j_params['intermediateLayers'].append({k:v.data.tolist() for k, v in params['intermediateLayers'][-1].items()}) if self.finalLayer is not None: params['finalLayer'] = self.finalLayer.state_dict() - j_params['finalLayer'] = {k:v.data.tolist() for k, v in 
params['finalLayer'].items()} - return params, j_params + return params def load_state_dict(self, params): if self.initialLayer is not None: diff --git a/main/src/main/python/pytorch/metal.py b/main/src/main/python/pytorch/metal.py index 59b009f59..1c6deb81d 100644 --- a/main/src/main/python/pytorch/metal.py +++ b/main/src/main/python/pytorch/metal.py @@ -277,12 +277,14 @@ def test(self): def save(self, baseFilename): params = list() - j_params = list() + if "-epoch0" in baseFilename: + j_params = list() for layers in self.model: - sd, j_sd = layers.get_state_dict() - x2i = layers.saveX2i() - params.append({"model": sd, "x2i": x2i}) - j_params.append({"x2i": x2i}) + sd = layers.get_state_dict() + params.append(sd) + if "-epoch0" in baseFilename: + x2i = layers.saveX2i() + j_params.append({"x2i": x2i}) # torch pickle save try: @@ -292,8 +294,9 @@ def save(self, baseFilename): print("[Warning: Saving failed... continuing anyway.]") # We can also save as text json file: - with open(baseFilename+".json", "w") as f: - f.write(json.dumps(j_params)) + if "-epoch0" in baseFilename: + with open(baseFilename.replace("-epoch0", "")+".json", "w") as f: + f.write(json.dumps(j_params)) @classmethod @@ -301,9 +304,11 @@ def load(cls, modelFilenamePrefix): print (f"Loading MTL model from {modelFilenamePrefix}...") layersSeq = list() checkpoint = torch.load(modelFilenamePrefix+".torch") - for param in checkpoint: - layers = Layers.loadX2i(param['x2i']) - layers.load_state_dict(param['model']) + with open(modelFilenamePrefix+".json") as f: + x2i = josn.load(f) + for i, param in enumerate(checkpoint): + layers = Layers.loadX2i(x2i[i]) + layers.load_state_dict(param) layersSeq.append(layers) print (f"Loading MTL model from {modelFilenamePrefix} complete.") @@ -317,9 +322,11 @@ def load_multi(cls, models): layersSeq = list() for model in models: checkpoint = torch.load(model+".torch") + with open(model+".json") as f: + x2i = josn.load(f) for i, param in enumerate(checkpoint): - layers 
= Layers.loadX2i(param['x2i']) - layers.load_state_dict(param['model']) + layers = Layers.loadX2i(x2i[i]) + layers.load_state_dict(param) if len(layersSeq) Date: Sun, 20 Mar 2022 22:23:53 -0700 Subject: [PATCH 134/134] Update mtl-en-srla.conf --- main/src/main/resources/org/clulab/mtl-en-srla.conf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main/src/main/resources/org/clulab/mtl-en-srla.conf b/main/src/main/resources/org/clulab/mtl-en-srla.conf index 6dcf5bbd8..8f5181484 100644 --- a/main/src/main/resources/org/clulab/mtl-en-srla.conf +++ b/main/src/main/resources/org/clulab/mtl-en-srla.conf @@ -16,9 +16,9 @@ mtl { distanceEmbeddingSize = 16 distanceWindowSize = 50 useIsPredicate = true - c2i = "org/clulab/c2i-en.txt" - tag2i = "org/clulab/tag2i-en.txt" - ne2i = "org/clulab/ne2i-en.txt" + c2i = "../resources/org/clulab/c2i-en.txt" + tag2i = "../resources/org/clulab/tag2i-en.txt" + ne2i = "../resources/org/clulab/ne2i-en.txt" } intermediate1 { @@ -31,9 +31,9 @@ mtl { task1 { name = "En SRL arguments" - train = "dynet/en/srl/train.args" - dev = "dynet/en/srl/dev.args" - test = "dynet/en/srl/test-wsj.args" + train = "/data/nlp/corpora/processors-dynet/en/srl/train.args" + dev = "/data/nlp/corpora/processors-dynet/en/srl/dev.args" + test = "/data/nlp/corpora/processors-dynet/en/srl/test-wsj.args" type = "dual" layers {