From 5f93c727612813a3edca3185f3a801bc9aee9a5b Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 11:32:44 -0500
Subject: [PATCH 01/88] Minor style & documentation updates

---
 parser/corpus.cc | 4 ++--
 parser/corpus.h  | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/parser/corpus.cc b/parser/corpus.cc
index 1928cfa..9815f48 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -134,8 +134,8 @@ void TrainingCorpus::OracleTransitionsCorpusReader::LoadCorrectActions(
   };
 
   while (getline(actionsFile, lineS)) {
-    ReplaceStringInPlace(lineS, "-RRB-", "_RRB_");
-    ReplaceStringInPlace(lineS, "-LRB-", "_LRB_");
+    ReplaceStringInPlace(&lineS, "-RRB-", "_RRB_");
+    ReplaceStringInPlace(&lineS, "-LRB-", "_LRB_");
     // An empty line marks the end of a sentence.
     if (lineS.empty()) {
       next_is_action_line = false;
diff --git a/parser/corpus.h b/parser/corpus.h
index 9be1a9d..9a92c0a 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -250,17 +250,17 @@ class TrainingCorpus : public Corpus {
       else return 0;
     }
   private:
-    bool is_training;
+    bool is_training; // can be dev rather than actual training
     void LoadCorrectActions(const std::string& file,
                             TrainingCorpus* corpus) const;
   };
 
-  static inline void ReplaceStringInPlace(std::string& subject,
+  static inline void ReplaceStringInPlace(std::string* subject,
                                           const std::string& search,
                                           const std::string& replace) {
     size_t pos = 0;
-    while ((pos = subject.find(search, pos)) != std::string::npos) {
-      subject.replace(pos, search.length(), replace);
+    while ((pos = subject->find(search, pos)) != std::string::npos) {
+      subject->replace(pos, search.length(), replace);
       pos += replace.length();
     }
   }

From 51cda203336a5ccf247056a97cc205c07ac6b354 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 12:08:41 -0500
Subject: [PATCH 02/88] Made TrainingCorpus an intermediate level in the
 hierarchy

---
 parser/corpus.cc             |  6 +++---
 parser/corpus.h              | 19 +++++++++++++------
 parser/lstm-parser-driver.cc | 16 +++++++++-------
 parser/lstm-parser.cc        | 16 ++++++++--------
 parser/lstm-parser.h         |  7 ++++---
 5 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 9815f48..63b353c 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -79,7 +79,7 @@ void ConllUCorpusReader::ReadSentences(const string& file,
 
 
 
-void TrainingCorpus::CountSingletons() {
+void ParserTrainingCorpus::CountSingletons() {
   // compute the singletons in the parser's training data
   map<unsigned, unsigned> counts;
   for (const auto& sent : sentences) {
@@ -94,8 +94,8 @@ void TrainingCorpus::CountSingletons() {
 }
 
 
-void TrainingCorpus::OracleTransitionsCorpusReader::LoadCorrectActions(
-    const string& file, TrainingCorpus* corpus) const {
+void ParserTrainingCorpus::OracleTransitionsCorpusReader::LoadCorrectActions(
+    const string& file, ParserTrainingCorpus* corpus) const {
   // TODO: break up this function?
   cerr << "Loading " << (is_training ? "training" : "dev")
        << " corpus from " << file << "..." << endl;
diff --git a/parser/corpus.h b/parser/corpus.h
index 9a92c0a..2562f40 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -207,11 +207,17 @@ class Corpus {
   // Corpus for subclasses to inherit and use. Subclasses are then responsible
   // for doing any corpus-reading or setup.
   Corpus(CorpusVocabulary* vocab) : vocab(vocab) {}
-
 };
 
 
 class TrainingCorpus : public Corpus {
+public:
+  TrainingCorpus(CorpusVocabulary* vocab) : Corpus(vocab) {}
+  std::vector<std::vector<unsigned>> correct_act_sent;
+};
+
+
+class ParserTrainingCorpus : public TrainingCorpus {
 public:
   friend class OracleTransitionsCorpusReader;
 
@@ -220,9 +226,9 @@ class TrainingCorpus : public Corpus {
   std::vector<std::vector<unsigned>> correct_act_sent;
   std::set<unsigned> singletons;
 
-  TrainingCorpus(CorpusVocabulary* vocab, const std::string& file,
-                 bool is_training) :
-      Corpus(vocab) {
+  ParserTrainingCorpus(CorpusVocabulary* vocab, const std::string& file,
+                       bool is_training) :
+      TrainingCorpus(vocab) {
     OracleTransitionsCorpusReader reader(is_training);
     reader.ReadSentences(file, this);
   }
@@ -234,7 +240,8 @@ class TrainingCorpus : public Corpus {
         is_training(is_training) {}
 
     virtual void ReadSentences(const std::string& file, Corpus* corpus) const {
-      TrainingCorpus* training_corpus = static_cast<TrainingCorpus *>(corpus);
+      ParserTrainingCorpus* training_corpus =
+          static_cast<ParserTrainingCorpus *>(corpus);
       LoadCorrectActions(file, training_corpus);
     }
 
@@ -252,7 +259,7 @@ class TrainingCorpus : public Corpus {
   private:
     bool is_training; // can be dev rather than actual training
     void LoadCorrectActions(const std::string& file,
-                            TrainingCorpus* corpus) const;
+                            ParserTrainingCorpus* corpus) const;
   };
 
   static inline void ReplaceStringInPlace(std::string* subject,
diff --git a/parser/lstm-parser-driver.cc b/parser/lstm-parser-driver.cc
index 8d6b235..a1caacc 100644
--- a/parser/lstm-parser-driver.cc
+++ b/parser/lstm-parser-driver.cc
@@ -135,7 +135,7 @@ int main(int argc, char** argv) {
     parser.reset(new LSTMParser(cmd_options, words, false));
   }
 
-  unique_ptr<TrainingCorpus> dev_corpus; // shared by train/evaluate
+  unique_ptr<ParserTrainingCorpus> dev_corpus; // shared by train/evaluate
 
   if (train) {
     if (!conf.count("training_data") || !conf.count("dev_data")) {
@@ -145,14 +145,16 @@ int main(int argc, char** argv) {
     }
 
     signal(SIGINT, signal_callback_handler);
-    TrainingCorpus training_corpus(&parser->vocab,
-                                   conf["training_data"].as<string>(), true);
+    ParserTrainingCorpus training_corpus(&parser->vocab,
+                                         conf["training_data"].as<string>(),
+                                         true);
     parser->FinalizeVocab();
     cerr << "Total number of words: " << training_corpus.vocab->CountWords()
          << endl;
     // OOV words will be replaced by UNK tokens
-    dev_corpus.reset(new TrainingCorpus(&parser->vocab,
-                                        conf["dev_data"].as<string>(), false));
+    dev_corpus.reset(
+        new ParserTrainingCorpus(&parser->vocab, conf["dev_data"].as<string>(),
+                                 false));
 
     ostringstream os;
     os << "parser_" << (parser->options.use_pos ? "pos" : "nopos")
@@ -180,8 +182,8 @@ int main(int argc, char** argv) {
     cerr << "Evaluating model on " << conf["dev_data"].as<string>() << endl;
     if (!train) { // Didn't already load dev corpus for training
       dev_corpus.reset(
-          new TrainingCorpus(&parser->vocab, conf["dev_data"].as<string>(),
-                             false));
+          new ParserTrainingCorpus(&parser->vocab,
+                                   conf["dev_data"].as<string>(), false));
     }
     parser->Evaluate(*dev_corpus);
   }
diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 790aba0..ad1b1cf 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -447,9 +447,9 @@ void LSTMParser::SaveModel(const string& model_fname, bool softlink_created) {
 }
 
 
-void LSTMParser::Train(const TrainingCorpus& corpus,
-                       const TrainingCorpus& dev_corpus, const double unk_prob,
-                       const string& model_fname,
+void LSTMParser::Train(const ParserTrainingCorpus& corpus,
+                       const ParserTrainingCorpus& dev_corpus,
+                       const double unk_prob, const string& model_fname,
                        const volatile bool* requested_stop) {
   bool softlink_created = false;
   int best_correct_heads = 0;
@@ -627,11 +627,11 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
     }
 
     if (evaluate) {
-      // Downcast to TrainingCorpus to get gold-standard data. We can only get
-      // here if this function was called by Evaluate, which statically checks
-      // that the corpus is in fact a TrainingCorpus, so this cast is safe.
-      const TrainingCorpus& training_corpus =
-          static_cast<const TrainingCorpus&>(corpus);
+      // Downcast to ParserTrainingCorpus to get gold-standard data. We can only
+      // get here if this function was called by Evaluate, which statically
+      // checks that the corpus is in fact a TrainingCorpus, so casting is safe.
+      const ParserTrainingCorpus& training_corpus =
+          static_cast<const ParserTrainingCorpus&>(corpus);
       const vector<unsigned>& actions = training_corpus.correct_act_sent[sii];
       ParseTree ref = RecoverParseTree(sentence, actions, corpus.vocab->actions,
                                        corpus.vocab->actions_to_arc_labels,
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 5d89922..ad2c9b1 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -191,15 +191,16 @@ class LSTMParser {
       const std::vector<std::string>& actions_to_arc_labels,
       bool labeled = false);
 
-  void Train(const TrainingCorpus& corpus, const TrainingCorpus& dev_corpus,
-             const double unk_prob, const std::string& model_fname,
+  void Train(const ParserTrainingCorpus& corpus,
+             const ParserTrainingCorpus& dev_corpus, const double unk_prob,
+             const std::string& model_fname,
              const volatile bool* requested_stop = nullptr);
 
   void Test(const Corpus& corpus) {
     DoTest(corpus, false, true);
   }
 
-  void Evaluate(const TrainingCorpus& corpus, bool output_parses=false) {
+  void Evaluate(const ParserTrainingCorpus& corpus, bool output_parses=false) {
     DoTest(corpus, true, output_parses);
   }
 

From 4423e464a85daa07062a15372b78e0e8bee6d566 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 12:44:20 -0500
Subject: [PATCH 03/88] Unified sentence-reading for oracle transition readers

---
 parser/corpus.cc |  2 +-
 parser/corpus.h  | 34 ++++++++++++++++++++++------------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 63b353c..3e5d532 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -94,7 +94,7 @@ void ParserTrainingCorpus::CountSingletons() {
 }
 
 
-void ParserTrainingCorpus::OracleTransitionsCorpusReader::LoadCorrectActions(
+void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
     const string& file, ParserTrainingCorpus* corpus) const {
   // TODO: break up this function?
   cerr << "Loading " << (is_training ? "training" : "dev")
diff --git a/parser/corpus.h b/parser/corpus.h
index 2562f40..c18fa93 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -212,8 +212,24 @@ class Corpus {
 
 class TrainingCorpus : public Corpus {
 public:
-  TrainingCorpus(CorpusVocabulary* vocab) : Corpus(vocab) {}
   std::vector<std::vector<unsigned>> correct_act_sent;
+
+protected:
+  class OracleTransitionsCorpusReader : public CorpusReader {
+  public:
+    OracleTransitionsCorpusReader(bool is_training) :
+        is_training(is_training) {
+    }
+  protected:
+    bool is_training; // can be dev rather than actual training
+  };
+
+  TrainingCorpus(CorpusVocabulary* vocab,
+                 const OracleTransitionsCorpusReader& reader,
+                 const std::string& file) :
+      Corpus(vocab, reader, file) {}
+
+  TrainingCorpus(CorpusVocabulary* vocab) : Corpus(vocab) {}
 };
 
 
@@ -222,22 +238,17 @@ class ParserTrainingCorpus : public TrainingCorpus {
   friend class OracleTransitionsCorpusReader;
 
   bool USE_SPELLING = false;
-
-  std::vector<std::vector<unsigned>> correct_act_sent;
   std::set<unsigned> singletons;
 
   ParserTrainingCorpus(CorpusVocabulary* vocab, const std::string& file,
                        bool is_training) :
-      TrainingCorpus(vocab) {
-    OracleTransitionsCorpusReader reader(is_training);
-    reader.ReadSentences(file, this);
-  }
+      TrainingCorpus(vocab, OracleParseTransitionsReader(is_training), file) {}
 
 private:
-  class OracleTransitionsCorpusReader : public CorpusReader {
+  class OracleParseTransitionsReader : public OracleTransitionsCorpusReader{
   public:
-    OracleTransitionsCorpusReader(bool is_training) :
-        is_training(is_training) {}
+    OracleParseTransitionsReader(bool is_training) :
+        OracleTransitionsCorpusReader(is_training) {}
 
     virtual void ReadSentences(const std::string& file, Corpus* corpus) const {
       ParserTrainingCorpus* training_corpus =
@@ -245,7 +256,7 @@ class ParserTrainingCorpus : public TrainingCorpus {
       LoadCorrectActions(file, training_corpus);
     }
 
-    virtual ~OracleTransitionsCorpusReader() {};
+    virtual ~OracleParseTransitionsReader() {};
 
     static inline unsigned UTF8Len(unsigned char x) {
       if (x < 0x80) return 1;
@@ -257,7 +268,6 @@ class ParserTrainingCorpus : public TrainingCorpus {
       else return 0;
     }
   private:
-    bool is_training; // can be dev rather than actual training
     void LoadCorrectActions(const std::string& file,
                             ParserTrainingCorpus* corpus) const;
   };

From 5e27f5b2ba7e1ea0e9e1ca3be1c6dc9b0fe8a121 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 17:58:10 -0500
Subject: [PATCH 04/88] Fixed build settings to follow better CMake style

---
 CMakeLists.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d065fb4..7d1b1f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,8 +1,12 @@
 project(lstm-parser)
 cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
 
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE RelWithDebInfo)
+endif(NOT CMAKE_BUILD_TYPE)
+
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
-set(CMAKE_CXX_FLAGS "-Wall -std=c++11 -O3 -g")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11")
 
 enable_testing()
 

From 1514d35cae0576a8b36a46ff6208256b4ab33eac Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 18:07:06 -0500
Subject: [PATCH 05/88] Split up transition-reading code

Shortens LoadCorrectActions by moving word and action recording into
RecordWord and RecordAction; also allows that code to be reused for
other transition-based NLP systems.
---
 parser/corpus.cc      | 165 +++++++++++++++++++++++-------------------
 parser/corpus.h       |  26 +++++--
 parser/lstm-parser.cc |   3 +-
 3 files changed, 111 insertions(+), 83 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 3e5d532..4214e8e 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -94,9 +94,93 @@ void ParserTrainingCorpus::CountSingletons() {
 }
 
 
+void ParserTrainingCorpus::OracleParseTransitionsReader::RecordWord(
+    const string& word, const string& pos, unsigned next_token_index,
+    CorpusVocabulary* vocab, ParserTrainingCorpus* corpus,
+    map<unsigned, unsigned>* sentence, map<unsigned, unsigned>* sentence_pos,
+    map<unsigned, string>* sentence_unk_surface_forms) const {
+  // We assume that we'll have seen all POS tags in training, so don't
+  // worry about OOV tags.
+  unsigned pos_id = vocab->GetOrAddEntry(pos, &vocab->pos_to_int,
+                                         &vocab->int_to_pos);
+
+  unsigned word_id;
+  if (is_training) {
+    unsigned num_words = vocab->CountWords(); // store for later check
+    word_id = vocab->GetOrAddWord(word, true);
+    if (vocab->CountWords() > num_words) {
+      // A new word was added; add its chars, too.
+      unsigned j = 0;
+      while (j < word.length()) {
+        unsigned char_utf8_len = UTF8Len(word[j]);
+        string next_utf8_char = word.substr(j, char_utf8_len);
+        vocab->GetOrAddEntry(next_utf8_char, &vocab->chars_to_int,
+                             &vocab->int_to_chars);
+        j += char_utf8_len;
+      }
+    } else {
+      // It's an old word. Make sure it's marked as present in training.
+      vocab->int_to_training_word[word_id] = true;
+    }
+  } else {
+    // add an empty string for any token except OOVs (it is easy to
+    // recover the surface form of non-OOV using intToWords(id)).
+    // OOV word
+    if (corpus->USE_SPELLING) {
+      word_id = vocab->GetOrAddWord(word); // don't record as training
+      (*sentence_unk_surface_forms)[next_token_index] = "";
+    } else {
+      auto word_iter = vocab->words_to_int.find(word);
+      if (word_iter == vocab->words_to_int.end()) {
+        // Save the surface form of this OOV.
+        (*sentence_unk_surface_forms)[next_token_index] = word;
+        word_id = vocab->words_to_int[vocab->UNK];
+      } else {
+        (*sentence_unk_surface_forms)[next_token_index] = "";
+        word_id = word_iter->second;
+      }
+    }
+  }
+
+  (*sentence)[next_token_index] = word_id;
+  (*sentence_pos)[next_token_index] = pos_id;
+}
+
+void ParserTrainingCorpus::OracleParseTransitionsReader::RecordAction(
+    const string& action, bool start_of_sentence, CorpusVocabulary* vocab,
+    ParserTrainingCorpus* corpus) const {
+  auto PushAction = // should be inlined; defined here for DRY reasons
+      [corpus, start_of_sentence](unsigned action_index) {
+    if (start_of_sentence)
+      corpus->correct_act_sent.push_back( {action_index} );
+    else
+      corpus->correct_act_sent.back().push_back(action_index);
+  };
+
+  auto action_iter = find(vocab->actions.begin(), vocab->actions.end(), action);
+  if (action_iter != vocab->actions.end()) {
+    unsigned action_index = distance(vocab->actions.begin(), action_iter);
+    PushAction(action_index);
+  } else { // A not-previously-seen action
+    if (is_training) {
+      vocab->actions.push_back(action);
+      unsigned action_index = vocab->actions.size() - 1;
+      PushAction(action_index);
+    } else {
+      // TODO: right now, new actions which haven't been observed in
+      // training are not added to correct_act_sent. In dev, this may
+      // be a problem if there is little training data.
+      cerr << "WARNING: encountered unknown transition in dev corpus: "
+           << action << endl;
+      if (start_of_sentence)
+      corpus->correct_act_sent.push_back({});
+    }
+  }
+}
+
+
 void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
     const string& file, ParserTrainingCorpus* corpus) const {
-  // TODO: break up this function?
   cerr << "Loading " << (is_training ? "training" : "dev")
        << " corpus from " << file << "..." << endl;
   ifstream actionsFile(file);
@@ -192,84 +276,14 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
             pos = CorpusVocabulary::ROOT;
           }
 
-          // We assume that we'll have seen all POS tags in training, so don't
-          // worry about OOV tags.
-          unsigned pos_id = vocab->GetOrAddEntry(pos, &vocab->pos_to_int,
-                                                 &vocab->int_to_pos);
           // Use 1-indexed token IDs to leave room for ROOT in position 0.
           unsigned next_token_index = sentence.size() + 1;
-          unsigned word_id;
-          if (is_training) {
-            unsigned num_words = vocab->CountWords(); // store for later check
-            word_id = vocab->GetOrAddWord(word, true);
-            if (vocab->CountWords() > num_words) {
-              // A new word was added; add its chars, too.
-              unsigned j = 0;
-              while (j < word.length()) {
-                unsigned char_utf8_len = UTF8Len(word[j]);
-                string next_utf8_char = word.substr(j, char_utf8_len);
-                vocab->GetOrAddEntry(next_utf8_char, &vocab->chars_to_int,
-                                     &vocab->int_to_chars);
-                j += char_utf8_len;
-              }
-            } else {
-              // It's an old word. Make sure it's marked as present in training.
-              vocab->int_to_training_word[word_id] = true;
-            }
-          } else {
-            // add an empty string for any token except OOVs (it is easy to
-            // recover the surface form of non-OOV using intToWords(id)).
-            // OOV word
-            if (corpus->USE_SPELLING) {
-              word_id = vocab->GetOrAddWord(word); // don't record as training
-              sentence_unk_surface_forms[next_token_index] = "";
-            } else {
-              auto word_iter = vocab->words_to_int.find(word);
-              if (word_iter == vocab->words_to_int.end()) {
-                // Save the surface form of this OOV.
-                sentence_unk_surface_forms[next_token_index] = word;
-                word_id = vocab->words_to_int[vocab->UNK];
-              } else {
-                sentence_unk_surface_forms[next_token_index] = "";
-                word_id = word_iter->second;
-              }
-            }
-          }
-
-          sentence[next_token_index] = word_id;
-          sentence_pos[next_token_index] = pos_id;
+          RecordWord(word, pos, next_token_index, vocab, corpus, &sentence,
+                     &sentence_pos, &sentence_unk_surface_forms);
         } while (iss);
       }
-    } else if (next_is_action_line) {
-      auto action_iter = find(vocab->actions.begin(), vocab->actions.end(),
-                              lineS);
-      if (action_iter != vocab->actions.end()) {
-        unsigned action_index = distance(vocab->actions.begin(), action_iter);
-        if (start_of_sentence)
-          corpus->correct_act_sent.push_back({action_index});
-        else
-          corpus->correct_act_sent.back().push_back(action_index);
-      } else { // A not-previously-seen action
-        if (is_training) {
-          vocab->actions.push_back(lineS);
-          vocab->actions_to_arc_labels.push_back(
-              vocab->GetLabelForAction(lineS));
-
-          unsigned action_index = vocab->actions.size() - 1;
-          if (start_of_sentence)
-            corpus->correct_act_sent.push_back({action_index});
-          else
-            corpus->correct_act_sent.back().push_back(action_index);
-        } else {
-          // TODO: right now, new actions which haven't been observed in
-          // training are not added to correct_act_sent. In dev, this may
-          // be a problem if there is little training data.
-          cerr << "WARNING: encountered unknown transition in dev corpus: "
-               << lineS << endl;
-          if (start_of_sentence)
-            corpus->correct_act_sent.push_back({});
-        }
-      }
+    } else { // next_is_action_line
+      RecordAction(lineS, start_of_sentence, vocab, corpus);
       start_of_sentence = false;
     }
 
@@ -292,6 +306,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   cerr << "done." << "\n";
   if (is_training) {
     for (auto a : vocab->actions) {
+      vocab->actions_to_arc_labels.push_back(vocab->GetLabelForAction(a));
       cerr << a << "\n";
     }
   }
diff --git a/parser/corpus.h b/parser/corpus.h
index c18fa93..05710ac 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -91,7 +91,7 @@ class CorpusVocabulary {
     }
   }
 
-  static inline std::string GetLabelForAction(const std::string& action) {
+  virtual std::string GetLabelForAction(const std::string& action) {
     if (boost::starts_with(action, "RIGHT-ARC") ||
         boost::starts_with(action, "LEFT-ARC")) {
       size_t first_char_in_rel = action.find('(') + 1;
@@ -224,11 +224,8 @@ class TrainingCorpus : public Corpus {
     bool is_training; // can be dev rather than actual training
   };
 
-  TrainingCorpus(CorpusVocabulary* vocab,
-                 const OracleTransitionsCorpusReader& reader,
-                 const std::string& file) :
-      Corpus(vocab, reader, file) {}
-
+  // Don't provide access to reader constructor -- object won't be fully
+  // constructed yet, so it would segfault.
   TrainingCorpus(CorpusVocabulary* vocab) : Corpus(vocab) {}
 };
 
@@ -242,7 +239,9 @@ class ParserTrainingCorpus : public TrainingCorpus {
 
   ParserTrainingCorpus(CorpusVocabulary* vocab, const std::string& file,
                        bool is_training) :
-      TrainingCorpus(vocab, OracleParseTransitionsReader(is_training), file) {}
+      TrainingCorpus(vocab) {
+    OracleParseTransitionsReader(is_training).ReadSentences(file, this);
+  }
 
 private:
   class OracleParseTransitionsReader : public OracleTransitionsCorpusReader{
@@ -267,6 +266,19 @@ class ParserTrainingCorpus : public TrainingCorpus {
       else if ((x >> 1) == 0x7e) return 6;
       else return 0;
     }
+
+  protected:
+    void RecordWord(
+        const std::string& word, const std::string& pos,
+        unsigned next_token_index, CorpusVocabulary* vocab,
+        ParserTrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,
+        std::map<unsigned, unsigned>* sentence_pos,
+        std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
+
+    void RecordAction(const std::string& action, bool start_of_sentence,
+                      CorpusVocabulary* vocab,
+                      ParserTrainingCorpus* corpus) const;
+
   private:
     void LoadCorrectActions(const std::string& file,
                             ParserTrainingCorpus* corpus) const;
diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index ad1b1cf..720f323 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -629,7 +629,8 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
     if (evaluate) {
       // Downcast to ParserTrainingCorpus to get gold-standard data. We can only
       // get here if this function was called by Evaluate, which statically
-      // checks that the corpus is in fact a TrainingCorpus, so casting is safe.
+      // checks that the corpus is in fact a ParserTrainingCorpus, so this cast
+      // is safe.
       const ParserTrainingCorpus& training_corpus =
           static_cast<const ParserTrainingCorpus&>(corpus);
       const vector<unsigned>& actions = training_corpus.correct_act_sent[sii];

From a49a559d4433ffcced8f384eb6d96d4b78e98173 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 18:24:01 -0500
Subject: [PATCH 06/88] Moved string replacement function

---
 parser/corpus.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index 05710ac..1098c00 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -220,6 +220,16 @@ class TrainingCorpus : public Corpus {
     OracleTransitionsCorpusReader(bool is_training) :
         is_training(is_training) {
     }
+
+    static inline void ReplaceStringInPlace(std::string* subject,
+                                            const std::string& search,
+                                            const std::string& replace) {
+      size_t pos = 0;
+      while ((pos = subject->find(search, pos)) != std::string::npos) {
+        subject->replace(pos, search.length(), replace);
+        pos += replace.length();
+      }
+    }
   protected:
     bool is_training; // can be dev rather than actual training
   };
@@ -284,16 +294,6 @@ class ParserTrainingCorpus : public TrainingCorpus {
                             ParserTrainingCorpus* corpus) const;
   };
 
-  static inline void ReplaceStringInPlace(std::string* subject,
-                                          const std::string& search,
-                                          const std::string& replace) {
-    size_t pos = 0;
-    while ((pos = subject->find(search, pos)) != std::string::npos) {
-      subject->replace(pos, search.length(), replace);
-      pos += replace.length();
-    }
-  }
-
   void CountSingletons();
 };
 

From 39d0efcae5c7e09fca0b021a4b56c2217b2829a1 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 18:39:56 -0500
Subject: [PATCH 07/88] Moved recording functions into base training corpus
 reader

Also some corrections for consistent variable naming conventions
---
 parser/corpus.cc | 67 ++++++++++++++++++++++++++----------------------
 parser/corpus.h  | 52 ++++++++++++++++++++-----------------
 2 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 4214e8e..1aa0845 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -78,7 +78,6 @@ void ConllUCorpusReader::ReadSentences(const string& file,
 }
 
 
-
 void ParserTrainingCorpus::CountSingletons() {
   // compute the singletons in the parser's training data
   map<unsigned, unsigned> counts;
@@ -94,9 +93,9 @@ void ParserTrainingCorpus::CountSingletons() {
 }
 
 
-void ParserTrainingCorpus::OracleParseTransitionsReader::RecordWord(
+void TrainingCorpus::OracleTransitionsCorpusReader::RecordWord(
     const string& word, const string& pos, unsigned next_token_index,
-    CorpusVocabulary* vocab, ParserTrainingCorpus* corpus,
+    CorpusVocabulary* vocab, TrainingCorpus* corpus,
     map<unsigned, unsigned>* sentence, map<unsigned, unsigned>* sentence_pos,
     map<unsigned, string>* sentence_unk_surface_forms) const {
   // We assume that we'll have seen all POS tags in training, so don't
@@ -146,9 +145,10 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::RecordWord(
   (*sentence_pos)[next_token_index] = pos_id;
 }
 
-void ParserTrainingCorpus::OracleParseTransitionsReader::RecordAction(
+
+void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
     const string& action, bool start_of_sentence, CorpusVocabulary* vocab,
-    ParserTrainingCorpus* corpus) const {
+    TrainingCorpus* corpus) const {
   auto PushAction = // should be inlined; defined here for DRY reasons
       [corpus, start_of_sentence](unsigned action_index) {
     if (start_of_sentence)
@@ -179,12 +179,29 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::RecordAction(
 }
 
 
+void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
+    TrainingCorpus* corpus, map<unsigned, unsigned>* sentence,
+    map<unsigned, unsigned>* sentence_pos,
+    map<unsigned, string>* sentence_unk_surface_forms) const {
+  // Store the sentence variables and clear them for the next sentence.
+  corpus->sentences.push_back({});
+  corpus->sentences.back().swap(*sentence);
+  corpus->sentences_pos.push_back({});
+  corpus->sentences_pos.back().swap(*sentence_pos);
+  if (!is_training) {
+    corpus->sentences_unk_surface_forms.push_back({});
+    corpus->sentences_unk_surface_forms.back().swap(
+        *sentence_unk_surface_forms);
+  }
+}
+
+
 void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
     const string& file, ParserTrainingCorpus* corpus) const {
   cerr << "Loading " << (is_training ? "training" : "dev")
        << " corpus from " << file << "..." << endl;
-  ifstream actionsFile(file);
-  string lineS;
+  ifstream actions_file(file);
+  string line;
   CorpusVocabulary* vocab = corpus->vocab;
 
   bool next_is_action_line = false;
@@ -217,24 +234,16 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
     }
   };
 
-  while (getline(actionsFile, lineS)) {
-    ReplaceStringInPlace(&lineS, "-RRB-", "_RRB_");
-    ReplaceStringInPlace(&lineS, "-LRB-", "_LRB_");
+  while (getline(actions_file, line)) {
+    ReplaceStringInPlace(&line, "-RRB-", "_RRB_");
+    ReplaceStringInPlace(&line, "-LRB-", "_LRB_");
     // An empty line marks the end of a sentence.
-    if (lineS.empty()) {
+    if (line.empty()) {
       next_is_action_line = false;
       if (!first) { // if first, first line is blank, but no sentence yet
         FixRootID();
-        // Store the sentence variables and clear them for the next sentence.
-        corpus->sentences.push_back({});
-        corpus->sentences.back().swap(sentence);
-        corpus->sentences_pos.push_back({});
-        corpus->sentences_pos.back().swap(sentence_pos);
-        if (!is_training) {
-          corpus->sentences_unk_surface_forms.push_back({});
-          corpus->sentences_unk_surface_forms.back().swap(
-              sentence_unk_surface_forms);
-        }
+        RecordSentence(corpus, &sentence, &sentence_pos,
+                       &sentence_unk_surface_forms);
       }
       start_of_sentence = true;
       continue; // don't update next_is_action_line
@@ -247,9 +256,9 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
         // the initial line in each sentence should look like:
         // [][the-det, cat-noun, is-verb, on-adp, the-det, mat-noun, ,-punct, ROOT-ROOT]
         // first, get rid of the square brackets.
-        lineS = lineS.substr(3, lineS.size() - 4);
+        line = line.substr(3, line.size() - 4);
         // read the initial line, token by token "the-det," "cat-noun," ...
-        istringstream iss(lineS);
+        istringstream iss(line);
         do {
           string word;
           iss >> word;
@@ -283,7 +292,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
         } while (iss);
       }
     } else { // next_is_action_line
-      RecordAction(lineS, start_of_sentence, vocab, corpus);
+      RecordAction(line, start_of_sentence, vocab, corpus);
       start_of_sentence = false;
     }
 
@@ -293,15 +302,11 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   // Add the last sentence.
   if (sentence.size() > 0) {
     FixRootID();
-    corpus->sentences.push_back(move(sentence));
-    corpus->sentences_pos.push_back(move(sentence_pos));
-    if (!is_training) {
-      corpus->sentences_unk_surface_forms.push_back(
-          move(sentence_unk_surface_forms));
-    }
+    RecordSentence(corpus, &sentence, &sentence_pos,
+                   &sentence_unk_surface_forms);
   }
 
-  actionsFile.close();
+  actions_file.close();
 
   cerr << "done." << "\n";
   if (is_training) {
diff --git a/parser/corpus.h b/parser/corpus.h
index 1098c00..c80659d 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -213,6 +213,7 @@ class Corpus {
 class TrainingCorpus : public Corpus {
 public:
   std::vector<std::vector<unsigned>> correct_act_sent;
+  bool USE_SPELLING = false;
 
 protected:
   class OracleTransitionsCorpusReader : public CorpusReader {
@@ -230,8 +231,34 @@ class TrainingCorpus : public Corpus {
         pos += replace.length();
       }
     }
+
   protected:
     bool is_training; // can be dev rather than actual training
+
+    void RecordWord(
+        const std::string& word, const std::string& pos,
+        unsigned next_token_index, CorpusVocabulary* vocab,
+        TrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,
+        std::map<unsigned, unsigned>* sentence_pos,
+        std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
+
+    void RecordAction(const std::string& action, bool start_of_sentence,
+                      CorpusVocabulary* vocab, TrainingCorpus* corpus) const;
+
+    void RecordSentence(
+        TrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,
+        std::map<unsigned, unsigned>* sentence_pos,
+        std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
+
+    static inline unsigned UTF8Len(unsigned char x) {
+      if (x < 0x80) return 1;
+      else if ((x >> 5) == 0x06) return 2;
+      else if ((x >> 4) == 0x0e) return 3;
+      else if ((x >> 3) == 0x1e) return 4;
+      else if ((x >> 2) == 0x3e) return 5;
+      else if ((x >> 1) == 0x7e) return 6;
+      else return 0;
+    }
   };
 
   // Don't provide access to reader constructor -- object won't be fully
@@ -244,7 +271,6 @@ class ParserTrainingCorpus : public TrainingCorpus {
 public:
   friend class OracleTransitionsCorpusReader;
 
-  bool USE_SPELLING = false;
   std::set<unsigned> singletons;
 
   ParserTrainingCorpus(CorpusVocabulary* vocab, const std::string& file,
@@ -261,34 +287,12 @@ class ParserTrainingCorpus : public TrainingCorpus {
 
     virtual void ReadSentences(const std::string& file, Corpus* corpus) const {
       ParserTrainingCorpus* training_corpus =
-          static_cast<ParserTrainingCorpus *>(corpus);
+          static_cast<ParserTrainingCorpus*>(corpus);
       LoadCorrectActions(file, training_corpus);
     }
 
     virtual ~OracleParseTransitionsReader() {};
 
-    static inline unsigned UTF8Len(unsigned char x) {
-      if (x < 0x80) return 1;
-      else if ((x >> 5) == 0x06) return 2;
-      else if ((x >> 4) == 0x0e) return 3;
-      else if ((x >> 3) == 0x1e) return 4;
-      else if ((x >> 2) == 0x3e) return 5;
-      else if ((x >> 1) == 0x7e) return 6;
-      else return 0;
-    }
-
-  protected:
-    void RecordWord(
-        const std::string& word, const std::string& pos,
-        unsigned next_token_index, CorpusVocabulary* vocab,
-        ParserTrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,
-        std::map<unsigned, unsigned>* sentence_pos,
-        std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
-
-    void RecordAction(const std::string& action, bool start_of_sentence,
-                      CorpusVocabulary* vocab,
-                      ParserTrainingCorpus* corpus) const;
-
   private:
     void LoadCorrectActions(const std::string& file,
                             ParserTrainingCorpus* corpus) const;

From 40ec8b8b185fc16d3ececc8e477df6f1c58d5caf Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 20:26:25 -0500
Subject: [PATCH 08/88] Style fix

---
 parser/corpus.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 1aa0845..3696238 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -270,14 +270,14 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
             word = word.substr(0, word.size() - 1);
           }
           // split the string (at '-') into word and POS tag.
-          size_t posIndex = word.rfind('-');
-          if (posIndex == string::npos) {
+          size_t pos_index = word.rfind('-');
+          if (pos_index == string::npos) {
             cerr << "can't find the dash in '" << word << "'"
                  << endl;
           }
-          assert(posIndex != string::npos);
-          string pos = word.substr(posIndex + 1);
-          word = word.substr(0, posIndex);
+          assert(pos_index != string::npos);
+          string pos = word.substr(pos_index + 1);
+          word = word.substr(0, pos_index);
 
           if (pos == ORACLE_ROOT_POS) {
             // Prevent any confusion with the actual word "ROOT".

From 04013830158b62e9b5853b6bb3a063f1d967bfd0 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 20:57:42 -0500
Subject: [PATCH 09/88] Indentation fix

---
 parser/corpus.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 3696238..7470d5b 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -173,7 +173,7 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
       cerr << "WARNING: encountered unknown transition in dev corpus: "
            << action << endl;
       if (start_of_sentence)
-      corpus->correct_act_sent.push_back({});
+        corpus->correct_act_sent.push_back({});
     }
   }
 }

From dfd6a6e3ee8bf7b715b93c88723d169beaea32df Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 22:07:20 -0500
Subject: [PATCH 10/88] Cleaned up oracle action recording a bit

---
 parser/corpus.cc | 26 ++++++++++----------------
 parser/corpus.h  |  7 ++++---
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 7470d5b..9c299da 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -147,33 +147,23 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordWord(
 
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
-    const string& action, bool start_of_sentence, CorpusVocabulary* vocab,
+    const string& action, CorpusVocabulary* vocab,
     TrainingCorpus* corpus) const {
-  auto PushAction = // should be inlined; defined here for DRY reasons
-      [corpus, start_of_sentence](unsigned action_index) {
-    if (start_of_sentence)
-      corpus->correct_act_sent.push_back( {action_index} );
-    else
-      corpus->correct_act_sent.back().push_back(action_index);
-  };
-
   auto action_iter = find(vocab->actions.begin(), vocab->actions.end(), action);
   if (action_iter != vocab->actions.end()) {
     unsigned action_index = distance(vocab->actions.begin(), action_iter);
-    PushAction(action_index);
+    corpus->correct_act_sent.back().push_back(action_index);
   } else { // A not-previously-seen action
     if (is_training) {
       vocab->actions.push_back(action);
       unsigned action_index = vocab->actions.size() - 1;
-      PushAction(action_index);
+      corpus->correct_act_sent.back().push_back(action_index);
     } else {
       // TODO: right now, new actions which haven't been observed in
       // training are not added to correct_act_sent. In dev, this may
       // be a problem if there is little training data.
       cerr << "WARNING: encountered unknown transition in dev corpus: "
            << action << endl;
-      if (start_of_sentence)
-        corpus->correct_act_sent.push_back({});
     }
   }
 }
@@ -182,7 +172,7 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
     TrainingCorpus* corpus, map<unsigned, unsigned>* sentence,
     map<unsigned, unsigned>* sentence_pos,
-    map<unsigned, string>* sentence_unk_surface_forms) const {
+    map<unsigned, string>* sentence_unk_surface_forms, bool final) const {
   // Store the sentence variables and clear them for the next sentence.
   corpus->sentences.push_back({});
   corpus->sentences.back().swap(*sentence);
@@ -193,6 +183,9 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
     corpus->sentences_unk_surface_forms.back().swap(
         *sentence_unk_surface_forms);
   }
+  if (!final) {
+    corpus->correct_act_sent.push_back({});
+  }
 }
 
 
@@ -211,6 +204,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   map<unsigned, unsigned> sentence;
   map<unsigned, unsigned> sentence_pos;
   map<unsigned, string> sentence_unk_surface_forms;
+  corpus->correct_act_sent.push_back({});
 
   // We'll need to make sure ROOT token has a consistent ID.
   // (Should get inlined; defined here for DRY purposes.)
@@ -292,7 +286,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
         } while (iss);
       }
     } else { // next_is_action_line
-      RecordAction(line, start_of_sentence, vocab, corpus);
+      RecordAction(line, vocab, corpus);
       start_of_sentence = false;
     }
 
@@ -303,7 +297,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   if (sentence.size() > 0) {
     FixRootID();
     RecordSentence(corpus, &sentence, &sentence_pos,
-                   &sentence_unk_surface_forms);
+                   &sentence_unk_surface_forms, true);
   }
 
   actions_file.close();
diff --git a/parser/corpus.h b/parser/corpus.h
index c80659d..a7d1b09 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -242,13 +242,14 @@ class TrainingCorpus : public Corpus {
         std::map<unsigned, unsigned>* sentence_pos,
         std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
 
-    void RecordAction(const std::string& action, bool start_of_sentence,
-                      CorpusVocabulary* vocab, TrainingCorpus* corpus) const;
+    void RecordAction(const std::string& action, CorpusVocabulary* vocab,
+                      TrainingCorpus* corpus) const;
 
     void RecordSentence(
         TrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,
         std::map<unsigned, unsigned>* sentence_pos,
-        std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
+        std::map<unsigned, std::string>* sentence_unk_surface_forms,
+        bool final = false) const;
 
     static inline unsigned UTF8Len(unsigned char x) {
       if (x < 0x80) return 1;

From 1991a7595fd4f9cbac498bce3dfc131d199d6a01 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 22:30:36 -0500
Subject: [PATCH 11/88] Switched action-to-label to be more general and use
 boost::regex

---
 CMakeLists.txt   |  2 +-
 parser/corpus.cc |  5 +++++
 parser/corpus.h  | 14 +++++++-------
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7d1b1f1..a4e62ba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,7 +20,7 @@ if(DEFINED ENV{BOOST_ROOT})
   set(Boost_NO_SYSTEM_PATHS ON)
 endif()
 set(Boost_REALPATH ON)
-find_package(Boost COMPONENTS program_options serialization iostreams REQUIRED)
+find_package(Boost COMPONENTS program_options serialization iostreams regex REQUIRED)
 include_directories(${Boost_INCLUDE_DIR})
 set(LIBS ${LIBS} ${Boost_LIBRARIES})
 
diff --git a/parser/corpus.cc b/parser/corpus.cc
index 9c299da..37e14ed 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -16,6 +16,11 @@ constexpr unsigned Corpus::ROOT_TOKEN_ID;
 const string CorpusVocabulary::BAD0 = "<BAD0>";
 const string CorpusVocabulary::UNK = "<UNK>";
 const string CorpusVocabulary::ROOT = "<ROOT>";
+// We assume that actions with arcs will be of the form
+// "action-name(arc-label)". Allow any non-paren characters, followed by the
+// label name in parens. (Group 1 is the label name.)
+const boost::regex CorpusVocabulary::ARC_ACTION_REGEX(
+    {"[^\\(\\)]+\\(([^\\(\\)]+)\\)"});
 const string ORACLE_ROOT_POS = "ROOT";
 
 
diff --git a/parser/corpus.h b/parser/corpus.h
index a7d1b09..cf00a8f 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -2,6 +2,7 @@
 #define CORPUS_H
 
 #include <boost/algorithm/string/predicate.hpp>
+#include <boost/regex.hpp>
 #include <boost/serialization/split_member.hpp>
 #include <exception>
 #include <stddef.h>
@@ -91,13 +92,10 @@ class CorpusVocabulary {
     }
   }
 
-  virtual std::string GetLabelForAction(const std::string& action) {
-    if (boost::starts_with(action, "RIGHT-ARC") ||
-        boost::starts_with(action, "LEFT-ARC")) {
-      size_t first_char_in_rel = action.find('(') + 1;
-      size_t last_char_in_rel = action.rfind(')') - 1;
-      return action.substr(
-          first_char_in_rel, last_char_in_rel - first_char_in_rel + 1);
+  static inline std::string GetLabelForAction(const std::string& action) {
+    boost::smatch match;
+    if (boost::regex_search(action, match, ARC_ACTION_REGEX)) {
+      return match[1];
     } else {
       return "NONE";
     }
@@ -106,6 +104,8 @@ class CorpusVocabulary {
 private:
   friend class boost::serialization::access;
 
+  static const boost::regex ARC_ACTION_REGEX;
+
   template<class Archive, class VocabType>
   // Shared code: serialize the number-to-string mappings, from which the
   // reverse mappings can be reconstructed.

From 566cf5789a31f500074cff7bbd2c04a66a4176cb Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 15 Jan 2017 23:17:11 -0500
Subject: [PATCH 12/88] Removed unnecessary record fn params; added vocab copy
 ctor

---
 parser/corpus.cc | 14 ++++++++------
 parser/corpus.h  | 16 ++++++++++++----
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 37e14ed..3daf5a0 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -100,11 +100,12 @@ void ParserTrainingCorpus::CountSingletons() {
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordWord(
     const string& word, const string& pos, unsigned next_token_index,
-    CorpusVocabulary* vocab, TrainingCorpus* corpus,
-    map<unsigned, unsigned>* sentence, map<unsigned, unsigned>* sentence_pos,
+    TrainingCorpus* corpus, map<unsigned, unsigned>* sentence,
+    map<unsigned, unsigned>* sentence_pos,
     map<unsigned, string>* sentence_unk_surface_forms) const {
   // We assume that we'll have seen all POS tags in training, so don't
   // worry about OOV tags.
+  CorpusVocabulary* vocab = corpus->vocab;
   unsigned pos_id = vocab->GetOrAddEntry(pos, &vocab->pos_to_int,
                                          &vocab->int_to_pos);
 
@@ -152,8 +153,8 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordWord(
 
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
-    const string& action, CorpusVocabulary* vocab,
-    TrainingCorpus* corpus) const {
+    const string& action, TrainingCorpus* corpus) const {
+  CorpusVocabulary* vocab = corpus->vocab;
   auto action_iter = find(vocab->actions.begin(), vocab->actions.end(), action);
   if (action_iter != vocab->actions.end()) {
     unsigned action_index = distance(vocab->actions.begin(), action_iter);
@@ -163,6 +164,7 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
       vocab->actions.push_back(action);
       unsigned action_index = vocab->actions.size() - 1;
       corpus->correct_act_sent.back().push_back(action_index);
+      vocab->actions_to_arc_labels.push_back(vocab->GetLabelForAction(action));
     } else {
       // TODO: right now, new actions which haven't been observed in
       // training are not added to correct_act_sent. In dev, this may
@@ -286,12 +288,12 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
 
           // Use 1-indexed token IDs to leave room for ROOT in position 0.
           unsigned next_token_index = sentence.size() + 1;
-          RecordWord(word, pos, next_token_index, vocab, corpus, &sentence,
+          RecordWord(word, pos, next_token_index, corpus, &sentence,
                      &sentence_pos, &sentence_unk_surface_forms);
         } while (iss);
       }
     } else { // next_is_action_line
-      RecordAction(line, vocab, corpus);
+      RecordAction(line, corpus);
       start_of_sentence = false;
     }
 
diff --git a/parser/corpus.h b/parser/corpus.h
index cf00a8f..bdf036d 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -44,6 +44,15 @@ class CorpusVocabulary {
     AddEntry(BAD0, &chars_to_int, &int_to_chars);
   }
 
+  // Copy constructor: Copy everything except action-related stuff, on the
+  // assumption that we're copying the vocabulary for use in another task with
+  // different actions.
+  CorpusVocabulary(const CorpusVocabulary& other) :
+      words_to_int(other.words_to_int), int_to_words(other.int_to_words),
+      int_to_training_word(other.int_to_training_word),
+      pos_to_int(other.pos_to_int), int_to_pos(other.int_to_pos),
+      chars_to_int(other.chars_to_int), int_to_chars(other.int_to_chars) {}
+
   inline unsigned CountPOS() { return pos_to_int.size(); }
   inline unsigned CountWords() { return words_to_int.size(); }
   inline unsigned CountChars() { return chars_to_int.size(); }
@@ -237,13 +246,12 @@ class TrainingCorpus : public Corpus {
 
     void RecordWord(
         const std::string& word, const std::string& pos,
-        unsigned next_token_index, CorpusVocabulary* vocab,
-        TrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,
+        unsigned next_token_index, TrainingCorpus* corpus,
+        std::map<unsigned, unsigned>* sentence,
         std::map<unsigned, unsigned>* sentence_pos,
         std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
 
-    void RecordAction(const std::string& action, CorpusVocabulary* vocab,
-                      TrainingCorpus* corpus) const;
+    void RecordAction(const std::string& action, TrainingCorpus* corpus) const;
 
     void RecordSentence(
         TrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,

From 96fadfc1788f7f041ce84c29fc5c18bfd5279f5c Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Fri, 20 Jan 2017 10:53:33 -0500
Subject: [PATCH 13/88] Fixed training command in README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b7d53bd..369808e 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Given a `training.conll` file and a `development.conll` formatted according to t
     java -jar ParserOracleArcStdWithSwap.jar -t -1 -l 1 -c training.conll > trainingOracle.txt
     java -jar ParserOracleArcStdWithSwap.jar -t -1 -l 1 -c development.conll > devOracle.txt
 
-    parser/lstm-parse -P -t trainingOracle.txt -d devOracle.txt --hidden_dim 100 --lstm_input_dim 100 -w sskip.100.vectors --pretrained_dim 100 --rel_dim 20 --action_dim 20
+    parser/lstm-parse --train -t trainingOracle.txt -d devOracle.txt --hidden_dim 100 --lstm_input_dim 100 --words sskip.100.vectors --rel_dim 20 --action_dim 20 --use_pos_tags
 
 Link to the word vectors used in the ACL 2015 paper for English:  [sskip.100.vectors](https://drive.google.com/file/d/0B8nESzOdPhLsdWF2S1Ayb1RkTXc/view?usp=sharing).
 

From 9fc237dfb07bca04b0c35d6c6fa6935e3cf86e5e Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 24 Jan 2017 14:15:00 -0500
Subject: [PATCH 14/88] Correct-count pointer for logprob defaults to null

---
 parser/lstm-parser.cc | 2 +-
 parser/lstm-parser.h  | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 720f323..bea19b9 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -339,7 +339,7 @@ vector<unsigned> LSTMParser::LogProbParser(
     // If we have reference actions (for training), use the reference action.
     if (build_training_graph) {
       action = correct_actions[action_count];
-      if (best_a == action) {
+      if (correct && best_a == action) {
         (*correct)++;
       }
     }
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index ad2c9b1..5ec3d2d 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -181,7 +181,8 @@ class LSTMParser {
 
   ParseTree Parse(const std::map<unsigned, unsigned>& sentence,
                   const std::map<unsigned, unsigned>& sentence_pos,
-                  const CorpusVocabulary& vocab, bool labeled, double* correct);
+                  const CorpusVocabulary& vocab, bool labeled,
+                  double* correct = nullptr);
 
   // take a vector of actions and return a parse tree
   ParseTree RecoverParseTree(
@@ -209,7 +210,7 @@ class LSTMParser {
       const std::map<unsigned, unsigned>& sentence,
       const std::map<unsigned, unsigned>& sentence_pos,
       const CorpusVocabulary& vocab, cnn::ComputationGraph *cg,
-      double* correct);
+      double* correct = nullptr);
 
   void LoadPretrainedWords(const std::string& words_path);
 
@@ -230,7 +231,7 @@ class LSTMParser {
       const std::map<unsigned, unsigned>& sentPos,
       const std::vector<unsigned>& correct_actions,
       const std::vector<std::string>& action_names,
-      const std::vector<std::string>& int_to_words, double* right);
+      const std::vector<std::string>& int_to_words, double* correct);
 
   void SaveModel(const std::string& model_fname, bool softlink_created);
 

From 651a3cafc8f168db9289d3f67dc6994732154f5e Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 24 Jan 2017 17:02:07 -0500
Subject: [PATCH 15/88] Minor variable name cleanup for consistency

---
 parser/lstm-parser.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index bea19b9..5a89940 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -348,16 +348,16 @@ vector<unsigned> LSTMParser::LogProbParser(
     results.push_back(action);
 
     // add current action to action LSTM
-    Expression actione = lookup(*hg, p_a, action);
-    action_lstm.add_input(actione);
+    Expression action_e = lookup(*hg, p_a, action);
+    action_lstm.add_input(action_e);
 
     // get relation embedding from action (TODO: convert to rel from action?)
     Expression relation = lookup(*hg, p_r, action);
 
     // do action
-    const string& actionString = action_names[action];
-    const char ac = actionString[0];
-    const char ac2 = actionString[1];
+    const string& action_string = action_names[action];
+    const char ac = action_string[0];
+    const char ac2 = action_string[1];
 
     if (ac == 'S' && ac2 == 'H') {  // SHIFT
       assert(buffer.size() > 1); // dummy symbol means > 1 (not >= 1)

From e6afc7375d3a4550184b1825baf48f47e4399a64 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 31 Jan 2017 15:27:30 -0500
Subject: [PATCH 16/88] Allowed accessing final parser state outside
 LogProbParser

---
 parser/lstm-parser.cc | 15 +++++++++++----
 parser/lstm-parser.h  |  6 ++++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 5a89940..35096f5 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -227,7 +227,8 @@ vector<unsigned> LSTMParser::LogProbParser(
     const map<unsigned, unsigned>& sent,  // sentence with OOVs replaced
     const map<unsigned, unsigned>& sent_pos,
     const vector<unsigned>& correct_actions, const vector<string>& action_names,
-    const vector<string>& int_to_words, double* correct) {
+    const vector<string>& int_to_words, double* correct,
+    Expression* final_parser_state) {
   // TODO: break up this function?
   assert(finalized);
   vector<unsigned> results;
@@ -306,6 +307,7 @@ vector<unsigned> LSTMParser::LogProbParser(
   stack_lstm.add_input(stack.back());
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
+  Expression p_t; // declared outside to allow access later
   while (stack.size() > 2 || buffer.size() > 1) {
     // get list of possible actions for the current parser state
     vector<unsigned> current_valid_actions;
@@ -317,7 +319,7 @@ vector<unsigned> LSTMParser::LogProbParser(
     }
 
     // p_t = pbias + S * slstm + B * blstm + A * almst
-    Expression p_t = affine_transform(
+    p_t = affine_transform(
         {pbias, S, stack_lstm.back(), B, buffer_lstm.back(), A,
          action_lstm.back()});
     Expression nlp_t = rectify(p_t);
@@ -424,6 +426,10 @@ vector<unsigned> LSTMParser::LogProbParser(
   assert(bufferi.size() == 1);
   Expression tot_neglogprob = -sum(log_probs);
   assert(tot_neglogprob.pg != nullptr);
+
+  if (final_parser_state) {
+    *final_parser_state = p_t;
+  }
   return results;
 }
 
@@ -573,10 +579,11 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 }
 
 
+// TODO: fix this so that correct actually does something sometimes
 vector<unsigned> LSTMParser::LogProbParser(
     const map<unsigned, unsigned>& sentence,
     const map<unsigned, unsigned>& sentence_pos, const CorpusVocabulary& vocab,
-    ComputationGraph *cg, double* correct) {
+    ComputationGraph *cg, double* correct, Expression* final_parser_state) {
   map<unsigned, unsigned> tsentence(sentence); // sentence with OOVs replaced
   for (auto& index_and_id : tsentence) { // use reference to overwrite
     if (!vocab.int_to_training_word[index_and_id.second]) {
@@ -585,7 +592,7 @@ vector<unsigned> LSTMParser::LogProbParser(
   }
   return LogProbParser(cg, sentence, tsentence, sentence_pos,
                        vector<unsigned>(), vocab.actions,
-                       vocab.int_to_words, correct);
+                       vocab.int_to_words, correct, final_parser_state);
 }
 
 
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 5ec3d2d..6096056 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -210,7 +210,8 @@ class LSTMParser {
       const std::map<unsigned, unsigned>& sentence,
       const std::map<unsigned, unsigned>& sentence_pos,
       const CorpusVocabulary& vocab, cnn::ComputationGraph *cg,
-      double* correct = nullptr);
+      double* correct = nullptr,
+      cnn::expr::Expression* final_parser_state = nullptr);
 
   void LoadPretrainedWords(const std::string& words_path);
 
@@ -231,7 +232,8 @@ class LSTMParser {
       const std::map<unsigned, unsigned>& sentPos,
       const std::vector<unsigned>& correct_actions,
       const std::vector<std::string>& action_names,
-      const std::vector<std::string>& int_to_words, double* correct);
+      const std::vector<std::string>& int_to_words, double* correct,
+      cnn::expr::Expression* final_parser_state = nullptr);
 
   void SaveModel(const std::string& model_fname, bool softlink_created);
 

From 3f77fcfb9ceec9ef169bec1c5e1bae825fed4c01 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 31 Jan 2017 16:05:44 -0500
Subject: [PATCH 17/88] Fixed command line processing to output help if no args
 given

---
 parser/lstm-parser-driver.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser/lstm-parser-driver.cc b/parser/lstm-parser-driver.cc
index a1caacc..e5aaf91 100644
--- a/parser/lstm-parser-driver.cc
+++ b/parser/lstm-parser-driver.cc
@@ -54,7 +54,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description dcmdline_options;
   dcmdline_options.add(opts);
   po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("help")) {
+  if (conf->count("help") || argc == 1) {
     cerr << dcmdline_options << endl;
     exit(0);
   }

From 6975c90b2fdbadbe5dc66f1e85360f260cebf314 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 31 Jan 2017 16:27:29 -0500
Subject: [PATCH 18/88] More naming consistency

---
 parser/lstm-parser.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 6096056..09587f0 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -229,7 +229,7 @@ class LSTMParser {
       cnn::ComputationGraph* hg,
       const std::map<unsigned, unsigned>& raw_sent,  // raw sentence
       const std::map<unsigned, unsigned>& sent,  // sentence with OOVs replaced
-      const std::map<unsigned, unsigned>& sentPos,
+      const std::map<unsigned, unsigned>& sent_pos,
       const std::vector<unsigned>& correct_actions,
       const std::vector<std::string>& action_names,
       const std::vector<std::string>& int_to_words, double* correct,

From 0f05ecde018360aeba7197c7f4e8c067a210b180 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 31 Jan 2017 16:41:32 -0500
Subject: [PATCH 19/88] Got rid of meaningless 'err' reporting when evaluating

---
 parser/lstm-parser.cc | 34 ++++++++++++++++------------------
 parser/lstm-parser.h  |  4 +---
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 35096f5..82e8543 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -541,7 +541,6 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
       // dev_size = 100;
       double llh = 0;
       double trs = 0;
-      double correct = 0;
       double correct_heads = 0;
       double total_heads = 0;
       auto t_start = chrono::high_resolution_clock::now();
@@ -549,7 +548,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
         const map<unsigned, unsigned>& sentence = dev_corpus.sentences[sii];
         const map<unsigned, unsigned>& sentence_pos =
             dev_corpus.sentences_pos[sii];
-        ParseTree hyp = Parse(sentence, sentence_pos, vocab, false, &correct);
+        ParseTree hyp = Parse(sentence, sentence_pos, vocab, false);
 
         double lp = 0;
         llh -= lp;
@@ -562,13 +561,15 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
         correct_heads += ComputeCorrect(ref, hyp);
         total_heads += sentence.size() - 1; // -1 to account for ROOT
       }
+
       auto t_end = chrono::high_resolution_clock::now();
+      auto ms = chrono::duration<double, milli>(t_end - t_start).count();
       cerr << "  **dev (iter=" << iter << " epoch="
-           << (tot_seen / num_sentences) << ")\tllh=" << llh << " ppl: "
-           << exp(llh / trs) << " err: " << (trs - correct) / trs << " uas: "
-           << (correct_heads / total_heads) << "\t[" << dev_size << " sents in "
-           << chrono::duration<double, milli>(t_end - t_start).count() << " ms]"
-           << endl;
+           << (tot_seen / num_sentences) << ")\tllh=" << llh
+           << " ppl: " << exp(llh / trs)
+           << " uas: " << (correct_heads / total_heads)
+           << "\t[" << dev_size << " sents in " << ms << " ms]" << endl;
+
       if (correct_heads > best_correct_heads) {
         best_correct_heads = correct_heads;
         SaveModel(model_fname, softlink_created);
@@ -583,7 +584,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 vector<unsigned> LSTMParser::LogProbParser(
     const map<unsigned, unsigned>& sentence,
     const map<unsigned, unsigned>& sentence_pos, const CorpusVocabulary& vocab,
-    ComputationGraph *cg, double* correct, Expression* final_parser_state) {
+    ComputationGraph *cg, Expression* final_parser_state) {
   map<unsigned, unsigned> tsentence(sentence); // sentence with OOVs replaced
   for (auto& index_and_id : tsentence) { // use reference to overwrite
     if (!vocab.int_to_training_word[index_and_id.second]) {
@@ -592,17 +593,15 @@ vector<unsigned> LSTMParser::LogProbParser(
   }
   return LogProbParser(cg, sentence, tsentence, sentence_pos,
                        vector<unsigned>(), vocab.actions,
-                       vocab.int_to_words, correct, final_parser_state);
+                       vocab.int_to_words, nullptr, final_parser_state);
 }
 
 
 ParseTree LSTMParser::Parse(const map<unsigned, unsigned>& sentence,
-                const map<unsigned, unsigned>& sentence_pos,
-                const CorpusVocabulary& vocab,
-                bool labeled, double* correct) {
+                            const map<unsigned, unsigned>& sentence_pos,
+                            const CorpusVocabulary& vocab, bool labeled) {
   ComputationGraph cg;
-  vector<unsigned> pred = LogProbParser(sentence, sentence_pos, vocab, &cg,
-                                        correct);
+  vector<unsigned> pred = LogProbParser(sentence, sentence_pos, vocab, &cg);
   return RecoverParseTree(sentence, pred, vocab.actions,
                           vocab.actions_to_arc_labels, labeled);
 }
@@ -616,7 +615,6 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
   }
   double llh = 0;
   double trs = 0;
-  double correct = 0;
   double correct_heads = 0;
   double total_heads = 0;
   auto t_start = chrono::high_resolution_clock::now();
@@ -626,7 +624,7 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
     const map<unsigned, unsigned>& sentence_pos = corpus.sentences_pos[sii];
     const map<unsigned, string>& sentence_unk_str =
         corpus.sentences_unk_surface_forms[sii];
-    ParseTree hyp = Parse(sentence, sentence_pos, vocab, true, &correct);
+    ParseTree hyp = Parse(sentence, sentence_pos, vocab, true);
     if (output_parses) {
       OutputConll(sentence, sentence_pos, sentence_unk_str,
                   corpus.vocab->int_to_words, corpus.vocab->int_to_pos,
@@ -651,8 +649,8 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
   }
   auto t_end = chrono::high_resolution_clock::now();
   if (evaluate) {
-    cerr << "TEST llh=" << llh << " ppl: " << exp(llh / trs) << " err: "
-         << (trs - correct) / trs << " uas: " << (correct_heads / total_heads)
+    cerr << "TEST llh=" << llh << " ppl: " << exp(llh / trs)
+         << " uas: " << (correct_heads / total_heads)
          << "\t[" << corpus_size << " sents in "
          << chrono::duration<double, milli>(t_end - t_start).count() << " ms]"
          << endl;
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 09587f0..680594a 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -181,8 +181,7 @@ class LSTMParser {
 
   ParseTree Parse(const std::map<unsigned, unsigned>& sentence,
                   const std::map<unsigned, unsigned>& sentence_pos,
-                  const CorpusVocabulary& vocab, bool labeled,
-                  double* correct = nullptr);
+                  const CorpusVocabulary& vocab, bool labeled);
 
   // take a vector of actions and return a parse tree
   ParseTree RecoverParseTree(
@@ -210,7 +209,6 @@ class LSTMParser {
       const std::map<unsigned, unsigned>& sentence,
       const std::map<unsigned, unsigned>& sentence_pos,
       const CorpusVocabulary& vocab, cnn::ComputationGraph *cg,
-      double* correct = nullptr,
       cnn::expr::Expression* final_parser_state = nullptr);
 
   void LoadPretrainedWords(const std::string& words_path);

From 01736cb41204ce7db994bca4606b8c28302d6c5a Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 31 Jan 2017 17:04:07 -0500
Subject: [PATCH 20/88] Added logprob from NN to ParseTree data structure

Accumulates each parse's log probability into llh so that the llh and
ppl numbers logged for dev and test are accurate.
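---
NOTE(review): RecoverParseTree() now takes (..., double logprob, bool
labeled), but Parse() in this patch calls it with (..., labeled, lp).
The bool is converted to the logprob and the double to the labeled flag,
so hyp.logprob ends up 0 or 1 rather than the network's value. The
existing call in DoTest() that passes a trailing `true` now binds it to
logprob as well, leaving labeled at its default of false. Swap the
arguments at the call sites (or move logprob after labeled).
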
 parser/lstm-parser.cc | 13 +++++++++----
 parser/lstm-parser.h  |  7 ++++---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 82e8543..512a093 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -174,7 +174,7 @@ bool LSTMParser::IsActionForbidden(const string& a, unsigned bsize,
 ParseTree LSTMParser::RecoverParseTree(
     const map<unsigned, unsigned>& sentence, const vector<unsigned>& actions,
     const vector<string>& action_names,
-    const vector<string>& actions_to_arc_labels, bool labeled) {
+    const vector<string>& actions_to_arc_labels, double logprob, bool labeled) {
   ParseTree tree(sentence, labeled);
   vector<int> bufferi(sentence.size() + 1);
   bufferi[0] = -999;
@@ -217,6 +217,8 @@ ParseTree LSTMParser::RecoverParseTree(
   }
   assert(bufferi.size() == 1);
   //assert(stacki.size() == 2);
+
+  tree.logprob = logprob;
   return tree;
 }
 
@@ -548,10 +550,10 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
         const map<unsigned, unsigned>& sentence = dev_corpus.sentences[sii];
         const map<unsigned, unsigned>& sentence_pos =
             dev_corpus.sentences_pos[sii];
+
         ParseTree hyp = Parse(sentence, sentence_pos, vocab, false);
+        llh += hyp.logprob;
 
-        double lp = 0;
-        llh -= lp;
         const vector<unsigned>& actions = dev_corpus.correct_act_sent[sii];
         ParseTree ref = RecoverParseTree(
             sentence, actions, dev_corpus.vocab->actions,
@@ -602,8 +604,9 @@ ParseTree LSTMParser::Parse(const map<unsigned, unsigned>& sentence,
                             const CorpusVocabulary& vocab, bool labeled) {
   ComputationGraph cg;
   vector<unsigned> pred = LogProbParser(sentence, sentence_pos, vocab, &cg);
+  double lp = as_scalar(cg.incremental_forward());
   return RecoverParseTree(sentence, pred, vocab.actions,
-                          vocab.actions_to_arc_labels, labeled);
+                          vocab.actions_to_arc_labels, labeled, lp);
 }
 
 
@@ -643,10 +646,12 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
                                        corpus.vocab->actions_to_arc_labels,
                                        true);
       trs += actions.size();
+      llh += hyp.logprob;
       correct_heads += ComputeCorrect(ref, hyp);
       total_heads += sentence.size() - 1; // -1 to account for ROOT
     }
   }
+
   auto t_end = chrono::high_resolution_clock::now();
   if (evaluate) {
     cerr << "TEST llh=" << llh << " ppl: " << exp(llh / trs)
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 680594a..9f90f48 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -71,11 +71,12 @@ class ParseTree {
   static std::string NO_LABEL;
   // Barebones representation of a parse tree.
   const std::map<unsigned, unsigned>& sentence;
+  double logprob;
 
   ParseTree(const std::map<unsigned, unsigned>& sentence, bool labeled = true) :
       sentence(sentence),
-      arc_labels( labeled ? new std::map<unsigned, std::string> : nullptr) {
-  }
+      logprob(0),
+      arc_labels( labeled ? new std::map<unsigned, std::string> : nullptr) {}
 
   inline void SetParent(unsigned child_index, unsigned parent_index,
                       const std::string& arc_label="") {
@@ -188,7 +189,7 @@ class LSTMParser {
       const std::map<unsigned, unsigned>& sentence,
       const std::vector<unsigned>& actions,
       const std::vector<std::string>& action_names,
-      const std::vector<std::string>& actions_to_arc_labels,
+      const std::vector<std::string>& actions_to_arc_labels, double logprob = 0,
       bool labeled = false);
 
   void Train(const ParserTrainingCorpus& corpus,

From c693089e9040e7885f0fd13a902f7626c1990024 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 2 Feb 2017 16:02:07 -0500
Subject: [PATCH 21/88] Added SentenceMap/SentenceUnkMap typedefs to Corpus

---
 parser/corpus.cc | 22 ++++++++++------------
 parser/corpus.h  | 18 ++++++++++--------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 3daf5a0..929aeed 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -27,9 +27,9 @@ const string ORACLE_ROOT_POS = "ROOT";
 void ConllUCorpusReader::ReadSentences(const string& file,
                                        Corpus* corpus) const {
   string next_line;
-  map<unsigned, string> current_sentence_unk_surface_forms;
-  map<unsigned, unsigned> current_sentence;
-  map<unsigned, unsigned> current_sentence_pos;
+  Corpus::SentenceUnkMap current_sentence_unk_surface_forms;
+  Corpus::SentenceMap current_sentence;
+  Corpus::SentenceMap current_sentence_pos;
 
   ifstream conll_file(file);
   unsigned unk_word_symbol = corpus->vocab->GetWord(CorpusVocabulary::UNK);
@@ -100,9 +100,8 @@ void ParserTrainingCorpus::CountSingletons() {
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordWord(
     const string& word, const string& pos, unsigned next_token_index,
-    TrainingCorpus* corpus, map<unsigned, unsigned>* sentence,
-    map<unsigned, unsigned>* sentence_pos,
-    map<unsigned, string>* sentence_unk_surface_forms) const {
+    TrainingCorpus* corpus, SentenceMap* sentence, SentenceMap* sentence_pos,
+    SentenceUnkMap* sentence_unk_surface_forms) const {
   // We assume that we'll have seen all POS tags in training, so don't
   // worry about OOV tags.
   CorpusVocabulary* vocab = corpus->vocab;
@@ -177,9 +176,8 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
 
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
-    TrainingCorpus* corpus, map<unsigned, unsigned>* sentence,
-    map<unsigned, unsigned>* sentence_pos,
-    map<unsigned, string>* sentence_unk_surface_forms, bool final) const {
+    TrainingCorpus* corpus, SentenceMap* sentence, SentenceMap* sentence_pos,
+    SentenceUnkMap* sentence_unk_surface_forms, bool final) const {
   // Store the sentence variables and clear them for the next sentence.
   corpus->sentences.push_back({});
   corpus->sentences.back().swap(*sentence);
@@ -208,9 +206,9 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   bool start_of_sentence = false;
   bool first = true;
 
-  map<unsigned, unsigned> sentence;
-  map<unsigned, unsigned> sentence_pos;
-  map<unsigned, string> sentence_unk_surface_forms;
+  SentenceMap sentence;
+  SentenceMap sentence_pos;
+  SentenceUnkMap sentence_unk_surface_forms;
   corpus->correct_act_sent.push_back({});
 
   // We'll need to make sure ROOT token has a consistent ID.
diff --git a/parser/corpus.h b/parser/corpus.h
index bdf036d..22ad869 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -201,9 +201,12 @@ class Corpus {
   // when iterating over a list of tokens in order of IDs.
   static constexpr unsigned ROOT_TOKEN_ID = -1;
 
-  std::vector<std::map<unsigned, unsigned>> sentences;
-  std::vector<std::map<unsigned, unsigned>> sentences_pos;
-  std::vector<std::map<unsigned, std::string>> sentences_unk_surface_forms;
+  typedef std::map<unsigned, unsigned> SentenceMap;
+  typedef std::map<unsigned, std::string> SentenceUnkMap;
+
+  std::vector<SentenceMap> sentences;
+  std::vector<SentenceMap> sentences_pos;
+  std::vector<SentenceUnkMap> sentences_unk_surface_forms;
   CorpusVocabulary* vocab;
 
   Corpus(CorpusVocabulary* vocab, const CorpusReader& reader,
@@ -253,11 +256,10 @@ class TrainingCorpus : public Corpus {
 
     void RecordAction(const std::string& action, TrainingCorpus* corpus) const;
 
-    void RecordSentence(
-        TrainingCorpus* corpus, std::map<unsigned, unsigned>* sentence,
-        std::map<unsigned, unsigned>* sentence_pos,
-        std::map<unsigned, std::string>* sentence_unk_surface_forms,
-        bool final = false) const;
+    void RecordSentence(TrainingCorpus* corpus, SentenceMap* sentence,
+                        SentenceMap* sentence_pos,
+                        SentenceUnkMap* sentence_unk_surface_forms,
+                        bool final = false) const;
 
     static inline unsigned UTF8Len(unsigned char x) {
       if (x < 0x80) return 1;

From a77168d2a5d0f8fdaeb015694c928c097f4cd841 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 2 Feb 2017 17:06:54 -0500
Subject: [PATCH 22/88] Switched everything over to Sentence objects

---
 parser/corpus.cc      | 51 ++++++++++++++--------------
 parser/corpus.h       | 33 ++++++++++++-------
 parser/lstm-parser.cc | 77 +++++++++++++++++++------------------------
 parser/lstm-parser.h  | 27 +++++++--------
 4 files changed, 90 insertions(+), 98 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 929aeed..4f04952 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -27,9 +27,10 @@ const string ORACLE_ROOT_POS = "ROOT";
 void ConllUCorpusReader::ReadSentences(const string& file,
                                        Corpus* corpus) const {
   string next_line;
-  Corpus::SentenceUnkMap current_sentence_unk_surface_forms;
-  Corpus::SentenceMap current_sentence;
-  Corpus::SentenceMap current_sentence_pos;
+  // TODO: Replace this code with simpler Sentence-based code.
+  Sentence::SentenceUnkMap current_sentence_unk_surface_forms;
+  Sentence::SentenceMap current_sentence;
+  Sentence::SentenceMap current_sentence_pos;
 
   ifstream conll_file(file);
   unsigned unk_word_symbol = corpus->vocab->GetWord(CorpusVocabulary::UNK);
@@ -43,15 +44,11 @@ void ConllUCorpusReader::ReadSentences(const string& file,
         current_sentence_pos[Corpus::ROOT_TOKEN_ID] = root_pos_symbol;
         current_sentence_unk_surface_forms[Corpus::ROOT_TOKEN_ID] = "";
 
-        corpus->sentences.push_back(move(current_sentence));
-        current_sentence.clear();
-
-        corpus->sentences_pos.push_back(move(current_sentence_pos));
-        current_sentence_pos.clear();
-
-        corpus->sentences_unk_surface_forms.push_back(
-            move(current_sentence_unk_surface_forms));
-        current_sentence_unk_surface_forms.clear();
+        corpus->sentences.push_back({});
+        corpus->sentences.back().words.swap(current_sentence);
+        corpus->sentences.back().poses.swap(current_sentence_pos);
+        corpus->sentences.back().unk_surface_forms.swap(
+            current_sentence_unk_surface_forms);
       }
       continue;
     } else if (next_line[0] == '#') {
@@ -87,7 +84,7 @@ void ParserTrainingCorpus::CountSingletons() {
   // compute the singletons in the parser's training data
   map<unsigned, unsigned> counts;
   for (const auto& sent : sentences) {
-    for (const auto& index_and_word_id : sent) {
+    for (const auto& index_and_word_id : sent.words) {
       counts[index_and_word_id.second]++;
     }
   }
@@ -100,8 +97,9 @@ void ParserTrainingCorpus::CountSingletons() {
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordWord(
     const string& word, const string& pos, unsigned next_token_index,
-    TrainingCorpus* corpus, SentenceMap* sentence, SentenceMap* sentence_pos,
-    SentenceUnkMap* sentence_unk_surface_forms) const {
+    TrainingCorpus* corpus, Sentence::SentenceMap* sentence,
+    Sentence::SentenceMap* sentence_pos,
+    Sentence::SentenceUnkMap* sentence_unk_surface_forms) const {
   // We assume that we'll have seen all POS tags in training, so don't
   // worry about OOV tags.
   CorpusVocabulary* vocab = corpus->vocab;
@@ -176,18 +174,18 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
 
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
-    TrainingCorpus* corpus, SentenceMap* sentence, SentenceMap* sentence_pos,
-    SentenceUnkMap* sentence_unk_surface_forms, bool final) const {
+    TrainingCorpus* corpus, Sentence::SentenceMap* words,
+    Sentence::SentenceMap* sentence_pos,
+    Sentence::SentenceUnkMap* sentence_unk_surface_forms, bool final) const {
   // Store the sentence variables and clear them for the next sentence.
   corpus->sentences.push_back({});
-  corpus->sentences.back().swap(*sentence);
-  corpus->sentences_pos.push_back({});
-  corpus->sentences_pos.back().swap(*sentence_pos);
+  Sentence* sentence = &corpus->sentences.back();
+  sentence->words.swap(*words);
+  sentence->poses.swap(*sentence_pos);
   if (!is_training) {
-    corpus->sentences_unk_surface_forms.push_back({});
-    corpus->sentences_unk_surface_forms.back().swap(
-        *sentence_unk_surface_forms);
+    sentence->unk_surface_forms.swap(*sentence_unk_surface_forms);
   }
+
   if (!final) {
     corpus->correct_act_sent.push_back({});
   }
@@ -206,9 +204,10 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   bool start_of_sentence = false;
   bool first = true;
 
-  SentenceMap sentence;
-  SentenceMap sentence_pos;
-  SentenceUnkMap sentence_unk_surface_forms;
+  // TODO: replace this code with simpler Sentence-based code.
+  Sentence::SentenceMap sentence;
+  Sentence::SentenceMap sentence_pos;
+  Sentence::SentenceUnkMap sentence_unk_surface_forms;
   corpus->correct_act_sent.push_back({});
 
   // We'll need to make sure ROOT token has a consistent ID.
diff --git a/parser/corpus.h b/parser/corpus.h
index 22ad869..1b75ed8 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -195,18 +195,27 @@ class ConllUCorpusReader : public CorpusReader {
 };
 
 
+struct Sentence {
+  typedef std::map<unsigned, unsigned> SentenceMap;
+  typedef std::map<unsigned, std::string> SentenceUnkMap;
+
+  SentenceMap words;
+  SentenceMap poses;
+  SentenceUnkMap unk_surface_forms;
+
+  size_t Size() const {
+    return words.size();
+  }
+};
+
+
 class Corpus {
 public:
   // Store root tokens with unsigned ID -1 internally to make root come last
   // when iterating over a list of tokens in order of IDs.
   static constexpr unsigned ROOT_TOKEN_ID = -1;
 
-  typedef std::map<unsigned, unsigned> SentenceMap;
-  typedef std::map<unsigned, std::string> SentenceUnkMap;
-
-  std::vector<SentenceMap> sentences;
-  std::vector<SentenceMap> sentences_pos;
-  std::vector<SentenceUnkMap> sentences_unk_surface_forms;
+  std::vector<Sentence> sentences;
   CorpusVocabulary* vocab;
 
   Corpus(CorpusVocabulary* vocab, const CorpusReader& reader,
@@ -250,15 +259,15 @@ class TrainingCorpus : public Corpus {
     void RecordWord(
         const std::string& word, const std::string& pos,
         unsigned next_token_index, TrainingCorpus* corpus,
-        std::map<unsigned, unsigned>* sentence,
-        std::map<unsigned, unsigned>* sentence_pos,
-        std::map<unsigned, std::string>* sentence_unk_surface_forms) const;
+        Sentence::SentenceMap* sentence,
+        Sentence::SentenceMap* sentence_pos,
+        Sentence::SentenceUnkMap* sentence_unk_surface_forms) const;
 
     void RecordAction(const std::string& action, TrainingCorpus* corpus) const;
 
-    void RecordSentence(TrainingCorpus* corpus, SentenceMap* sentence,
-                        SentenceMap* sentence_pos,
-                        SentenceUnkMap* sentence_unk_surface_forms,
+    void RecordSentence(TrainingCorpus* corpus, Sentence::SentenceMap* words,
+                        Sentence::SentenceMap* sentence_pos,
+                        Sentence::SentenceUnkMap* sentence_unk_surface_forms,
                         bool final = false) const;
 
     static inline unsigned UTF8Len(unsigned char x) {
diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 512a093..fbfa9f2 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -172,17 +172,18 @@ bool LSTMParser::IsActionForbidden(const string& a, unsigned bsize,
 
 
 ParseTree LSTMParser::RecoverParseTree(
-    const map<unsigned, unsigned>& sentence, const vector<unsigned>& actions,
+    const Sentence& sentence, const vector<unsigned>& actions,
     const vector<string>& action_names,
     const vector<string>& actions_to_arc_labels, double logprob, bool labeled) {
   ParseTree tree(sentence, labeled);
-  vector<int> bufferi(sentence.size() + 1);
+  vector<int> bufferi(sentence.Size() + 1);
   bufferi[0] = -999;
   vector<int> stacki(1, -999);
   unsigned added_to_buffer = 0;
-  for (const auto& index_and_word_id : sentence) {
+  for (const auto& index_and_word_id : sentence.words) {
     // ROOT is set to -1, so it'll come last in a sequence of unsigned ints.
-    bufferi[sentence.size() - added_to_buffer++] = index_and_word_id.first;
+    bufferi[sentence.Size() - added_to_buffer++] =
+        index_and_word_id.first;
   }
   for (auto action : actions) { // loop over transitions for sentence
     const string& action_string = action_names[action];
@@ -225,9 +226,8 @@ ParseTree LSTMParser::RecoverParseTree(
 
 vector<unsigned> LSTMParser::LogProbParser(
     ComputationGraph* hg,
-    const map<unsigned, unsigned>& raw_sent,  // raw sentence
-    const map<unsigned, unsigned>& sent,  // sentence with OOVs replaced
-    const map<unsigned, unsigned>& sent_pos,
+    const Sentence& raw_sent,  // raw sentence
+    const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
     const vector<unsigned>& correct_actions, const vector<string>& action_names,
     const vector<string>& int_to_words, double* correct,
     Expression* final_parser_state) {
@@ -280,12 +280,12 @@ vector<unsigned> LSTMParser::LogProbParser(
 
     vector<Expression> args = {ib, w2l, w}; // learn embeddings
     if (options.use_pos) { // learn POS tag?
-      unsigned pos_id = sent_pos.find(token_index)->second;
+      unsigned pos_id = raw_sent.poses.find(token_index)->second;
       Expression p = lookup(*hg, p_p, pos_id);
       args.push_back(p2l);
       args.push_back(p);
     }
-    unsigned raw_word_id = raw_sent.find(token_index)->second;
+    unsigned raw_word_id = raw_sent.words.find(token_index)->second;
     if (p_t && pretrained.count(raw_word_id)) { // include pretrained vectors?
       Expression t = const_lookup(*hg, p_t, raw_word_id);
       args.push_back(t2l);
@@ -498,8 +498,8 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
         random_shuffle(order.begin(), order.end());
       }
       tot_seen += 1;
-      const map<unsigned, unsigned>& sentence = corpus.sentences[order[si]];
-      map<unsigned, unsigned> tsentence(sentence);
+      const Sentence& sentence = corpus.sentences[order[si]];
+      Sentence::SentenceMap tsentence(sentence.words);
       if (options.unk_strategy == 1) {
         for (auto& index_and_id : tsentence) { // use reference to overwrite
           if (corpus.singletons.count(index_and_id.second)
@@ -508,11 +508,9 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
           }
         }
       }
-      const map<unsigned, unsigned>& sentence_pos =
-          corpus.sentences_pos[order[si]];
       const vector<unsigned>& actions = corpus.correct_act_sent[order[si]];
       ComputationGraph hg;
-      LogProbParser(&hg, sentence, tsentence, sentence_pos, actions,
+      LogProbParser(&hg, sentence, tsentence, actions,
                     corpus.vocab->actions, corpus.vocab->int_to_words,
                     &correct);
       double lp = as_scalar(hg.incremental_forward());
@@ -547,11 +545,9 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
       double total_heads = 0;
       auto t_start = chrono::high_resolution_clock::now();
       for (unsigned sii = 0; sii < dev_size; ++sii) {
-        const map<unsigned, unsigned>& sentence = dev_corpus.sentences[sii];
-        const map<unsigned, unsigned>& sentence_pos =
-            dev_corpus.sentences_pos[sii];
+        const Sentence& sentence = dev_corpus.sentences[sii];
 
-        ParseTree hyp = Parse(sentence, sentence_pos, vocab, false);
+        ParseTree hyp = Parse(sentence, vocab, false);
         llh += hyp.logprob;
 
         const vector<unsigned>& actions = dev_corpus.correct_act_sent[sii];
@@ -561,7 +557,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 
         trs += actions.size();
         correct_heads += ComputeCorrect(ref, hyp);
-        total_heads += sentence.size() - 1; // -1 to account for ROOT
+        total_heads += sentence.Size() - 1; // -1 to account for ROOT
       }
 
       auto t_end = chrono::high_resolution_clock::now();
@@ -584,26 +580,24 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 
 // TODO: fix this so that correct actually does something sometimes
 vector<unsigned> LSTMParser::LogProbParser(
-    const map<unsigned, unsigned>& sentence,
-    const map<unsigned, unsigned>& sentence_pos, const CorpusVocabulary& vocab,
+    const Sentence& sentence, const CorpusVocabulary& vocab,
     ComputationGraph *cg, Expression* final_parser_state) {
-  map<unsigned, unsigned> tsentence(sentence); // sentence with OOVs replaced
+  Sentence::SentenceMap tsentence(sentence.words); // sentence w/ OOVs replaced
   for (auto& index_and_id : tsentence) { // use reference to overwrite
     if (!vocab.int_to_training_word[index_and_id.second]) {
       index_and_id.second = kUNK;
     }
   }
-  return LogProbParser(cg, sentence, tsentence, sentence_pos,
-                       vector<unsigned>(), vocab.actions,
-                       vocab.int_to_words, nullptr, final_parser_state);
+  return LogProbParser(cg, sentence, tsentence, vector<unsigned>(),
+                       vocab.actions, vocab.int_to_words, nullptr,
+                       final_parser_state);
 }
 
 
-ParseTree LSTMParser::Parse(const map<unsigned, unsigned>& sentence,
-                            const map<unsigned, unsigned>& sentence_pos,
+ParseTree LSTMParser::Parse(const Sentence& sentence,
                             const CorpusVocabulary& vocab, bool labeled) {
   ComputationGraph cg;
-  vector<unsigned> pred = LogProbParser(sentence, sentence_pos, vocab, &cg);
+  vector<unsigned> pred = LogProbParser(sentence, vocab, &cg);
   double lp = as_scalar(cg.incremental_forward());
   return RecoverParseTree(sentence, pred, vocab.actions,
                           vocab.actions_to_arc_labels, labeled, lp);
@@ -623,15 +617,11 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
   auto t_start = chrono::high_resolution_clock::now();
   unsigned corpus_size = corpus.sentences.size();
   for (unsigned sii = 0; sii < corpus_size; ++sii) {
-    const map<unsigned, unsigned>& sentence = corpus.sentences[sii];
-    const map<unsigned, unsigned>& sentence_pos = corpus.sentences_pos[sii];
-    const map<unsigned, string>& sentence_unk_str =
-        corpus.sentences_unk_surface_forms[sii];
-    ParseTree hyp = Parse(sentence, sentence_pos, vocab, true);
+    const Sentence& sentence = corpus.sentences[sii];
+    ParseTree hyp = Parse(sentence, vocab, true);
     if (output_parses) {
-      OutputConll(sentence, sentence_pos, sentence_unk_str,
-                  corpus.vocab->int_to_words, corpus.vocab->int_to_pos,
-                  corpus.vocab->words_to_int, hyp);
+      OutputConll(sentence, corpus.vocab->int_to_words,
+                  corpus.vocab->int_to_pos, corpus.vocab->words_to_int, hyp);
     }
 
     if (evaluate) {
@@ -648,7 +638,7 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
       trs += actions.size();
       llh += hyp.logprob;
       correct_heads += ComputeCorrect(ref, hyp);
-      total_heads += sentence.size() - 1; // -1 to account for ROOT
+      total_heads += sentence.Size() - 1; // -1 to account for ROOT
     }
   }
 
@@ -667,29 +657,28 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
 }
 
 
-void LSTMParser::OutputConll(const map<unsigned, unsigned>& sentence,
-                             const map<unsigned, unsigned>& pos,
-                             const map<unsigned, string>& sentence_unk_strings,
+void LSTMParser::OutputConll(const Sentence& sentence,
                              const vector<string>& int_to_words,
                              const vector<string>& int_to_pos,
                              const map<string, unsigned>& words_to_int,
                              const ParseTree& tree) {
   const unsigned int unk_word =
       words_to_int.find(CorpusVocabulary::UNK)->second;
-  for (const auto& token_index_and_word : sentence) {
+  for (const auto& token_index_and_word : sentence.words) {
     unsigned token_index = token_index_and_word.first;
     unsigned word_id = token_index_and_word.second;
     if (token_index == Corpus::ROOT_TOKEN_ID) // don't output anything for ROOT
       continue;
 
-    auto unk_strs_iter = sentence_unk_strings.find(token_index);
-    assert(unk_strs_iter != sentence_unk_strings.end() &&
+    auto unk_strs_iter = sentence.unk_surface_forms.find(token_index);
+    assert(unk_strs_iter != sentence.unk_surface_forms.end() &&
            ((word_id == unk_word && unk_strs_iter->second.size() > 0) ||
             (word_id != unk_word && unk_strs_iter->second.size() == 0 &&
              int_to_words.size() > word_id)));
     string wit = (unk_strs_iter->second.size() > 0) ?
                   unk_strs_iter->second : int_to_words[word_id];
-    const string& pos_tag = int_to_pos[pos.find(token_index)->second];
+    const string& pos_tag = int_to_pos[
+        sentence.poses.find(token_index)->second];
     unsigned parent = tree.GetParent(token_index);
     if (parent == Corpus::ROOT_TOKEN_ID)
       parent = 0;
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 9f90f48..ef71451 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -70,10 +70,10 @@ class ParseTree {
 public:
   static std::string NO_LABEL;
   // Barebones representation of a parse tree.
-  const std::map<unsigned, unsigned>& sentence;
+  const Sentence& sentence;
   double logprob;
 
-  ParseTree(const std::map<unsigned, unsigned>& sentence, bool labeled = true) :
+  ParseTree(const Sentence& sentence, bool labeled = true) :
       sentence(sentence),
       logprob(0),
       arc_labels( labeled ? new std::map<unsigned, std::string> : nullptr) {}
@@ -180,13 +180,12 @@ class LSTMParser {
   static bool IsActionForbidden(const std::string& a, unsigned bsize,
                                 unsigned ssize, const std::vector<int>& stacki);
 
-  ParseTree Parse(const std::map<unsigned, unsigned>& sentence,
-                  const std::map<unsigned, unsigned>& sentence_pos,
+  ParseTree Parse(const Sentence& sentence,
                   const CorpusVocabulary& vocab, bool labeled);
 
   // take a vector of actions and return a parse tree
   ParseTree RecoverParseTree(
-      const std::map<unsigned, unsigned>& sentence,
+      const Sentence& sentence,
       const std::vector<unsigned>& actions,
       const std::vector<std::string>& action_names,
       const std::vector<std::string>& actions_to_arc_labels, double logprob = 0,
@@ -207,9 +206,8 @@ class LSTMParser {
 
   // Used for testing. Replaces OOV with UNK.
   std::vector<unsigned> LogProbParser(
-      const std::map<unsigned, unsigned>& sentence,
-      const std::map<unsigned, unsigned>& sentence_pos,
-      const CorpusVocabulary& vocab, cnn::ComputationGraph *cg,
+      const Sentence& sentence, const CorpusVocabulary& vocab,
+      cnn::ComputationGraph *cg,
       cnn::expr::Expression* final_parser_state = nullptr);
 
   void LoadPretrainedWords(const std::string& words_path);
@@ -226,9 +224,8 @@ class LSTMParser {
   // OOV in the parser training data.
   std::vector<unsigned> LogProbParser(
       cnn::ComputationGraph* hg,
-      const std::map<unsigned, unsigned>& raw_sent,  // raw sentence
-      const std::map<unsigned, unsigned>& sent,  // sentence with OOVs replaced
-      const std::map<unsigned, unsigned>& sent_pos,
+      const Sentence& sentence, // raw sentence
+      const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
       const std::vector<unsigned>& correct_actions,
       const std::vector<std::string>& action_names,
       const std::vector<std::string>& int_to_words, double* correct,
@@ -238,9 +235,9 @@ class LSTMParser {
 
   inline unsigned ComputeCorrect(const ParseTree& ref,
                                  const ParseTree& hyp) const {
-    assert(ref.sentence.size() == hyp.sentence.size());
+    assert(ref.sentence.Size() == hyp.sentence.Size());
     unsigned correct_count = 0;
-    for (const auto& token_index_and_word : ref.sentence) {
+    for (const auto& token_index_and_word : ref.sentence.words) {
       unsigned i = token_index_and_word.first;
       if (i != Corpus::ROOT_TOKEN_ID && ref.GetParent(i) == hyp.GetParent(i))
         ++correct_count;
@@ -287,9 +284,7 @@ class LSTMParser {
 
   void DoTest(const Corpus& corpus, bool evaluate, bool output_parses);
 
-  static void OutputConll(const std::map<unsigned, unsigned>& sentence,
-      const std::map<unsigned, unsigned>& pos,
-      const std::map<unsigned, std::string>& sentence_unk_strings,
+  static void OutputConll(const Sentence& sentence,
       const std::vector<std::string>& int_to_words,
       const std::vector<std::string>& int_to_pos,
       const std::map<std::string, unsigned>& words_to_int,

From 6a90c5e5e49596a3dbedfc92e033432a300ce011 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 6 Feb 2017 13:47:28 -0500
Subject: [PATCH 23/88] Replaced some map find() calls with at() calls

---
 parser/lstm-parser.cc | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index fbfa9f2..bb0c4d3 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -280,12 +280,12 @@ vector<unsigned> LSTMParser::LogProbParser(
 
     vector<Expression> args = {ib, w2l, w}; // learn embeddings
     if (options.use_pos) { // learn POS tag?
-      unsigned pos_id = raw_sent.poses.find(token_index)->second;
+      unsigned pos_id = raw_sent.poses.at(token_index);
       Expression p = lookup(*hg, p_p, pos_id);
       args.push_back(p2l);
       args.push_back(p);
     }
-    unsigned raw_word_id = raw_sent.words.find(token_index)->second;
+    unsigned raw_word_id = raw_sent.words.at(token_index);
     if (p_t && pretrained.count(raw_word_id)) { // include pretrained vectors?
       Expression t = const_lookup(*hg, p_t, raw_word_id);
       args.push_back(t2l);
@@ -662,8 +662,7 @@ void LSTMParser::OutputConll(const Sentence& sentence,
                              const vector<string>& int_to_pos,
                              const map<string, unsigned>& words_to_int,
                              const ParseTree& tree) {
-  const unsigned int unk_word =
-      words_to_int.find(CorpusVocabulary::UNK)->second;
+  const unsigned int unk_word = words_to_int.at(CorpusVocabulary::UNK);
   for (const auto& token_index_and_word : sentence.words) {
     unsigned token_index = token_index_and_word.first;
     unsigned word_id = token_index_and_word.second;
@@ -677,8 +676,7 @@ void LSTMParser::OutputConll(const Sentence& sentence,
              int_to_words.size() > word_id)));
     string wit = (unk_strs_iter->second.size() > 0) ?
                   unk_strs_iter->second : int_to_words[word_id];
-    const string& pos_tag = int_to_pos[
-        sentence.poses.find(token_index)->second];
+    const string& pos_tag = int_to_pos[sentence.poses.at(token_index)];
     unsigned parent = tree.GetParent(token_index);
     if (parent == Corpus::ROOT_TOKEN_ID)
       parent = 0;

From 014cd2655b23440c84a6bd858e703e4a7726513e Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 27 Feb 2017 14:24:55 -0500
Subject: [PATCH 24/88] Removed old comment

---
 parser/lstm-parser.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index bb0c4d3..8244032 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -578,7 +578,6 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 }
 
 
-// TODO: fix this so that correct actually does something sometimes
 vector<unsigned> LSTMParser::LogProbParser(
     const Sentence& sentence, const CorpusVocabulary& vocab,
     ComputationGraph *cg, Expression* final_parser_state) {

From 95ab82f668cce6b9aa8418bc53fc11d6a20017b2 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 13:47:32 -0500
Subject: [PATCH 25/88] Added checks for files being provided and successfully
 opened

Also added kUNK to the CorpusVocabulary class
---
 parser/corpus.cc             | 10 ++++++++++
 parser/corpus.h              |  7 +++++--
 parser/lstm-parser-driver.cc | 12 ++++++++++++
 parser/lstm-parser.h         |  4 ++++
 4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 4f04952..2ab2442 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -33,6 +33,11 @@ void ConllUCorpusReader::ReadSentences(const string& file,
   Sentence::SentenceMap current_sentence_pos;
 
   ifstream conll_file(file);
+  if (!conll_file) {
+    cerr << "Unable to open corpus file " << file << "; aborting" << endl;
+    abort();
+  }
+
   unsigned unk_word_symbol = corpus->vocab->GetWord(CorpusVocabulary::UNK);
   unsigned root_symbol = corpus->vocab->GetWord(CorpusVocabulary::ROOT);
   unsigned root_pos_symbol = corpus->vocab->GetPOS(CorpusVocabulary::ROOT);
@@ -197,6 +202,11 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   cerr << "Loading " << (is_training ? "training" : "dev")
        << " corpus from " << file << "..." << endl;
   ifstream actions_file(file);
+  if (!actions_file) {
+    cerr << "Unable to open actions file " << file << "; aborting" << endl;
+    abort();
+  }
+
   string line;
   CorpusVocabulary* vocab = corpus->vocab;
 
diff --git a/parser/corpus.h b/parser/corpus.h
index 1b75ed8..5a3c7a8 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -38,9 +38,11 @@ class CorpusVocabulary {
   std::vector<std::string> actions;
   std::vector<std::string> actions_to_arc_labels;
 
+  unsigned kUNK;
+
   CorpusVocabulary() : int_to_training_word({true, true}) {
     AddEntry(BAD0, &words_to_int, &int_to_words);
-    AddEntry(UNK, &words_to_int, &int_to_words);
+    kUNK = AddEntry(UNK, &words_to_int, &int_to_words);
     AddEntry(BAD0, &chars_to_int, &int_to_chars);
   }
 
@@ -51,7 +53,8 @@ class CorpusVocabulary {
       words_to_int(other.words_to_int), int_to_words(other.int_to_words),
       int_to_training_word(other.int_to_training_word),
       pos_to_int(other.pos_to_int), int_to_pos(other.int_to_pos),
-      chars_to_int(other.chars_to_int), int_to_chars(other.int_to_chars) {}
+      chars_to_int(other.chars_to_int), int_to_chars(
+          other.int_to_chars), kUNK(other.kUNK) {}
 
   inline unsigned CountPOS() { return pos_to_int.size(); }
   inline unsigned CountWords() { return words_to_int.size(); }
diff --git a/parser/lstm-parser-driver.cc b/parser/lstm-parser-driver.cc
index e5aaf91..6ed78fa 100644
--- a/parser/lstm-parser-driver.cc
+++ b/parser/lstm-parser-driver.cc
@@ -119,6 +119,18 @@ int main(int argc, char** argv) {
     cerr << "No model specified for testing!" << endl;
     abort();
   }
+  if (train && !load_model) {
+    if (!conf.count("words")) {
+      cerr << "Can't train without word vectors! Please provide --words."
+           << endl;
+      abort();
+    }
+    if (!conf.count("training_data")) {
+      cerr << "Can't train without training data! Please provide"
+              " --training_data" << endl;
+      abort();
+    }
+  }
 
   const string words = load_model ? "" : conf["words"].as<string>();
   unique_ptr<LSTMParser> parser;
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index ef71451..1410525 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -161,6 +161,10 @@ class LSTMParser {
     std::cerr << "Loading model from " << model_path << "...";
     auto t_start = std::chrono::high_resolution_clock::now();
     std::ifstream model_file(model_path.c_str(), std::ios::binary);
+    if (!model_file) {
+      std::cerr << "Unable to open model file; aborting" << std::endl;
+      abort();
+    }
     eos::portable_iarchive archive(model_file);
     archive >> *this;
     auto t_end = std::chrono::high_resolution_clock::now();

From 0b9684bcc84aba417f655d08a6d1ac7e5966ba38 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 13:52:26 -0500
Subject: [PATCH 26/88] Abstracted out much of the LSTM transition tagger
 architecture

(In preparation for reusing the same architecture for another
tagger that is not a parser.)
---
 parser/CMakeLists.txt             |   2 +-
 parser/lstm-parser.cc             | 366 +++++++++++-------------------
 parser/lstm-parser.h              |  73 +++---
 parser/lstm-transition-tagger.cpp | 124 ++++++++++
 parser/lstm-transition-tagger.h   |  82 +++++++
 5 files changed, 390 insertions(+), 257 deletions(-)
 create mode 100644 parser/lstm-transition-tagger.cpp
 create mode 100644 parser/lstm-transition-tagger.h

diff --git a/parser/CMakeLists.txt b/parser/CMakeLists.txt
index 3ac3352..1217810 100644
--- a/parser/CMakeLists.txt
+++ b/parser/CMakeLists.txt
@@ -1,7 +1,7 @@
 PROJECT(lstm-parser:parser)
 CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
 
-ADD_LIBRARY(lstm-parser-core lstm-parser.cc corpus.cc)
+ADD_LIBRARY(lstm-parser-core lstm-parser.cc corpus.cc lstm-transition-tagger.cpp)
 target_link_libraries(lstm-parser-core cnn ${Boost_LIBRARIES})
 
 ADD_EXECUTABLE(lstm-parse lstm-parser-driver.cc)
diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 8244032..764fbea 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -14,7 +14,6 @@
 
 #include "cnn/model.h"
 #include "cnn/tensor.h"
-#include "eos/portable_archive.hpp"
 
 
 using namespace cnn::expr;
@@ -121,7 +120,6 @@ void LSTMParser::FinalizeVocab() {
 LSTMParser::LSTMParser(const ParserOptions& poptions,
                        const string& pretrained_words_path, bool finalize) :
       options(poptions),
-      kUNK(vocab.GetOrAddWord(vocab.UNK)),
       kROOT_SYMBOL(vocab.GetOrAddWord(vocab.ROOT)),
       stack_lstm(options.layers, options.lstm_input_dim, options.hidden_dim,
                  &model),
@@ -143,13 +141,16 @@ LSTMParser::LSTMParser(const ParserOptions& poptions,
 }
 
 
-bool LSTMParser::IsActionForbidden(const string& a, unsigned bsize,
-                                   unsigned ssize, const vector<int>& stacki) {
+bool LSTMParser::IsActionForbidden(const string& a, const TaggerState& state) {
+  const ParserState& real_state = static_cast<const ParserState&>(state);
+  unsigned ssize = real_state.stack.size();
+  unsigned bsize = real_state.buffer.size();
+
   if (a[1] == 'W' && ssize < 3)
     return true;
   if (a[1] == 'W') {
-    int top = stacki[stacki.size() - 1];
-    int sec = stacki[stacki.size() - 2];
+    int top = real_state.stacki[real_state.stacki.size() - 1];
+    int sec = real_state.stacki[real_state.stacki.size() - 2];
     if (sec > top)
       return true;
   }
@@ -224,234 +225,156 @@ ParseTree LSTMParser::RecoverParseTree(
 }
 
 
-vector<unsigned> LSTMParser::LogProbParser(
-    ComputationGraph* hg,
-    const Sentence& raw_sent,  // raw sentence
+cnn::expr::Expression LSTMParser::GetActionProbabilities(
+      const TaggerState& state) {
+  // p_t = pbias + S * slstm + B * blstm + A * alstm
+  Expression p_t = affine_transform(
+      {GetParamExpr(p_pbias), GetParamExpr(p_S), stack_lstm.back(),
+          GetParamExpr(p_B), buffer_lstm.back(), GetParamExpr(p_A),
+          action_lstm.back()});
+  Expression nlp_t = rectify(p_t);
+  // r_t = abias + p2a * nlp
+  Expression r_t = affine_transform(
+      {GetParamExpr(p_abias), GetParamExpr(p_p2a), nlp_t});
+  return r_t;
+}
+
+
+void LSTMParser::DoAction(unsigned action,
+                          const vector<string>& action_names,
+                          TaggerState* state, ComputationGraph* cg) {
+  ParserState* real_state = static_cast<ParserState*>(state);
+  // add current action to action LSTM
+  Expression action_e = lookup(*cg, p_a, action);
+  action_lstm.add_input(action_e);
+
+  // get relation embedding from action (TODO: convert to rel from action?)
+  Expression relation = lookup(*cg, p_r, action);
+
+  // do action
+  const string& action_string = action_names[action];
+  const char ac = action_string[0];
+  const char ac2 = action_string[1];
+
+  if (ac == 'S' && ac2 == 'H') {  // SHIFT
+    assert(real_state->buffer.size() > 1); // dummy symbol means > 1 (not >= 1)
+    real_state->stack.push_back(real_state->buffer.back());
+    stack_lstm.add_input(real_state->buffer.back());
+    real_state->buffer.pop_back();
+    buffer_lstm.rewind_one_step();
+    real_state->stacki.push_back(real_state->bufferi.back());
+    real_state->bufferi.pop_back();
+  } else if (ac == 'S' && ac2 == 'W') { //SWAP --- Miguel
+    assert(real_state->stack.size() > 2); // dummy symbol means > 2 (not >= 2)
+
+    Expression toki, tokj;
+    unsigned ii = 0, jj = 0;
+    tokj = real_state->stack.back();
+    jj = real_state->stacki.back();
+    real_state->stack.pop_back();
+    real_state->stacki.pop_back();
+
+    toki = real_state->stack.back();
+    ii = real_state->stacki.back();
+    real_state->stack.pop_back();
+    real_state->stacki.pop_back();
+
+    real_state->buffer.push_back(toki);
+    real_state->bufferi.push_back(ii);
+
+    stack_lstm.rewind_one_step();
+    stack_lstm.rewind_one_step();
+
+    buffer_lstm.add_input(real_state->buffer.back());
+
+    real_state->stack.push_back(tokj);
+    real_state->stacki.push_back(jj);
+
+    stack_lstm.add_input(real_state->stack.back());
+  } else { // LEFT or RIGHT
+    assert(real_state->stack.size() > 2); // dummy symbol means > 2 (not >= 2)
+    assert(ac == 'L' || ac == 'R');
+    Expression dep, head;
+    unsigned depi = 0, headi = 0;
+    (ac == 'R' ? dep : head) = real_state->stack.back();
+    (ac == 'R' ? depi : headi) = real_state->stacki.back();
+    real_state->stack.pop_back();
+    real_state->stacki.pop_back();
+    (ac == 'R' ? head : dep) = real_state->stack.back();
+    (ac == 'R' ? headi : depi) = real_state->stacki.back();
+    real_state->stack.pop_back();
+    real_state->stacki.pop_back();
+    // composed = cbias + H * head + D * dep + R * relation
+    Expression composed = affine_transform({GetParamExpr(p_cbias),
+        GetParamExpr(p_H), head, GetParamExpr(p_D), dep, GetParamExpr(p_R),
+        relation});
+    Expression nlcomposed = tanh(composed);
+    stack_lstm.rewind_one_step();
+    stack_lstm.rewind_one_step();
+    stack_lstm.add_input(nlcomposed);
+    real_state->stack.push_back(nlcomposed);
+    real_state->stacki.push_back(headi);
+  }
+}
+
+
+LSTMTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
+    cnn::ComputationGraph* cg,
+    const Sentence& raw_sent,
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-    const vector<unsigned>& correct_actions, const vector<string>& action_names,
-    const vector<string>& int_to_words, double* correct,
-    Expression* final_parser_state) {
-  // TODO: break up this function?
-  assert(finalized);
-  vector<unsigned> results;
-  const bool build_training_graph = correct_actions.size() > 0;
-
-  stack_lstm.new_graph(*hg);
-  buffer_lstm.new_graph(*hg);
-  action_lstm.new_graph(*hg);
+    const std::vector<unsigned>& correct_actions,
+    const std::vector<std::string>& action_names) {
+  stack_lstm.new_graph(*cg);
+  buffer_lstm.new_graph(*cg);
+  action_lstm.new_graph(*cg);
   stack_lstm.start_new_sequence();
   buffer_lstm.start_new_sequence();
   action_lstm.start_new_sequence();
-  // variables in the computation graph representing the parameters
-  Expression pbias = parameter(*hg, p_pbias);
-  Expression H = parameter(*hg, p_H);
-  Expression D = parameter(*hg, p_D);
-  Expression R = parameter(*hg, p_R);
-  Expression cbias = parameter(*hg, p_cbias);
-  Expression S = parameter(*hg, p_S);
-  Expression B = parameter(*hg, p_B);
-  Expression A = parameter(*hg, p_A);
-  Expression ib = parameter(*hg, p_ib);
-  Expression w2l = parameter(*hg, p_w2l);
-  Expression p2l;
-  if (options.use_pos)
-    p2l = parameter(*hg, p_p2l);
-  Expression t2l;
-  if (p_t2l)
-    t2l = parameter(*hg, p_t2l);
-  Expression p2a = parameter(*hg, p_p2a);
-  Expression abias = parameter(*hg, p_abias);
-  Expression action_start = parameter(*hg, p_action_start);
-
-  action_lstm.add_input(action_start);
-
-  // variables representing word embeddings (possibly including POS info)
-  vector<Expression> buffer(sent.size() + 1);
-  vector<int> bufferi(sent.size() + 1); // position of the words in the sentence
-  // precompute buffer representation from left to right
 
+  action_lstm.add_input(GetParamExpr(p_action_start));
+
+  ParserState* state = new ParserState;
+  state->buffer.resize(raw_sent.Size() + 1);
+  state->bufferi.resize(raw_sent.Size() + 1);
+  state->stack.push_back(parameter(*cg, p_stack_guard));
+  state->stacki.push_back(-999);
+  // drive dummy symbol on stack through LSTM
+  stack_lstm.add_input(state->stack.back());
+
+  // precompute buffer representation from left to right
   unsigned added_to_buffer = 0;
   for (const auto& index_and_word_id : sent) {
     unsigned token_index = index_and_word_id.first;
     unsigned word_id = index_and_word_id.second;
 
     assert(word_id < vocab.CountWords());
-    Expression w = lookup(*hg, p_w, word_id);
+    Expression w = lookup(*cg, p_w, word_id);
 
-    vector<Expression> args = {ib, w2l, w}; // learn embeddings
-    if (options.use_pos) { // learn POS tag?
+    vector<Expression> args = {GetParamExpr(p_ib), GetParamExpr(p_w2l),
+                               w};  // learn embeddings
+    if (options.use_pos) {  // learn POS tag?
       unsigned pos_id = raw_sent.poses.at(token_index);
-      Expression p = lookup(*hg, p_p, pos_id);
-      args.push_back(p2l);
+      Expression p = lookup(*cg, p_p, pos_id);
+      args.push_back(GetParamExpr(p_p2l));
       args.push_back(p);
     }
     unsigned raw_word_id = raw_sent.words.at(token_index);
-    if (p_t && pretrained.count(raw_word_id)) { // include pretrained vectors?
-      Expression t = const_lookup(*hg, p_t, raw_word_id);
-      args.push_back(t2l);
+    if (p_t && pretrained.count(raw_word_id)) {  // include pretrained vectors?
+      Expression t = const_lookup(*cg, p_t, raw_word_id);
+      args.push_back(GetParamExpr(p_t2l));
       args.push_back(t);
     }
-    buffer[sent.size() - added_to_buffer] = rectify(affine_transform(args));
-    bufferi[sent.size() - added_to_buffer] = token_index;
+    state->buffer[sent.size() - added_to_buffer] = rectify(affine_transform(args));
+    state->bufferi[sent.size() - added_to_buffer] = token_index;
     added_to_buffer++;
   }
   // dummy symbol to represent the empty buffer
-  buffer[0] = parameter(*hg, p_buffer_guard);
-  bufferi[0] = -999;
-  for (auto& b : buffer)
+  state->buffer[0] = parameter(*cg, p_buffer_guard);
+  state->bufferi[0] = -999;
+  for (auto& b : state->buffer)
     buffer_lstm.add_input(b);
 
-  vector<Expression> stack;  // variables representing subtree embeddings
-  vector<int> stacki; // position of words in the sentence of head of subtree
-  stack.push_back(parameter(*hg, p_stack_guard));
-  stacki.push_back(-999); // not used for anything
-  // drive dummy symbol on stack through LSTM
-  stack_lstm.add_input(stack.back());
-  vector<Expression> log_probs;
-  unsigned action_count = 0;  // incremented at each prediction
-  Expression p_t; // declared outside to allow access later
-  while (stack.size() > 2 || buffer.size() > 1) {
-    // get list of possible actions for the current parser state
-    vector<unsigned> current_valid_actions;
-    for (unsigned action = 0; action < n_possible_actions; ++action) {
-      if (IsActionForbidden(action_names[action], buffer.size(), stack.size(),
-                            stacki))
-        continue;
-      current_valid_actions.push_back(action);
-    }
-
-    // p_t = pbias + S * slstm + B * blstm + A * almst
-    p_t = affine_transform(
-        {pbias, S, stack_lstm.back(), B, buffer_lstm.back(), A,
-         action_lstm.back()});
-    Expression nlp_t = rectify(p_t);
-    // r_t = abias + p2a * nlp
-    Expression r_t = affine_transform({abias, p2a, nlp_t});
-
-    // adist = log_softmax(r_t, current_valid_actions)
-    Expression adiste = log_softmax(r_t, current_valid_actions);
-    vector<float> adist = as_vector(hg->incremental_forward());
-    double best_score = adist[current_valid_actions[0]];
-    unsigned best_a = current_valid_actions[0];
-    for (unsigned i = 1; i < current_valid_actions.size(); ++i) {
-      if (adist[current_valid_actions[i]] > best_score) {
-        best_score = adist[current_valid_actions[i]];
-        best_a = current_valid_actions[i];
-      }
-    }
-    unsigned action = best_a;
-    // If we have reference actions (for training), use the reference action.
-    if (build_training_graph) {
-      action = correct_actions[action_count];
-      if (correct && best_a == action) {
-        (*correct)++;
-      }
-    }
-    ++action_count;
-    log_probs.push_back(pick(adiste, action));
-    results.push_back(action);
-
-    // add current action to action LSTM
-    Expression action_e = lookup(*hg, p_a, action);
-    action_lstm.add_input(action_e);
-
-    // get relation embedding from action (TODO: convert to rel from action?)
-    Expression relation = lookup(*hg, p_r, action);
-
-    // do action
-    const string& action_string = action_names[action];
-    const char ac = action_string[0];
-    const char ac2 = action_string[1];
-
-    if (ac == 'S' && ac2 == 'H') {  // SHIFT
-      assert(buffer.size() > 1); // dummy symbol means > 1 (not >= 1)
-      stack.push_back(buffer.back());
-      stack_lstm.add_input(buffer.back());
-      buffer.pop_back();
-      buffer_lstm.rewind_one_step();
-      stacki.push_back(bufferi.back());
-      bufferi.pop_back();
-    } else if (ac == 'S' && ac2 == 'W') { //SWAP --- Miguel
-      assert(stack.size() > 2); // dummy symbol means > 2 (not >= 2)
-
-      Expression toki, tokj;
-      unsigned ii = 0, jj = 0;
-      tokj = stack.back();
-      jj = stacki.back();
-      stack.pop_back();
-      stacki.pop_back();
-
-      toki = stack.back();
-      ii = stacki.back();
-      stack.pop_back();
-      stacki.pop_back();
-
-      buffer.push_back(toki);
-      bufferi.push_back(ii);
-
-      stack_lstm.rewind_one_step();
-      stack_lstm.rewind_one_step();
-
-      buffer_lstm.add_input(buffer.back());
-
-      stack.push_back(tokj);
-      stacki.push_back(jj);
-
-      stack_lstm.add_input(stack.back());
-    } else { // LEFT or RIGHT
-      assert(stack.size() > 2); // dummy symbol means > 2 (not >= 2)
-      assert(ac == 'L' || ac == 'R');
-      Expression dep, head;
-      unsigned depi = 0, headi = 0;
-      (ac == 'R' ? dep : head) = stack.back();
-      (ac == 'R' ? depi : headi) = stacki.back();
-      stack.pop_back();
-      stacki.pop_back();
-      (ac == 'R' ? head : dep) = stack.back();
-      (ac == 'R' ? headi : depi) = stacki.back();
-      stack.pop_back();
-      stacki.pop_back();
-      // composed = cbias + H * head + D * dep + R * relation
-      Expression composed = affine_transform({cbias, H, head, D, dep, R,
-                                              relation});
-      Expression nlcomposed = tanh(composed);
-      stack_lstm.rewind_one_step();
-      stack_lstm.rewind_one_step();
-      stack_lstm.add_input(nlcomposed);
-      stack.push_back(nlcomposed);
-      stacki.push_back(headi);
-    }
-  }
-  assert(stack.size() == 2); // guard symbol, root
-  assert(stacki.size() == 2);
-  assert(buffer.size() == 1); // guard symbol
-  assert(bufferi.size() == 1);
-  Expression tot_neglogprob = -sum(log_probs);
-  assert(tot_neglogprob.pg != nullptr);
-
-  if (final_parser_state) {
-    *final_parser_state = p_t;
-  }
-  return results;
-}
-
-
-void LSTMParser::SaveModel(const string& model_fname, bool softlink_created) {
-  ofstream out_file(model_fname);
-  eos::portable_oarchive archive(out_file);
-  archive << *this;
-  cerr << "Model saved." << endl;
-  // Create a soft link to the most recent model in order to make it
-  // easier to refer to it in a shell script.
-  if (!softlink_created) {
-    string softlink = "latest_model.params";
-
-    if (system((string("rm -f ") + softlink).c_str()) == 0
-        && system(("ln -s " + model_fname + " " + softlink).c_str()) == 0) {
-      cerr << "Created " << softlink << " as a soft link to " << model_fname
-           << " for convenience." << endl;
-    }
-  }
+  return state;
 }
 
 
@@ -504,13 +427,13 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
         for (auto& index_and_id : tsentence) { // use reference to overwrite
           if (corpus.singletons.count(index_and_id.second)
               && cnn::rand01() < unk_prob) {
-            index_and_id.second = kUNK;
+            index_and_id.second = vocab.kUNK;
           }
         }
       }
       const vector<unsigned>& actions = corpus.correct_act_sent[order[si]];
       ComputationGraph hg;
-      LogProbParser(&hg, sentence, tsentence, actions,
+      LogProbTagger(&hg, sentence, tsentence, actions,
                     corpus.vocab->actions, corpus.vocab->int_to_words,
                     &correct);
       double lp = as_scalar(hg.incremental_forward());
@@ -578,25 +501,10 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 }
 
 
-vector<unsigned> LSTMParser::LogProbParser(
-    const Sentence& sentence, const CorpusVocabulary& vocab,
-    ComputationGraph *cg, Expression* final_parser_state) {
-  Sentence::SentenceMap tsentence(sentence.words); // sentence w/ OOVs replaced
-  for (auto& index_and_id : tsentence) { // use reference to overwrite
-    if (!vocab.int_to_training_word[index_and_id.second]) {
-      index_and_id.second = kUNK;
-    }
-  }
-  return LogProbParser(cg, sentence, tsentence, vector<unsigned>(),
-                       vocab.actions, vocab.int_to_words, nullptr,
-                       final_parser_state);
-}
-
-
 ParseTree LSTMParser::Parse(const Sentence& sentence,
                             const CorpusVocabulary& vocab, bool labeled) {
   ComputationGraph cg;
-  vector<unsigned> pred = LogProbParser(sentence, vocab, &cg);
+  vector<unsigned> pred = LogProbTagger(sentence, vocab, &cg);
   double lp = as_scalar(cg.incremental_forward());
   return RecoverParseTree(sentence, pred, vocab.actions,
                           vocab.actions_to_arc_labels, labeled, lp);
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 1410525..0976f22 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -20,6 +20,7 @@
 #include "cnn/rnn.h"
 #include "corpus.h"
 #include "eos/portable_archive.hpp"
+#include "lstm-transition-tagger.h"
 
 
 namespace lstm_parser {
@@ -112,17 +113,15 @@ class ParseTree {
 };
 
 
-class LSTMParser {
+class LSTMParser : LSTMTransitionTagger {
 public:
   // TODO: make some of these members non-public
   ParserOptions options;
   CorpusVocabulary vocab;
   cnn::Model model;
 
-  bool finalized;
   std::unordered_map<unsigned, std::vector<float>> pretrained;
   unsigned n_possible_actions;
-  const unsigned kUNK;
   const unsigned kROOT_SYMBOL;
 
   cnn::LSTMBuilder stack_lstm; // (layers, input, hidden, trainer)
@@ -156,7 +155,6 @@ class LSTMParser {
                          bool finalize=true);
 
   explicit LSTMParser(const std::string& model_path) :
-      kUNK(vocab.GetOrAddWord(vocab.UNK)),
       kROOT_SYMBOL(vocab.GetOrAddWord(vocab.ROOT)) {
     std::cerr << "Loading model from " << model_path << "...";
     auto t_start = std::chrono::high_resolution_clock::now();
@@ -176,13 +174,19 @@ class LSTMParser {
 
   template <class Archive>
   explicit LSTMParser(Archive* archive) :
-      kUNK(vocab.GetOrAddWord(vocab.UNK)),
       kROOT_SYMBOL(vocab.GetOrAddWord(vocab.ROOT)) {
     *archive >> *this;
   }
 
-  static bool IsActionForbidden(const std::string& a, unsigned bsize,
-                                unsigned ssize, const std::vector<int>& stacki);
+  virtual bool IsActionForbidden(const std::string& a,
+                                 const TaggerState& state) override;
+
+  virtual cnn::expr::Expression GetActionProbabilities(const TaggerState& state)
+      override;
+
+  virtual void DoAction(unsigned action,
+                        const std::vector<std::string>& action_names,
+                        TaggerState* state, cnn::ComputationGraph* cg) override;
 
   ParseTree Parse(const Sentence& sentence,
                   const CorpusVocabulary& vocab, bool labeled);
@@ -208,34 +212,45 @@ class LSTMParser {
     DoTest(corpus, true, output_parses);
   }
 
-  // Used for testing. Replaces OOV with UNK.
-  std::vector<unsigned> LogProbParser(
-      const Sentence& sentence, const CorpusVocabulary& vocab,
-      cnn::ComputationGraph *cg,
-      cnn::expr::Expression* final_parser_state = nullptr);
-
   void LoadPretrainedWords(const std::string& words_path);
 
   void FinalizeVocab();
 
 protected:
-  // *** if correct_actions is empty, this runs greedy decoding ***
-  // returns parse actions for input sentence (in training just returns the
-  // reference)
-  // OOV handling: raw_sent will have the actual words
-  //               sent will have words replaced by appropriate UNK tokens
-  // this lets us use pretrained embeddings, when available, for words that were
-  // OOV in the parser training data.
-  std::vector<unsigned> LogProbParser(
-      cnn::ComputationGraph* hg,
-      const Sentence& sentence, // raw sentence
+  struct ParserState : public TaggerState {
+    std::vector<cnn::expr::Expression> buffer;
+    std::vector<int> bufferi; // position of the words in the sentence
+    std::vector<cnn::expr::Expression> stack;  // subtree embeddings
+    std::vector<int> stacki; // word position in sentence of head of subtree
+
+    ~ParserState() {
+      assert(stack.size() == 2); // guard symbol, root
+      assert(stacki.size() == 2);
+      assert(buffer.size() == 1); // guard symbol
+      assert(bufferi.size() == 1);
+    }
+  };
+
+  virtual std::vector<cnn::Parameters*> GetParameters() override {
+    std::vector<cnn::Parameters*> all_params {p_pbias, p_H, p_D, p_R, p_cbias,
+        p_S, p_B, p_A, p_ib, p_w2l, p_p2a, p_abias, p_action_start};
+    if (options.use_pos)
+      all_params.push_back(p_p2l);
+    if (p_t2l)
+      all_params.push_back(p_t2l);
+    return all_params;
+  }
+
+  virtual TaggerState* InitializeParserState(
+      cnn::ComputationGraph* cg, const Sentence& raw_sent,
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
       const std::vector<unsigned>& correct_actions,
-      const std::vector<std::string>& action_names,
-      const std::vector<std::string>& int_to_words, double* correct,
-      cnn::expr::Expression* final_parser_state = nullptr);
+      const std::vector<std::string>& action_names) override;
 
-  void SaveModel(const std::string& model_fname, bool softlink_created);
+  virtual bool ShouldTerminate(const TaggerState& state) override {
+    const ParserState& real_state = static_cast<const ParserState&>(state);
+    return real_state.stack.size() <= 2 && real_state.buffer.size() <= 1;
+  }
 
   inline unsigned ComputeCorrect(const ParseTree& ref,
                                  const ParseTree& hyp) const {
@@ -249,6 +264,10 @@ class LSTMParser {
     return correct_count;
   }
 
+  virtual void DoSave(eos::portable_oarchive& archive) override {
+    archive << *this;
+  }
+
 private:
   friend class boost::serialization::access;
 
diff --git a/parser/lstm-transition-tagger.cpp b/parser/lstm-transition-tagger.cpp
new file mode 100644
index 0000000..b647b12
--- /dev/null
+++ b/parser/lstm-transition-tagger.cpp
@@ -0,0 +1,124 @@
+#include "lstm-transition-tagger.h"
+
+#include <fstream>
+#include <string>
+#include <memory>
+
+#include "cnn/expr.h"
+#include "cnn/model.h"
+#include "eos/portable_archive.hpp"
+
+using namespace std;
+using namespace cnn;
+using namespace cnn::expr;
+
+namespace lstm_parser {
+
+
+void LSTMTransitionTagger::SaveModel(const string& model_fname,
+                                     bool softlink_created) {
+  ofstream out_file(model_fname);
+  eos::portable_oarchive archive(out_file);
+  DoSave(archive);
+  cerr << "Model saved." << endl;
+  // Create a soft link to the most recent model in order to make it
+  // easier to refer to it in a shell script.
+  if (!softlink_created) {
+    string softlink = "latest_model.params";
+
+    if (system((string("rm -f ") + softlink).c_str()) == 0
+        && system(("ln -s " + model_fname + " " + softlink).c_str()) == 0) {
+      cerr << "Created " << softlink << " as a soft link to " << model_fname
+           << " for convenience." << endl;
+    }
+  }
+}
+
+
+vector<unsigned> LSTMTransitionTagger::LogProbTagger(
+    const Sentence& sentence, const CorpusVocabulary& vocab,
+    ComputationGraph *cg, Expression* final_parser_state) {
+  Sentence::SentenceMap tsentence(sentence.words); // sentence w/ OOVs replaced
+  for (auto& index_and_id : tsentence) { // use reference to overwrite
+    if (!vocab.int_to_training_word[index_and_id.second]) {
+      index_and_id.second = vocab.kUNK;
+    }
+  }
+  return LogProbTagger(cg, sentence, tsentence, vector<unsigned>(),
+                       vocab.actions, vocab.int_to_words, nullptr,
+                       final_parser_state);
+}
+
+
+vector<unsigned> LSTMTransitionTagger::LogProbTagger(
+    ComputationGraph* cg,
+    const Sentence& raw_sent,  // raw sentence
+    const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
+    const vector<unsigned>& correct_actions, const vector<string>& action_names,
+    const vector<string>& int_to_words, double* correct,
+    Expression* final_parser_state) {
+  assert(finalized);
+  vector<unsigned> results;
+  const bool build_training_graph = correct_actions.size() > 0;
+
+  // variables in the computation graph representing the parameters
+  for (Parameters *params : GetParameters()) {
+    param_expressions[params] = parameter(*cg, params);
+  }
+
+  unique_ptr<TaggerState> state(InitializeParserState(cg, raw_sent, sent,
+                                                      correct_actions,
+                                                      action_names));
+
+  vector<Expression> log_probs;
+  unsigned action_count = 0;  // incremented at each prediction
+  Expression p_t; // declared outside to allow access later
+  while (!ShouldTerminate(*state)) {
+    // Get list of possible actions for the current parser state.
+    vector<unsigned> current_valid_actions;
+    for (unsigned action = 0; action < action_names.size(); ++action) {
+      if (IsActionForbidden(action_names[action], *state))
+        continue;
+      current_valid_actions.push_back(action);
+    }
+
+    Expression r_t = GetActionProbabilities(*state);
+    // adist = log_softmax(r_t, current_valid_actions)
+    Expression adiste = log_softmax(r_t, current_valid_actions);
+    vector<float> adist = as_vector(cg->incremental_forward());
+    double best_score = adist[current_valid_actions[0]];
+    unsigned best_a = current_valid_actions[0];
+    for (unsigned i = 1; i < current_valid_actions.size(); ++i) {
+      if (adist[current_valid_actions[i]] > best_score) {
+        best_score = adist[current_valid_actions[i]];
+        best_a = current_valid_actions[i];
+      }
+    }
+    unsigned action = best_a;
+    // If we have reference actions (for training), use the reference action.
+    if (build_training_graph) {
+      action = correct_actions[action_count];
+      if (correct && best_a == action) {
+        (*correct)++;
+      }
+    }
+    ++action_count;
+    log_probs.push_back(pick(adiste, action));
+    results.push_back(action);
+
+    DoAction(action, action_names, state.get(), cg);
+  }
+
+  Expression tot_neglogprob = -sum(log_probs);
+  assert(tot_neglogprob.pg != nullptr);
+
+  if (final_parser_state) {
+    *final_parser_state = p_t;
+  }
+  param_expressions.clear();
+  return results;
+}
+
+
+
+} /* namespace lstm_parser */
diff --git a/parser/lstm-transition-tagger.h b/parser/lstm-transition-tagger.h
new file mode 100644
index 0000000..e2de232
--- /dev/null
+++ b/parser/lstm-transition-tagger.h
@@ -0,0 +1,82 @@
+#ifndef LSTM_PARSER_PARSER_LSTM_TRANSITION_TAGGER_H_
+#define LSTM_PARSER_PARSER_LSTM_TRANSITION_TAGGER_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "cnn/expr.h"
+#include "cnn/model.h"
+#include "corpus.h"
+
+namespace eos {
+class portable_oarchive;
+}
+
+namespace lstm_parser {
+
+class LSTMTransitionTagger {
+public:
+  LSTMTransitionTagger() : finalized(false) {}
+  virtual ~LSTMTransitionTagger() {}
+
+protected:
+  struct TaggerState {};
+
+  bool finalized;
+  std::map<cnn::ParametersBase*, cnn::expr::Expression> param_expressions;
+
+  inline cnn::expr::Expression GetParamExpr(cnn::ParametersBase* params) {
+    return param_expressions.at(params);
+  }
+
+  virtual std::vector<cnn::Parameters*> GetParameters() = 0;
+
+  virtual TaggerState* InitializeParserState(
+      cnn::ComputationGraph* hg, const Sentence& raw_sent,
+      const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
+      const std::vector<unsigned>& correct_actions,
+      const std::vector<std::string>& action_names) = 0;
+
+  virtual cnn::expr::Expression GetActionProbabilities(
+      const TaggerState& state) = 0;
+
+  virtual bool ShouldTerminate(const TaggerState& state) = 0;
+
+  virtual bool IsActionForbidden(const std::string& action_name,
+                                 const TaggerState& state) = 0;
+
+  virtual void DoAction(unsigned action,
+                        const std::vector<std::string>& action_names,
+                        TaggerState* state, cnn::ComputationGraph* cg) = 0;
+
+  virtual void DoSave(eos::portable_oarchive& archive) = 0;
+
+  void SaveModel(const std::string& model_fname, bool softlink_created);
+
+  // Used for testing. Replaces OOV with UNK.
+  std::vector<unsigned> LogProbTagger(
+      const Sentence& sentence, const CorpusVocabulary& vocab,
+      cnn::ComputationGraph *cg,
+      cnn::expr::Expression* final_parser_state = nullptr);
+
+  // *** if correct_actions is empty, this runs greedy decoding ***
+  // returns parse actions for input sentence (in training just returns the
+  // reference)
+  // OOV handling: raw_sent will have the actual words
+  //               sent will have words replaced by appropriate UNK tokens
+  // this lets us use pretrained embeddings, when available, for words that were
+  // OOV in the parser training data.
+  std::vector<unsigned> LogProbTagger(
+      cnn::ComputationGraph* hg,
+      const Sentence& sentence, // raw sentence
+      const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
+      const std::vector<unsigned>& correct_actions,
+      const std::vector<std::string>& action_names,
+      const std::vector<std::string>& int_to_words, double* correct,
+      cnn::expr::Expression* final_parser_state = nullptr);
+};
+
+} /* namespace lstm_parser */
+
+#endif /* LSTM_PARSER_PARSER_LSTM_TRANSITION_TAGGER_H_ */

From ee8966ef7cc725f15827e5799c343e50e6f0b6d5 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 14:11:03 -0500
Subject: [PATCH 27/88] Fixed handling of -w flag

---
 parser/lstm-parser-driver.cc | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/parser/lstm-parser-driver.cc b/parser/lstm-parser-driver.cc
index 6ed78fa..ddd7eba 100644
--- a/parser/lstm-parser-driver.cc
+++ b/parser/lstm-parser-driver.cc
@@ -120,11 +120,6 @@ int main(int argc, char** argv) {
     abort();
   }
   if (train && !load_model) {
-    if (!conf.count("words")) {
-      cerr << "Can't train without word vectors! Please provide --words."
-           << endl;
-      abort();
-    }
     if (!conf.count("training_data")) {
       cerr << "Can't train without training data! Please provide"
               " --training_data" << endl;
@@ -132,7 +127,8 @@ int main(int argc, char** argv) {
     }
   }
 
-  const string words = load_model ? "" : conf["words"].as<string>();
+  const string words =
+      load_model || !conf.count("words") ? "" : conf["words"].as<string>();
   unique_ptr<LSTMParser> parser;
   if (load_model) {
     parser.reset(new LSTMParser(conf["model"].as<string>()));

From f8c2aca8c1bbed07991600c29b3513c9f7c4d096 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 14:18:13 -0500
Subject: [PATCH 28/88] Lint

---
 parser/lstm-parser.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 0976f22..7ae9ddb 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -168,7 +168,8 @@ class LSTMParser : LSTMTransitionTagger {
     auto t_end = std::chrono::high_resolution_clock::now();
     auto ms_passed =
         std::chrono::duration<double, std::milli>(t_end - t_start).count();
-    std::cerr << "done. (Loading took " << ms_passed << " milliseconds.)" << std::endl;
+    std::cerr << "done. (Loading took " << ms_passed << " milliseconds.)"
+              << std::endl;
   }
 
 

From 1b49452e0efe411f86e339e0543ef5164a06519a Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 14:47:16 -0500
Subject: [PATCH 29/88] A bit of rearranging within the inheritance hierarchy

---
 parser/lstm-parser.cc             | 10 +++-------
 parser/lstm-parser.h              | 31 ++++++++++++++-----------------
 parser/lstm-transition-tagger.cpp |  8 ++++++++
 parser/lstm-transition-tagger.h   | 21 ++++++++++++++++-----
 4 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 764fbea..82418ce 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -64,10 +64,7 @@ void LSTMParser::LoadPretrainedWords(const string& words_path) {
 }
 
 
-void LSTMParser::FinalizeVocab() {
-  if (finalized)
-    return;
-
+void LSTMParser::InitializeNetworkParameters() {
   // Now that the vocab is ready to be finalized, we can set all the network
   // parameters.
   unsigned action_size = vocab.CountActions() + 1;
@@ -112,8 +109,6 @@ void LSTMParser::FinalizeVocab() {
     p_p = nullptr;
     p_p2l = nullptr;
   }
-
-  finalized = true;
 }
 
 
@@ -141,7 +136,8 @@ LSTMParser::LSTMParser(const ParserOptions& poptions,
 }
 
 
-bool LSTMParser::IsActionForbidden(const string& a, const TaggerState& state) {
+bool LSTMParser::IsActionForbidden(const string& a,
+                                   const TaggerState& state) const {
   const ParserState& real_state = static_cast<const ParserState&>(state);
   unsigned ssize = real_state.stack.size();
   unsigned bsize = real_state.buffer.size();
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 7ae9ddb..3c9db7e 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -113,12 +113,9 @@ class ParseTree {
 };
 
 
-class LSTMParser : LSTMTransitionTagger {
+class LSTMParser : public LSTMTransitionTagger {
 public:
-  // TODO: make some of these members non-public
   ParserOptions options;
-  CorpusVocabulary vocab;
-  cnn::Model model;
 
   std::unordered_map<unsigned, std::vector<float>> pretrained;
   unsigned n_possible_actions;
@@ -179,16 +176,6 @@ class LSTMParser : LSTMTransitionTagger {
     *archive >> *this;
   }
 
-  virtual bool IsActionForbidden(const std::string& a,
-                                 const TaggerState& state) override;
-
-  virtual cnn::expr::Expression GetActionProbabilities(const TaggerState& state)
-      override;
-
-  virtual void DoAction(unsigned action,
-                        const std::vector<std::string>& action_names,
-                        TaggerState* state, cnn::ComputationGraph* cg) override;
-
   ParseTree Parse(const Sentence& sentence,
                   const CorpusVocabulary& vocab, bool labeled);
 
@@ -215,8 +202,6 @@ class LSTMParser : LSTMTransitionTagger {
 
   void LoadPretrainedWords(const std::string& words_path);
 
-  void FinalizeVocab();
-
 protected:
   struct ParserState : public TaggerState {
     std::vector<cnn::expr::Expression> buffer;
@@ -248,11 +233,23 @@ class LSTMParser : LSTMTransitionTagger {
       const std::vector<unsigned>& correct_actions,
       const std::vector<std::string>& action_names) override;
 
-  virtual bool ShouldTerminate(const TaggerState& state) override {
+  virtual void InitializeNetworkParameters() override;
+
+  virtual bool ShouldTerminate(const TaggerState& state) const override {
     const ParserState& real_state = static_cast<const ParserState&>(state);
     return real_state.stack.size() <= 2 && real_state.buffer.size() <= 1;
   }
 
+  virtual bool IsActionForbidden(const std::string& a,
+                                 const TaggerState& state) const override;
+
+  virtual cnn::expr::Expression GetActionProbabilities(const TaggerState& state)
+      override;
+
+  virtual void DoAction(unsigned action,
+                        const std::vector<std::string>& action_names,
+                        TaggerState* state, cnn::ComputationGraph* cg) override;
+
   inline unsigned ComputeCorrect(const ParseTree& ref,
                                  const ParseTree& hyp) const {
     assert(ref.sentence.Size() == hyp.sentence.Size());
diff --git a/parser/lstm-transition-tagger.cpp b/parser/lstm-transition-tagger.cpp
index b647b12..e04142b 100644
--- a/parser/lstm-transition-tagger.cpp
+++ b/parser/lstm-transition-tagger.cpp
@@ -35,6 +35,14 @@ void LSTMTransitionTagger::SaveModel(const string& model_fname,
 }
 
 
+void LSTMTransitionTagger::FinalizeVocab() {
+  if (finalized)
+    return;
+  InitializeNetworkParameters();
+  finalized = true;
+}
+
+
 vector<unsigned> LSTMTransitionTagger::LogProbTagger(
     const Sentence& sentence, const CorpusVocabulary& vocab,
     ComputationGraph *cg, Expression* final_parser_state) {
diff --git a/parser/lstm-transition-tagger.h b/parser/lstm-transition-tagger.h
index e2de232..bade12f 100644
--- a/parser/lstm-transition-tagger.h
+++ b/parser/lstm-transition-tagger.h
@@ -17,15 +17,25 @@ namespace lstm_parser {
 
 class LSTMTransitionTagger {
 public:
+  // TODO: this really shouldn't be public...
+  CorpusVocabulary vocab;
+
   LSTMTransitionTagger() : finalized(false) {}
   virtual ~LSTMTransitionTagger() {}
 
+  void FinalizeVocab();
+
 protected:
   struct TaggerState {};
 
   bool finalized;
   std::map<cnn::ParametersBase*, cnn::expr::Expression> param_expressions;
 
+  cnn::Model model;
+
+  LSTMTransitionTagger(const CorpusVocabulary& vocab)
+      : vocab(vocab), finalized(false) {}
+
   inline cnn::expr::Expression GetParamExpr(cnn::ParametersBase* params) {
     return param_expressions.at(params);
   }
@@ -41,10 +51,10 @@ class LSTMTransitionTagger {
   virtual cnn::expr::Expression GetActionProbabilities(
       const TaggerState& state) = 0;
 
-  virtual bool ShouldTerminate(const TaggerState& state) = 0;
+  virtual bool ShouldTerminate(const TaggerState& state) const = 0;
 
   virtual bool IsActionForbidden(const std::string& action_name,
-                                 const TaggerState& state) = 0;
+                                 const TaggerState& state) const = 0;
 
   virtual void DoAction(unsigned action,
                         const std::vector<std::string>& action_names,
@@ -52,6 +62,8 @@ class LSTMTransitionTagger {
 
   virtual void DoSave(eos::portable_oarchive& archive) = 0;
 
+  virtual void InitializeNetworkParameters() = 0;
+
   void SaveModel(const std::string& model_fname, bool softlink_created);
 
   // Used for testing. Replaces OOV with UNK.
@@ -61,12 +73,11 @@ class LSTMTransitionTagger {
       cnn::expr::Expression* final_parser_state = nullptr);
 
   // *** if correct_actions is empty, this runs greedy decoding ***
-  // returns parse actions for input sentence (in training just returns the
-  // reference)
+  // returns actions for input sentence (in training just returns the reference)
   // OOV handling: raw_sent will have the actual words
   //               sent will have words replaced by appropriate UNK tokens
   // this lets us use pretrained embeddings, when available, for words that were
-  // OOV in the parser training data.
+  // OOV in the training data.
   std::vector<unsigned> LogProbTagger(
       cnn::ComputationGraph* hg,
       const Sentence& sentence, // raw sentence

From 818eac8c035727916c1fa117317c4da58c7e9fb8 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 15:25:58 -0500
Subject: [PATCH 30/88] Updated ShouldTerminate interface

---
 parser/lstm-parser.h              | 4 +++-
 parser/lstm-transition-tagger.cpp | 2 +-
 parser/lstm-transition-tagger.h   | 4 +++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 3c9db7e..b598452 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -235,7 +235,9 @@ class LSTMParser : public LSTMTransitionTagger {
 
   virtual void InitializeNetworkParameters() override;
 
-  virtual bool ShouldTerminate(const TaggerState& state) const override {
+  virtual bool ShouldTerminate(
+      const TaggerState& state, const Sentence& raw_sent,
+      const Sentence::SentenceMap& sent) const override {
     const ParserState& real_state = static_cast<const ParserState&>(state);
     return real_state.stack.size() <= 2 && real_state.buffer.size() <= 1;
   }
diff --git a/parser/lstm-transition-tagger.cpp b/parser/lstm-transition-tagger.cpp
index e04142b..978846b 100644
--- a/parser/lstm-transition-tagger.cpp
+++ b/parser/lstm-transition-tagger.cpp
@@ -81,7 +81,7 @@ vector<unsigned> LSTMTransitionTagger::LogProbTagger(
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
   Expression p_t; // declared outside to allow access later
-  while (!ShouldTerminate(*state)) {
+  while (!ShouldTerminate(*state, raw_sent, sent)) {
     // Get list of possible actions for the current parser state.
     vector<unsigned> current_valid_actions;
     for (unsigned action = 0; action < action_names.size(); ++action) {
diff --git a/parser/lstm-transition-tagger.h b/parser/lstm-transition-tagger.h
index bade12f..0870123 100644
--- a/parser/lstm-transition-tagger.h
+++ b/parser/lstm-transition-tagger.h
@@ -51,7 +51,9 @@ class LSTMTransitionTagger {
   virtual cnn::expr::Expression GetActionProbabilities(
       const TaggerState& state) = 0;
 
-  virtual bool ShouldTerminate(const TaggerState& state) const = 0;
+  virtual bool ShouldTerminate(const TaggerState& state,
+                               const Sentence& raw_sent,
+                               const Sentence::SentenceMap& sent) const = 0;
 
   virtual bool IsActionForbidden(const std::string& action_name,
                                  const TaggerState& state) const = 0;

From 2030e2057b3a8305c9c7e2feeebaadef4e3257f4 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 17:35:55 -0500
Subject: [PATCH 31/88] Minor API and code cleanup

---
 parser/lstm-parser.cc             | 24 ++++++++++++------------
 parser/lstm-parser.h              |  3 ++-
 parser/lstm-transition-tagger.cpp |  2 +-
 parser/lstm-transition-tagger.h   |  6 ++----
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 82418ce..fd1045e 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -136,22 +136,24 @@ LSTMParser::LSTMParser(const ParserOptions& poptions,
 }
 
 
-bool LSTMParser::IsActionForbidden(const string& a,
+bool LSTMParser::IsActionForbidden(const unsigned action,
+                                   const vector<string>& action_names,
                                    const TaggerState& state) const {
+  const string& action_name = action_names[action];
   const ParserState& real_state = static_cast<const ParserState&>(state);
   unsigned ssize = real_state.stack.size();
   unsigned bsize = real_state.buffer.size();
 
-  if (a[1] == 'W' && ssize < 3)
+  if (action_name[1] == 'W' && ssize < 3)
     return true;
-  if (a[1] == 'W') {
+  if (action_name[1] == 'W') {
     int top = real_state.stacki[real_state.stacki.size() - 1];
     int sec = real_state.stacki[real_state.stacki.size() - 2];
     if (sec > top)
       return true;
   }
 
-  bool is_shift = (a[0] == 'S' && a[1] == 'H');
+  bool is_shift = (action_name[0] == 'S' && action_name[1] == 'H');
   bool is_reduce = !is_shift;
   if (is_shift && bsize == 1)
     return true;
@@ -162,7 +164,7 @@ bool LSTMParser::IsActionForbidden(const string& a,
       is_shift)
     return true;
   // only attach left to ROOT
-  if (bsize == 1 && ssize == 3 && a[0] == 'R')
+  if (bsize == 1 && ssize == 3 && action_name[0] == 'R')
     return true;
   return false;
 }
@@ -221,8 +223,7 @@ ParseTree LSTMParser::RecoverParseTree(
 }
 
 
-cnn::expr::Expression LSTMParser::GetActionProbabilities(
-      const TaggerState& state) {
+Expression LSTMParser::GetActionProbabilities(const TaggerState& state) {
   // p_t = pbias + S * slstm + B * blstm + A * alstm
   Expression p_t = affine_transform(
       {GetParamExpr(p_pbias), GetParamExpr(p_S), stack_lstm.back(),
@@ -236,8 +237,7 @@ cnn::expr::Expression LSTMParser::GetActionProbabilities(
 }
 
 
-void LSTMParser::DoAction(unsigned action,
-                          const vector<string>& action_names,
+void LSTMParser::DoAction(unsigned action, const vector<string>& action_names,
                           TaggerState* state, ComputationGraph* cg) {
   ParserState* real_state = static_cast<ParserState*>(state);
   // add current action to action LSTM
@@ -315,11 +315,11 @@ void LSTMParser::DoAction(unsigned action,
 
 
 LSTMTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
-    cnn::ComputationGraph* cg,
+    ComputationGraph* cg,
     const Sentence& raw_sent,
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-    const std::vector<unsigned>& correct_actions,
-    const std::vector<std::string>& action_names) {
+    const vector<unsigned>& correct_actions,
+    const vector<string>& action_names) {
   stack_lstm.new_graph(*cg);
   buffer_lstm.new_graph(*cg);
   action_lstm.new_graph(*cg);
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index b598452..9b9d731 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -242,7 +242,8 @@ class LSTMParser : public LSTMTransitionTagger {
     return real_state.stack.size() <= 2 && real_state.buffer.size() <= 1;
   }
 
-  virtual bool IsActionForbidden(const std::string& a,
+  virtual bool IsActionForbidden(const unsigned action,
+                                 const std::vector<std::string>& action_names,
                                  const TaggerState& state) const override;
 
   virtual cnn::expr::Expression GetActionProbabilities(const TaggerState& state)
diff --git a/parser/lstm-transition-tagger.cpp b/parser/lstm-transition-tagger.cpp
index 978846b..c5f9cc9 100644
--- a/parser/lstm-transition-tagger.cpp
+++ b/parser/lstm-transition-tagger.cpp
@@ -85,7 +85,7 @@ vector<unsigned> LSTMTransitionTagger::LogProbTagger(
     // Get list of possible actions for the current parser state.
     vector<unsigned> current_valid_actions;
     for (unsigned action = 0; action < action_names.size(); ++action) {
-      if (IsActionForbidden(action_names[action], *state))
+      if (IsActionForbidden(action, action_names, *state))
         continue;
       current_valid_actions.push_back(action);
     }
diff --git a/parser/lstm-transition-tagger.h b/parser/lstm-transition-tagger.h
index 0870123..3af1b7b 100644
--- a/parser/lstm-transition-tagger.h
+++ b/parser/lstm-transition-tagger.h
@@ -33,9 +33,6 @@ class LSTMTransitionTagger {
 
   cnn::Model model;
 
-  LSTMTransitionTagger(const CorpusVocabulary& vocab)
-      : vocab(vocab), finalized(false) {}
-
   inline cnn::expr::Expression GetParamExpr(cnn::ParametersBase* params) {
     return param_expressions.at(params);
   }
@@ -55,7 +52,8 @@ class LSTMTransitionTagger {
                                const Sentence& raw_sent,
                                const Sentence::SentenceMap& sent) const = 0;
 
-  virtual bool IsActionForbidden(const std::string& action_name,
+  virtual bool IsActionForbidden(const unsigned action,
+                                 const std::vector<std::string>& action_names,
                                  const TaggerState& state) const = 0;
 
   virtual void DoAction(unsigned action,

From 751d3afd62c87ee60a48a2e008d3810ac9af33ad Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 19:05:43 -0500
Subject: [PATCH 32/88] Better code structure for unknown word replacement

---
 parser/lstm-transition-tagger.cpp | 26 +++++++++++++++++---------
 parser/lstm-transition-tagger.h   | 20 ++++++++++++--------
 2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/parser/lstm-transition-tagger.cpp b/parser/lstm-transition-tagger.cpp
index c5f9cc9..b8706d5 100644
--- a/parser/lstm-transition-tagger.cpp
+++ b/parser/lstm-transition-tagger.cpp
@@ -42,19 +42,27 @@ void LSTMTransitionTagger::FinalizeVocab() {
   finalized = true;
 }
 
-
-vector<unsigned> LSTMTransitionTagger::LogProbTagger(
-    const Sentence& sentence, const CorpusVocabulary& vocab,
-    ComputationGraph *cg, Expression* final_parser_state) {
-  Sentence::SentenceMap tsentence(sentence.words); // sentence w/ OOVs replaced
-  for (auto& index_and_id : tsentence) { // use reference to overwrite
+Sentence::SentenceMap LSTMTransitionTagger::ReplaceUnknowns(
+    const Sentence& sentence, const CorpusVocabulary& vocab) {
+  Sentence::SentenceMap tsentence(sentence.words);  // sentence w/ OOVs replaced
+  for (auto& index_and_id : tsentence) {
+    // use reference to overwrite
     if (!vocab.int_to_training_word[index_and_id.second]) {
       index_and_id.second = vocab.kUNK;
     }
   }
-  return LogProbTagger(cg, sentence, tsentence, vector<unsigned>(),
-                       vocab.actions, vocab.int_to_words, nullptr,
-                       final_parser_state);
+  return tsentence;
+}
+
+vector<unsigned> LSTMTransitionTagger::LogProbTagger(
+    const Sentence& sentence, const CorpusVocabulary& vocab,
+    ComputationGraph *cg, bool replace_unknowns,
+    Expression* final_parser_state) {
+  return LogProbTagger(
+      cg, sentence,
+      replace_unknowns ? ReplaceUnknowns(sentence, vocab) : sentence.words,
+      vector<unsigned>(), vocab.actions, vocab.int_to_words, nullptr,
+      final_parser_state);
 }
 
 
diff --git a/parser/lstm-transition-tagger.h b/parser/lstm-transition-tagger.h
index 3af1b7b..12df1ee 100644
--- a/parser/lstm-transition-tagger.h
+++ b/parser/lstm-transition-tagger.h
@@ -25,15 +25,22 @@ class LSTMTransitionTagger {
 
   void FinalizeVocab();
 
+  // Used for testing. Replaces OOV with UNK.
+  std::vector<unsigned> LogProbTagger(
+      const Sentence& sentence, const CorpusVocabulary& vocab,
+      cnn::ComputationGraph *cg,
+      bool replace_unknowns = true,
+      cnn::expr::Expression* final_parser_state = nullptr);
+
 protected:
   struct TaggerState {};
 
   bool finalized;
-  std::map<cnn::ParametersBase*, cnn::expr::Expression> param_expressions;
+  std::map<cnn::Parameters*, cnn::expr::Expression> param_expressions;
 
   cnn::Model model;
 
-  inline cnn::expr::Expression GetParamExpr(cnn::ParametersBase* params) {
+  inline cnn::expr::Expression GetParamExpr(cnn::Parameters* params) {
     return param_expressions.at(params);
   }
 
@@ -66,12 +73,6 @@ class LSTMTransitionTagger {
 
   void SaveModel(const std::string& model_fname, bool softlink_created);
 
-  // Used for testing. Replaces OOV with UNK.
-  std::vector<unsigned> LogProbTagger(
-      const Sentence& sentence, const CorpusVocabulary& vocab,
-      cnn::ComputationGraph *cg,
-      cnn::expr::Expression* final_parser_state = nullptr);
-
   // *** if correct_actions is empty, this runs greedy decoding ***
   // returns actions for input sentence (in training just returns the reference)
   // OOV handling: raw_sent will have the actual words
@@ -86,6 +87,9 @@ class LSTMTransitionTagger {
       const std::vector<std::string>& action_names,
       const std::vector<std::string>& int_to_words, double* correct,
       cnn::expr::Expression* final_parser_state = nullptr);
+
+  Sentence::SentenceMap ReplaceUnknowns(const Sentence& sentence,
+                                        const CorpusVocabulary& vocab);
 };
 
 } /* namespace lstm_parser */

From 543eb3833b55f90c7eb798185403e36a485d57fc Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 1 Mar 2017 19:15:01 -0500
Subject: [PATCH 33/88] Made vocab not a public member

---
 parser/lstm-parser-driver.cc    | 11 ++++++-----
 parser/lstm-transition-tagger.h |  8 ++++++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/parser/lstm-parser-driver.cc b/parser/lstm-parser-driver.cc
index ddd7eba..09a6fb3 100644
--- a/parser/lstm-parser-driver.cc
+++ b/parser/lstm-parser-driver.cc
@@ -153,7 +153,7 @@ int main(int argc, char** argv) {
     }
 
     signal(SIGINT, signal_callback_handler);
-    ParserTrainingCorpus training_corpus(&parser->vocab,
+    ParserTrainingCorpus training_corpus(parser->GetVocab(),
                                          conf["training_data"].as<string>(),
                                          true);
     parser->FinalizeVocab();
@@ -161,8 +161,8 @@ int main(int argc, char** argv) {
          << endl;
     // OOV words will be replaced by UNK tokens
     dev_corpus.reset(
-        new ParserTrainingCorpus(&parser->vocab, conf["dev_data"].as<string>(),
-                                 false));
+        new ParserTrainingCorpus(parser->GetVocab(),
+                                 conf["dev_data"].as<string>(), false));
 
     ostringstream os;
     os << "parser_" << (parser->options.use_pos ? "pos" : "nopos")
@@ -190,7 +190,7 @@ int main(int argc, char** argv) {
     cerr << "Evaluating model on " << conf["dev_data"].as<string>() << endl;
     if (!train) { // Didn't already load dev corpus for training
       dev_corpus.reset(
-          new ParserTrainingCorpus(&parser->vocab,
+          new ParserTrainingCorpus(parser->GetVocab(),
                                    conf["dev_data"].as<string>(), false));
     }
     parser->Evaluate(*dev_corpus);
@@ -213,7 +213,8 @@ int main(int argc, char** argv) {
            << endl;
       abort();
     }
-    Corpus test_corpus(&parser->vocab, *reader, conf["test_data"].as<string>());
+    Corpus test_corpus(parser->GetVocab(), *reader,
+                       conf["test_data"].as<string>());
     parser->Test(test_corpus);
   }
 }
diff --git a/parser/lstm-transition-tagger.h b/parser/lstm-transition-tagger.h
index 12df1ee..e6ec5c0 100644
--- a/parser/lstm-transition-tagger.h
+++ b/parser/lstm-transition-tagger.h
@@ -17,8 +17,6 @@ namespace lstm_parser {
 
 class LSTMTransitionTagger {
 public:
-  // TODO: this really shouldn't be public...
-  CorpusVocabulary vocab;
 
   LSTMTransitionTagger() : finalized(false) {}
   virtual ~LSTMTransitionTagger() {}
@@ -32,6 +30,11 @@ class LSTMTransitionTagger {
       bool replace_unknowns = true,
       cnn::expr::Expression* final_parser_state = nullptr);
 
+  const lstm_parser::CorpusVocabulary& GetVocab() const { return vocab; }
+
+  // TODO: arrange things such that we don't need to expose this?
+  lstm_parser::CorpusVocabulary* GetVocab() { return &vocab; }
+
 protected:
   struct TaggerState {};
 
@@ -39,6 +42,7 @@ class LSTMTransitionTagger {
   std::map<cnn::Parameters*, cnn::expr::Expression> param_expressions;
 
   cnn::Model model;
+  CorpusVocabulary vocab;
 
   inline cnn::expr::Expression GetParamExpr(cnn::Parameters* params) {
     return param_expressions.at(params);

From 61142afbb4500fbb3273caf6fc0274301e0d3fd4 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 5 Mar 2017 18:08:36 -0500
Subject: [PATCH 34/88] Renamed to neural transition tagger; cleaned up API a
 bit

Particularly regarding tagger state objects
---
 parser/CMakeLists.txt                         |  2 +-
 parser/lstm-parser.cc                         | 13 +++-------
 parser/lstm-parser.h                          | 17 ++++++++-----
 ...agger.cpp => neural-transition-tagger.cpp} | 14 +++++------
 ...on-tagger.h => neural-transition-tagger.h} | 25 ++++++++++---------
 5 files changed, 36 insertions(+), 35 deletions(-)
 rename parser/{lstm-transition-tagger.cpp => neural-transition-tagger.cpp} (91%)
 rename parser/{lstm-transition-tagger.h => neural-transition-tagger.h} (81%)

diff --git a/parser/CMakeLists.txt b/parser/CMakeLists.txt
index 1217810..0077cab 100644
--- a/parser/CMakeLists.txt
+++ b/parser/CMakeLists.txt
@@ -1,7 +1,7 @@
 PROJECT(lstm-parser:parser)
 CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
 
-ADD_LIBRARY(lstm-parser-core lstm-parser.cc corpus.cc lstm-transition-tagger.cpp)
+ADD_LIBRARY(lstm-parser-core lstm-parser.cc corpus.cc neural-transition-tagger.cpp)
 target_link_libraries(lstm-parser-core cnn ${Boost_LIBRARIES})
 
 ADD_EXECUTABLE(lstm-parse lstm-parser-driver.cc)
diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index fd1045e..cd52ccf 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -314,7 +314,7 @@ void LSTMParser::DoAction(unsigned action, const vector<string>& action_names,
 }
 
 
-LSTMTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
+NeuralTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
     ComputationGraph* cg,
     const Sentence& raw_sent,
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
@@ -327,15 +327,10 @@ LSTMTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
   buffer_lstm.start_new_sequence();
   action_lstm.start_new_sequence();
 
+  Expression stack_guard = GetParamExpr(p_stack_guard);
+  ParserState* state = new ParserState(raw_sent, sent, stack_guard);
   action_lstm.add_input(GetParamExpr(p_action_start));
-
-  ParserState* state = new ParserState;
-  state->buffer.resize(raw_sent.Size() + 1);
-  state->bufferi.resize(raw_sent.Size() + 1);
-  state->stack.push_back(parameter(*cg, p_stack_guard));
-  state->stacki.push_back(-999);
-  // drive dummy symbol on stack through LSTM
-  stack_lstm.add_input(state->stack.back());
+  stack_lstm.add_input(stack_guard);
 
   // precompute buffer representation from left to right
   unsigned added_to_buffer = 0;
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 9b9d731..7e082a5 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -20,7 +20,7 @@
 #include "cnn/rnn.h"
 #include "corpus.h"
 #include "eos/portable_archive.hpp"
-#include "lstm-transition-tagger.h"
+#include "neural-transition-tagger.h"
 
 
 namespace lstm_parser {
@@ -113,7 +113,7 @@ class ParseTree {
 };
 
 
-class LSTMParser : public LSTMTransitionTagger {
+class LSTMParser : public NeuralTransitionTagger {
 public:
   ParserOptions options;
 
@@ -209,6 +209,12 @@ class LSTMParser : public LSTMTransitionTagger {
     std::vector<cnn::expr::Expression> stack;  // subtree embeddings
     std::vector<int> stacki; // word position in sentence of head of subtree
 
+    ParserState(const Sentence& raw_sentence,
+                const Sentence::SentenceMap& sentence, Expression stack_guard)
+        : TaggerState {raw_sentence, sentence}, buffer(raw_sentence.Size() + 1),
+          bufferi(raw_sentence.Size() + 1), stack( {stack_guard}),
+          stacki( {-999}) {}
+
     ~ParserState() {
       assert(stack.size() == 2); // guard symbol, root
       assert(stacki.size() == 2);
@@ -219,7 +225,8 @@ class LSTMParser : public LSTMTransitionTagger {
 
   virtual std::vector<cnn::Parameters*> GetParameters() override {
     std::vector<cnn::Parameters*> all_params {p_pbias, p_H, p_D, p_R, p_cbias,
-        p_S, p_B, p_A, p_ib, p_w2l, p_p2a, p_abias, p_action_start};
+        p_S, p_B, p_A, p_ib, p_w2l, p_p2a, p_abias, p_action_start,
+        p_stack_guard};
     if (options.use_pos)
       all_params.push_back(p_p2l);
     if (p_t2l)
@@ -235,9 +242,7 @@ class LSTMParser : public LSTMTransitionTagger {
 
   virtual void InitializeNetworkParameters() override;
 
-  virtual bool ShouldTerminate(
-      const TaggerState& state, const Sentence& raw_sent,
-      const Sentence::SentenceMap& sent) const override {
+  virtual bool ShouldTerminate(const TaggerState& state) const override {
     const ParserState& real_state = static_cast<const ParserState&>(state);
     return real_state.stack.size() <= 2 && real_state.buffer.size() <= 1;
   }
diff --git a/parser/lstm-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
similarity index 91%
rename from parser/lstm-transition-tagger.cpp
rename to parser/neural-transition-tagger.cpp
index b8706d5..39af31f 100644
--- a/parser/lstm-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -1,4 +1,4 @@
-#include "lstm-transition-tagger.h"
+#include "neural-transition-tagger.h"
 
 #include <fstream>
 #include <string>
@@ -15,7 +15,7 @@ using namespace cnn::expr;
 namespace lstm_parser {
 
 
-void LSTMTransitionTagger::SaveModel(const string& model_fname,
+void NeuralTransitionTagger::SaveModel(const string& model_fname,
                                      bool softlink_created) {
   ofstream out_file(model_fname);
   eos::portable_oarchive archive(out_file);
@@ -35,14 +35,14 @@ void LSTMTransitionTagger::SaveModel(const string& model_fname,
 }
 
 
-void LSTMTransitionTagger::FinalizeVocab() {
+void NeuralTransitionTagger::FinalizeVocab() {
   if (finalized)
     return;
   InitializeNetworkParameters();
   finalized = true;
 }
 
-Sentence::SentenceMap LSTMTransitionTagger::ReplaceUnknowns(
+Sentence::SentenceMap NeuralTransitionTagger::ReplaceUnknowns(
     const Sentence& sentence, const CorpusVocabulary& vocab) {
   Sentence::SentenceMap tsentence(sentence.words);  // sentence w/ OOVs replaced
   for (auto& index_and_id : tsentence) {
@@ -54,7 +54,7 @@ Sentence::SentenceMap LSTMTransitionTagger::ReplaceUnknowns(
   return tsentence;
 }
 
-vector<unsigned> LSTMTransitionTagger::LogProbTagger(
+vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     const Sentence& sentence, const CorpusVocabulary& vocab,
     ComputationGraph *cg, bool replace_unknowns,
     Expression* final_parser_state) {
@@ -66,7 +66,7 @@ vector<unsigned> LSTMTransitionTagger::LogProbTagger(
 }
 
 
-vector<unsigned> LSTMTransitionTagger::LogProbTagger(
+vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     ComputationGraph* cg,
     const Sentence& raw_sent,  // raw sentence
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
@@ -89,7 +89,7 @@ vector<unsigned> LSTMTransitionTagger::LogProbTagger(
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
   Expression p_t; // declared outside to allow access later
-  while (!ShouldTerminate(*state, raw_sent, sent)) {
+  while (!ShouldTerminate(*state)) {
     // Get list of possible actions for the current parser state.
     vector<unsigned> current_valid_actions;
     for (unsigned action = 0; action < action_names.size(); ++action) {
diff --git a/parser/lstm-transition-tagger.h b/parser/neural-transition-tagger.h
similarity index 81%
rename from parser/lstm-transition-tagger.h
rename to parser/neural-transition-tagger.h
index e6ec5c0..f6d08ff 100644
--- a/parser/lstm-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -1,5 +1,5 @@
-#ifndef LSTM_PARSER_PARSER_LSTM_TRANSITION_TAGGER_H_
-#define LSTM_PARSER_PARSER_LSTM_TRANSITION_TAGGER_H_
+#ifndef LSTM_PARSER_PARSER_NEURAL_TRANSITION_TAGGER_H_
+#define LSTM_PARSER_PARSER_NEURAL_TRANSITION_TAGGER_H_
 
 #include <map>
 #include <string>
@@ -15,11 +15,11 @@ class portable_oarchive;
 
 namespace lstm_parser {
 
-class LSTMTransitionTagger {
+class NeuralTransitionTagger {
 public:
 
-  LSTMTransitionTagger() : finalized(false) {}
-  virtual ~LSTMTransitionTagger() {}
+  NeuralTransitionTagger() : finalized(false) {}
+  virtual ~NeuralTransitionTagger() {}
 
   void FinalizeVocab();
 
@@ -30,13 +30,16 @@ class LSTMTransitionTagger {
       bool replace_unknowns = true,
       cnn::expr::Expression* final_parser_state = nullptr);
 
-  const lstm_parser::CorpusVocabulary& GetVocab() const { return vocab; }
+  const CorpusVocabulary& GetVocab() const { return vocab; }
 
   // TODO: arrange things such that we don't need to expose this?
-  lstm_parser::CorpusVocabulary* GetVocab() { return &vocab; }
+  CorpusVocabulary* GetVocab() { return &vocab; }
 
 protected:
-  struct TaggerState {};
+  struct TaggerState {
+    const Sentence& raw_sentence;
+    const Sentence::SentenceMap& sentence;
+  };
 
   bool finalized;
   std::map<cnn::Parameters*, cnn::expr::Expression> param_expressions;
@@ -59,9 +62,7 @@ class LSTMTransitionTagger {
   virtual cnn::expr::Expression GetActionProbabilities(
       const TaggerState& state) = 0;
 
-  virtual bool ShouldTerminate(const TaggerState& state,
-                               const Sentence& raw_sent,
-                               const Sentence::SentenceMap& sent) const = 0;
+  virtual bool ShouldTerminate(const TaggerState& state) const = 0;
 
   virtual bool IsActionForbidden(const unsigned action,
                                  const std::vector<std::string>& action_names,
@@ -98,4 +99,4 @@ class LSTMTransitionTagger {
 
 } /* namespace lstm_parser */
 
-#endif /* LSTM_PARSER_PARSER_LSTM_TRANSITION_TAGGER_H_ */
+#endif /* LSTM_PARSER_PARSER_NEURAL_TRANSITION_TAGGER_H_ */

From 788f7c39b38960ce87d49297c45d53a8834d05f2 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 5 Mar 2017 19:02:11 -0500
Subject: [PATCH 35/88] Formatting

---
 parser/corpus.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index 5a3c7a8..54aa2f8 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -53,8 +53,8 @@ class CorpusVocabulary {
       words_to_int(other.words_to_int), int_to_words(other.int_to_words),
       int_to_training_word(other.int_to_training_word),
       pos_to_int(other.pos_to_int), int_to_pos(other.int_to_pos),
-      chars_to_int(other.chars_to_int), int_to_chars(
-          other.int_to_chars), kUNK(other.kUNK) {}
+      chars_to_int(other.chars_to_int), int_to_chars(other.int_to_chars),
+      kUNK(other.kUNK) {}
 
   inline unsigned CountPOS() { return pos_to_int.size(); }
   inline unsigned CountWords() { return words_to_int.size(); }

From 1d518e32f515d462edbd0326c9f0faa80bd7458a Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 6 Mar 2017 13:13:42 -0500
Subject: [PATCH 36/88] Got rid of dumb corpus copy constructor

---
 parser/corpus.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index 54aa2f8..a905ba5 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -46,16 +46,6 @@ class CorpusVocabulary {
     AddEntry(BAD0, &chars_to_int, &int_to_chars);
   }
 
-  // Copy constructor: Copy everything except action-related stuff, on the
-  // assumption that we're copying the vocabulary for use in another task with
-  // different actions.
-  CorpusVocabulary(const CorpusVocabulary& other) :
-      words_to_int(other.words_to_int), int_to_words(other.int_to_words),
-      int_to_training_word(other.int_to_training_word),
-      pos_to_int(other.pos_to_int), int_to_pos(other.int_to_pos),
-      chars_to_int(other.chars_to_int), int_to_chars(other.int_to_chars),
-      kUNK(other.kUNK) {}
-
   inline unsigned CountPOS() { return pos_to_int.size(); }
   inline unsigned CountWords() { return words_to_int.size(); }
   inline unsigned CountChars() { return chars_to_int.size(); }

From 14636cb4aace1944b8ade145c9d39489fbe45de1 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 7 Mar 2017 15:08:33 -0500
Subject: [PATCH 37/88] Attempted to marginally reduce memory usage

---
 parser/neural-transition-tagger.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 39af31f..7ce4870 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -39,6 +39,13 @@ void NeuralTransitionTagger::FinalizeVocab() {
   if (finalized)
     return;
   InitializeNetworkParameters();
+  // Give up memory we don't need.
+  vocab.actions.shrink_to_fit();
+  vocab.actions_to_arc_labels.shrink_to_fit();
+  vocab.int_to_chars.shrink_to_fit();
+  vocab.int_to_pos.shrink_to_fit();
+  vocab.int_to_training_word.shrink_to_fit();
+  vocab.int_to_words.shrink_to_fit();
   finalized = true;
 }
 

From 90076a8b6b5b6bb18c13dcfa5fcc84f7742b2f3e Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 7 Mar 2017 15:29:36 -0500
Subject: [PATCH 38/88] Changed CNN build to always optimize, even for debug
 builds

---
 cnn/CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cnn/CMakeLists.txt b/cnn/CMakeLists.txt
index e8408b4..58173ae 100644
--- a/cnn/CMakeLists.txt
+++ b/cnn/CMakeLists.txt
@@ -1,6 +1,10 @@
 project(cnn)
 cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
 
+if(NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(CMAKE_BUILD_TYPE RelWithDebInfo)  # always optimize cnn, even for Debug
+endif()
+
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
 
 # CNN uses Eigen which exploits modern CPU architectures. To get the

From 069189e98d91ced9fe9f07366d3410406c26d7be Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 8 Mar 2017 17:07:06 -0500
Subject: [PATCH 39/88] Made it easier to print Sentence objects

---
 parser/corpus.cc |  4 ++--
 parser/corpus.h  | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 2ab2442..85acb24 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -49,7 +49,7 @@ void ConllUCorpusReader::ReadSentences(const string& file,
         current_sentence_pos[Corpus::ROOT_TOKEN_ID] = root_pos_symbol;
         current_sentence_unk_surface_forms[Corpus::ROOT_TOKEN_ID] = "";
 
-        corpus->sentences.push_back({});
+        corpus->sentences.emplace_back(*corpus->vocab);
         corpus->sentences.back().words.swap(current_sentence);
         corpus->sentences.back().poses.swap(current_sentence_pos);
         corpus->sentences.back().unk_surface_forms.swap(
@@ -183,7 +183,7 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
     Sentence::SentenceMap* sentence_pos,
     Sentence::SentenceUnkMap* sentence_unk_surface_forms, bool final) const {
   // Store the sentence variables and clear them for the next sentence.
-  corpus->sentences.push_back({});
+  corpus->sentences.emplace_back(*corpus->vocab);
   Sentence* sentence = &corpus->sentences.back();
   sentence->words.swap(*words);
   sentence->poses.swap(*sentence_pos);
diff --git a/parser/corpus.h b/parser/corpus.h
index a905ba5..4caaa9a 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -10,6 +10,7 @@
 #include <map>
 #include <set>
 #include <string>
+#include <sstream>
 #include <utility>
 #include <vector>
 
@@ -188,19 +189,48 @@ class ConllUCorpusReader : public CorpusReader {
 };
 
 
-struct Sentence {
+class Sentence;
+inline std::ostream& operator<<(std::ostream& os, const Sentence& sentence);
+
+class Sentence {
+public:
   typedef std::map<unsigned, unsigned> SentenceMap;
   typedef std::map<unsigned, std::string> SentenceUnkMap;
 
+  Sentence(const CorpusVocabulary& vocab) : vocab(vocab) {}
+
   SentenceMap words;
   SentenceMap poses;
   SentenceUnkMap unk_surface_forms;
+  const CorpusVocabulary& vocab;
 
   size_t Size() const {
     return words.size();
   }
+
+  std::string AsString() const {
+    std::ostringstream oss;
+    oss << *this;
+    return oss.str();
+  }
 };
 
+inline std::ostream& operator<<(std::ostream& os, const Sentence&sentence) {
+  for (auto &index_and_word_id : sentence.words) {
+    unsigned index = index_and_word_id.first;
+    unsigned word_id = index_and_word_id.second;
+    unsigned pos_id = sentence.poses.at(index);
+    auto unk_iter = sentence.unk_surface_forms.find(index);
+    os << (unk_iter == sentence.unk_surface_forms.end() ?
+            sentence.vocab.int_to_words.at(word_id) : unk_iter->second)
+       << '/' << sentence.vocab.int_to_pos.at(pos_id);
+    if (index != sentence.words.rbegin()->first) {  // not the last token
+      os << ' ';
+    }
+  }
+  return os;
+}
+
 
 class Corpus {
 public:

From bcfe82d8819766bbd1c02d2cdb15338c76493e9e Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 8 Mar 2017 18:00:32 -0500
Subject: [PATCH 40/88] Fixed nasty bug with handling of completely unknown
 words

---
 parser/neural-transition-tagger.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 7ce4870..150dc47 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -54,7 +54,8 @@ Sentence::SentenceMap NeuralTransitionTagger::ReplaceUnknowns(
   Sentence::SentenceMap tsentence(sentence.words);  // sentence w/ OOVs replaced
   for (auto& index_and_id : tsentence) {
     // use reference to overwrite
-    if (!vocab.int_to_training_word[index_and_id.second]) {
+    if (index_and_id.second >= vocab.int_to_training_word.size()
+        || !vocab.int_to_training_word[index_and_id.second]) {
       index_and_id.second = vocab.kUNK;
     }
   }

From ae363f45de4675750dacac0aafdfc597d4e725c0 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 9 Mar 2017 13:29:23 -0500
Subject: [PATCH 41/88] More sensible storage for correct actions in corpus
 reading

---
 parser/corpus.cc | 25 ++++++++++++++-----------
 parser/corpus.h  |  5 +++--
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 85acb24..acd99a5 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -155,17 +155,18 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordWord(
 
 
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
-    const string& action, TrainingCorpus* corpus) const {
+    const string& action, TrainingCorpus* corpus,
+    vector<unsigned>* correct_actions) const {
   CorpusVocabulary* vocab = corpus->vocab;
   auto action_iter = find(vocab->actions.begin(), vocab->actions.end(), action);
   if (action_iter != vocab->actions.end()) {
     unsigned action_index = distance(vocab->actions.begin(), action_iter);
-    corpus->correct_act_sent.back().push_back(action_index);
+    correct_actions->push_back(action_index);
   } else { // A not-previously-seen action
     if (is_training) {
       vocab->actions.push_back(action);
       unsigned action_index = vocab->actions.size() - 1;
-      corpus->correct_act_sent.back().push_back(action_index);
+      correct_actions->push_back(action_index);
       vocab->actions_to_arc_labels.push_back(vocab->GetLabelForAction(action));
     } else {
       // TODO: right now, new actions which haven't been observed in
@@ -181,19 +182,21 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
 void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
     TrainingCorpus* corpus, Sentence::SentenceMap* words,
     Sentence::SentenceMap* sentence_pos,
-    Sentence::SentenceUnkMap* sentence_unk_surface_forms, bool final) const {
+    Sentence::SentenceUnkMap* sentence_unk_surface_forms,
+    vector<unsigned>* correct_actions) const {
   // Store the sentence variables and clear them for the next sentence.
   corpus->sentences.emplace_back(*corpus->vocab);
   Sentence* sentence = &corpus->sentences.back();
   sentence->words.swap(*words);
   sentence->poses.swap(*sentence_pos);
+  corpus->correct_act_sent.push_back({});
+  corpus->correct_act_sent.back().swap(*correct_actions);
+
   if (!is_training) {
     sentence->unk_surface_forms.swap(*sentence_unk_surface_forms);
   }
 
-  if (!final) {
-    corpus->correct_act_sent.push_back({});
-  }
+  assert(corpus->correct_act_sent.size() == corpus->sentences.size());
 }
 
 
@@ -218,7 +221,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   Sentence::SentenceMap sentence;
   Sentence::SentenceMap sentence_pos;
   Sentence::SentenceUnkMap sentence_unk_surface_forms;
-  corpus->correct_act_sent.push_back({});
+  vector<unsigned> correct_actions;
 
   // We'll need to make sure ROOT token has a consistent ID.
   // (Should get inlined; defined here for DRY purposes.)
@@ -251,7 +254,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
       if (!first) { // if first, first line is blank, but no sentence yet
         FixRootID();
         RecordSentence(corpus, &sentence, &sentence_pos,
-                       &sentence_unk_surface_forms);
+                       &sentence_unk_surface_forms, &correct_actions);
       }
       start_of_sentence = true;
       continue; // don't update next_is_action_line
@@ -300,7 +303,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
         } while (iss);
       }
     } else { // next_is_action_line
-      RecordAction(line, corpus);
+      RecordAction(line, corpus, &correct_actions);
       start_of_sentence = false;
     }
 
@@ -311,7 +314,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
   if (sentence.size() > 0) {
     FixRootID();
     RecordSentence(corpus, &sentence, &sentence_pos,
-                   &sentence_unk_surface_forms, true);
+                   &sentence_unk_surface_forms, &correct_actions);
   }
 
   actions_file.close();
diff --git a/parser/corpus.h b/parser/corpus.h
index 4caaa9a..88f3e04 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -286,12 +286,13 @@ class TrainingCorpus : public Corpus {
         Sentence::SentenceMap* sentence_pos,
         Sentence::SentenceUnkMap* sentence_unk_surface_forms) const;
 
-    void RecordAction(const std::string& action, TrainingCorpus* corpus) const;
+    void RecordAction(const std::string& action, TrainingCorpus* corpus,
+                      std::vector<unsigned>* correct_actions) const;
 
     void RecordSentence(TrainingCorpus* corpus, Sentence::SentenceMap* words,
                         Sentence::SentenceMap* sentence_pos,
                         Sentence::SentenceUnkMap* sentence_unk_surface_forms,
-                        bool final = false) const;
+                        std::vector<unsigned>* correct_actions) const;
 
     static inline unsigned UTF8Len(unsigned char x) {
       if (x < 0x80) return 1;

From 21279da470f310a694288c8715948efd007aaacc Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 9 Mar 2017 13:37:45 -0500
Subject: [PATCH 42/88] Minor memory optimizations

---
 parser/corpus.cc | 2 ++
 parser/corpus.h  | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index acd99a5..be682d8 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -82,6 +82,8 @@ void ConllUCorpusReader::ReadSentences(const string& file,
     current_sentence[token_index] = word_id;
     current_sentence_pos[token_index] = corpus->vocab->GetPOS(pos);
   }
+
+  corpus->sentences.shrink_to_fit();
 }
 
 
diff --git a/parser/corpus.h b/parser/corpus.h
index 88f3e04..2123738 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -333,6 +333,8 @@ class ParserTrainingCorpus : public TrainingCorpus {
       ParserTrainingCorpus* training_corpus =
           static_cast<ParserTrainingCorpus*>(corpus);
       LoadCorrectActions(file, training_corpus);
+      training_corpus->sentences.shrink_to_fit();
+      training_corpus->correct_act_sent.shrink_to_fit();
     }
 
     virtual ~OracleParseTransitionsReader() {};

From ff988c3c1b6f9e08c8283d7cad9a7208f69d6227 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 9 Mar 2017 14:27:32 -0500
Subject: [PATCH 43/88] Added assertion for too many training actions

---
 parser/neural-transition-tagger.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 150dc47..d629a5d 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -121,6 +121,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     unsigned action = best_a;
     // If we have reference actions (for training), use the reference action.
     if (build_training_graph) {
+      assert(action_count < correct_actions.size());
       action = correct_actions[action_count];
       if (correct && best_a == action) {
         (*correct)++;

From e2f4ceebd3e866c6a352df47d2d9bd8e5ec605aa Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 15 Mar 2017 20:33:54 -0400
Subject: [PATCH 44/88] Minor logging change

---
 parser/lstm-parser.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 7e082a5..aa07aed 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -153,7 +153,7 @@ class LSTMParser : public NeuralTransitionTagger {
 
   explicit LSTMParser(const std::string& model_path) :
       kROOT_SYMBOL(vocab.GetOrAddWord(vocab.ROOT)) {
-    std::cerr << "Loading model from " << model_path << "...";
+    std::cerr << "Loading parser model from " << model_path << "...";
     auto t_start = std::chrono::high_resolution_clock::now();
     std::ifstream model_file(model_path.c_str(), std::ios::binary);
     if (!model_file) {

From b2cec9baad400f2581cc273fbbb86a72d3709add Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 16 Mar 2017 13:13:23 -0400
Subject: [PATCH 45/88] Simplified RecoverParseTree

---
 parser/lstm-parser.cc | 20 +++++++-------------
 parser/lstm-parser.h  |  7 ++-----
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index cd52ccf..9183110 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -171,9 +171,8 @@ bool LSTMParser::IsActionForbidden(const unsigned action,
 
 
 ParseTree LSTMParser::RecoverParseTree(
-    const Sentence& sentence, const vector<unsigned>& actions,
-    const vector<string>& action_names,
-    const vector<string>& actions_to_arc_labels, double logprob, bool labeled) {
+    const Sentence& sentence, const vector<unsigned>& actions, double logprob,
+    bool labeled) {
   ParseTree tree(sentence, labeled);
   vector<int> bufferi(sentence.Size() + 1);
   bufferi[0] = -999;
@@ -185,7 +184,7 @@ ParseTree LSTMParser::RecoverParseTree(
         index_and_word_id.first;
   }
   for (auto action : actions) { // loop over transitions for sentence
-    const string& action_string = action_names[action];
+    const string& action_string = vocab.actions[action];
     const char ac = action_string[0];
     const char ac2 = action_string[1];
     if (ac == 'S' && ac2 == 'H') {  // SHIFT
@@ -212,7 +211,7 @@ ParseTree LSTMParser::RecoverParseTree(
       (ac == 'R' ? headi : depi) = stacki.back();
       stacki.pop_back();
       stacki.push_back(headi);
-      tree.SetParent(depi, headi, actions_to_arc_labels[action]);
+      tree.SetParent(depi, headi, vocab.actions_to_arc_labels[action]);
     }
   }
   assert(bufferi.size() == 1);
@@ -465,9 +464,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
         llh += hyp.logprob;
 
         const vector<unsigned>& actions = dev_corpus.correct_act_sent[sii];
-        ParseTree ref = RecoverParseTree(
-            sentence, actions, dev_corpus.vocab->actions,
-            dev_corpus.vocab->actions_to_arc_labels);
+        ParseTree ref = RecoverParseTree(sentence, actions);
 
         trs += actions.size();
         correct_heads += ComputeCorrect(ref, hyp);
@@ -497,8 +494,7 @@ ParseTree LSTMParser::Parse(const Sentence& sentence,
   ComputationGraph cg;
   vector<unsigned> pred = LogProbTagger(sentence, vocab, &cg);
   double lp = as_scalar(cg.incremental_forward());
-  return RecoverParseTree(sentence, pred, vocab.actions,
-                          vocab.actions_to_arc_labels, labeled, lp);
+  return RecoverParseTree(sentence, pred, lp, labeled);  // logprob, labeled
 }
 
 
@@ -530,9 +526,7 @@ void LSTMParser::DoTest(const Corpus& corpus, bool evaluate,
       const ParserTrainingCorpus& training_corpus =
           static_cast<const ParserTrainingCorpus&>(corpus);
       const vector<unsigned>& actions = training_corpus.correct_act_sent[sii];
-      ParseTree ref = RecoverParseTree(sentence, actions, corpus.vocab->actions,
-                                       corpus.vocab->actions_to_arc_labels,
-                                       true);
+      ParseTree ref = RecoverParseTree(sentence, actions, 0, true);  // labeled
       trs += actions.size();
       llh += hyp.logprob;
       correct_heads += ComputeCorrect(ref, hyp);
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index aa07aed..4bceb3a 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -181,11 +181,8 @@ class LSTMParser : public NeuralTransitionTagger {
 
   // take a vector of actions and return a parse tree
   ParseTree RecoverParseTree(
-      const Sentence& sentence,
-      const std::vector<unsigned>& actions,
-      const std::vector<std::string>& action_names,
-      const std::vector<std::string>& actions_to_arc_labels, double logprob = 0,
-      bool labeled = false);
+      const Sentence& sentence, const std::vector<unsigned>& actions,
+      double logprob = 0, bool labeled = false);
 
   void Train(const ParserTrainingCorpus& corpus,
              const ParserTrainingCorpus& dev_corpus, const double unk_prob,

From a88be9de70f8cc6fa4166912e36374393412e5a0 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 16 Mar 2017 13:15:24 -0400
Subject: [PATCH 46/88] Whitespace cleanup

---
 parser/lstm-parser.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 4bceb3a..e3651f6 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -77,7 +77,7 @@ class ParseTree {
   ParseTree(const Sentence& sentence, bool labeled = true) :
       sentence(sentence),
       logprob(0),
-      arc_labels( labeled ? new std::map<unsigned, std::string> : nullptr) {}
+      arc_labels(labeled ? new std::map<unsigned, std::string> : nullptr) {}
 
   inline void SetParent(unsigned child_index, unsigned parent_index,
                       const std::string& arc_label="") {

From 4e269c8988237192e79bb18635b4ebe73e7b9e88 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 16 Mar 2017 13:32:55 -0400
Subject: [PATCH 47/88] Added function to check if a parse tree is labeled

---
 parser/lstm-parser.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index e3651f6..7f9ad6a 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -82,7 +82,7 @@ class ParseTree {
   inline void SetParent(unsigned child_index, unsigned parent_index,
                       const std::string& arc_label="") {
     parents[child_index] = parent_index;
-    if (arc_labels) {
+    if (IsLabeled()) {
       (*arc_labels)[child_index] = arc_label;
     }
   }
@@ -97,7 +97,7 @@ class ParseTree {
   }
 
   const inline std::string& GetArcLabel(unsigned child) const {
-    if (!arc_labels)
+    if (!IsLabeled())
       return NO_LABEL;
     auto arc_label_iter = arc_labels->find(child);
     if (arc_label_iter == arc_labels->end()) {
@@ -107,6 +107,8 @@ class ParseTree {
     }
   }
 
+  bool IsLabeled() const { return arc_labels.get(); }
+
 private:
   std::map<unsigned, unsigned> parents;
   std::unique_ptr<std::map<unsigned, std::string>> arc_labels;

From f38c407d45c7a7ec306b5eacac307713c65a714f Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 16 Mar 2017 13:45:43 -0400
Subject: [PATCH 48/88] Made ParseTree wrap sentence reference to allow move
 assignment

---
 parser/lstm-parser.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 7f9ad6a..fed01df 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -67,17 +67,16 @@ struct ParserOptions {
 };
 
 
+// Barebones representation of a parse tree.
 class ParseTree {
 public:
   static std::string NO_LABEL;
-  // Barebones representation of a parse tree.
-  const Sentence& sentence;
   double logprob;
 
   ParseTree(const Sentence& sentence, bool labeled = true) :
-      sentence(sentence),
       logprob(0),
-      arc_labels(labeled ? new std::map<unsigned, std::string> : nullptr) {}
+      arc_labels(labeled ? new std::map<unsigned, std::string> : nullptr),
+      sentence(sentence) {}
 
   inline void SetParent(unsigned child_index, unsigned parent_index,
                       const std::string& arc_label="") {
@@ -87,6 +86,10 @@ class ParseTree {
     }
   }
 
+  const Sentence& GetSentence() const {
+    return sentence.get();
+  }
+
   const inline unsigned GetParent(unsigned child) const {
     auto parent_iter = parents.find(child);
     if (parent_iter == parents.end()) {
@@ -112,6 +115,7 @@ class ParseTree {
 private:
   std::map<unsigned, unsigned> parents;
   std::unique_ptr<std::map<unsigned, std::string>> arc_labels;
+  std::reference_wrapper<const Sentence> sentence;
 };
 
 
@@ -259,9 +263,9 @@ class LSTMParser : public NeuralTransitionTagger {
 
   inline unsigned ComputeCorrect(const ParseTree& ref,
                                  const ParseTree& hyp) const {
-    assert(ref.sentence.Size() == hyp.sentence.Size());
+    assert(ref.GetSentence().Size() == hyp.GetSentence().Size());
     unsigned correct_count = 0;
-    for (const auto& token_index_and_word : ref.sentence.words) {
+    for (const auto& token_index_and_word : ref.GetSentence().words) {
       unsigned i = token_index_and_word.first;
       if (i != Corpus::ROOT_TOKEN_ID && ref.GetParent(i) == hyp.GetParent(i))
         ++correct_count;

From 481fdbaeb68c49223bc6c7048d080e8f01ac77f2 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 16 Mar 2017 14:01:53 -0400
Subject: [PATCH 49/88] Simplified tagger interface

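Removed the action_names, int_to_words, and vocab parameters from the
tagger methods, since the tagger already has access to all of them via
its own vocab object. Also renamed CorpusVocabulary::actions to
action_names.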
---
 parser/corpus.cc                    | 12 ++++++------
 parser/corpus.h                     |  8 ++++----
 parser/lstm-parser.cc               | 20 ++++++++------------
 parser/lstm-parser.h                |  9 +++------
 parser/neural-transition-tagger.cpp | 24 ++++++++++--------------
 parser/neural-transition-tagger.h   | 19 ++++++-------------
 6 files changed, 37 insertions(+), 55 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index be682d8..6636824 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -160,14 +160,14 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordAction(
     const string& action, TrainingCorpus* corpus,
     vector<unsigned>* correct_actions) const {
   CorpusVocabulary* vocab = corpus->vocab;
-  auto action_iter = find(vocab->actions.begin(), vocab->actions.end(), action);
-  if (action_iter != vocab->actions.end()) {
-    unsigned action_index = distance(vocab->actions.begin(), action_iter);
+  auto action_iter = find(vocab->action_names.begin(), vocab->action_names.end(), action);
+  if (action_iter != vocab->action_names.end()) {
+    unsigned action_index = distance(vocab->action_names.begin(), action_iter);
     correct_actions->push_back(action_index);
   } else { // A not-previously-seen action
     if (is_training) {
-      vocab->actions.push_back(action);
-      unsigned action_index = vocab->actions.size() - 1;
+      vocab->action_names.push_back(action);
+      unsigned action_index = vocab->action_names.size() - 1;
       correct_actions->push_back(action_index);
       vocab->actions_to_arc_labels.push_back(vocab->GetLabelForAction(action));
     } else {
@@ -323,7 +323,7 @@ void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
 
   cerr << "done." << "\n";
   if (is_training) {
-    for (auto a : vocab->actions) {
+    for (auto a : vocab->action_names) {
       vocab->actions_to_arc_labels.push_back(vocab->GetLabelForAction(a));
       cerr << a << "\n";
     }
diff --git a/parser/corpus.h b/parser/corpus.h
index 2123738..ed239a3 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -36,7 +36,7 @@ class CorpusVocabulary {
   StrToIntMap chars_to_int;
   std::vector<std::string> int_to_chars;
 
-  std::vector<std::string> actions;
+  std::vector<std::string> action_names;
   std::vector<std::string> actions_to_arc_labels;
 
   unsigned kUNK;
@@ -50,7 +50,7 @@ class CorpusVocabulary {
   inline unsigned CountPOS() { return pos_to_int.size(); }
   inline unsigned CountWords() { return words_to_int.size(); }
   inline unsigned CountChars() { return chars_to_int.size(); }
-  inline unsigned CountActions() { return actions.size(); }
+  inline unsigned CountActions() { return action_names.size(); }
 
   inline unsigned GetWord(const std::string& word) const {
     auto word_iter = words_to_int.find(word);
@@ -118,7 +118,7 @@ class CorpusVocabulary {
     ar & vocab->int_to_pos;
     ar & vocab->int_to_chars;
     ar & vocab->int_to_training_word;
-    ar & vocab->actions;
+    ar & vocab->action_names;
   }
 
   template<class Archive>
@@ -152,7 +152,7 @@ class CorpusVocabulary {
       chars_to_int[int_to_chars[i]] = i;
 
     // ...and the arc labels.
-    for (const std::string& action : actions) {
+    for (const std::string& action : action_names) {
       actions_to_arc_labels.push_back(GetLabelForAction(action));
     }
   }
diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 9183110..c5232f4 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -137,9 +137,8 @@ LSTMParser::LSTMParser(const ParserOptions& poptions,
 
 
 bool LSTMParser::IsActionForbidden(const unsigned action,
-                                   const vector<string>& action_names,
                                    const TaggerState& state) const {
-  const string& action_name = action_names[action];
+  const string& action_name = vocab.action_names[action];
   const ParserState& real_state = static_cast<const ParserState&>(state);
   unsigned ssize = real_state.stack.size();
   unsigned bsize = real_state.buffer.size();
@@ -184,7 +183,7 @@ ParseTree LSTMParser::RecoverParseTree(
         index_and_word_id.first;
   }
   for (auto action : actions) { // loop over transitions for sentence
-    const string& action_string = vocab.actions[action];
+    const string& action_string = vocab.action_names[action];
     const char ac = action_string[0];
     const char ac2 = action_string[1];
     if (ac == 'S' && ac2 == 'H') {  // SHIFT
@@ -236,8 +235,8 @@ Expression LSTMParser::GetActionProbabilities(const TaggerState& state) {
 }
 
 
-void LSTMParser::DoAction(unsigned action, const vector<string>& action_names,
-                          TaggerState* state, ComputationGraph* cg) {
+void LSTMParser::DoAction(unsigned action, TaggerState* state,
+                          ComputationGraph* cg) {
   ParserState* real_state = static_cast<ParserState*>(state);
   // add current action to action LSTM
   Expression action_e = lookup(*cg, p_a, action);
@@ -247,7 +246,7 @@ void LSTMParser::DoAction(unsigned action, const vector<string>& action_names,
   Expression relation = lookup(*cg, p_r, action);
 
   // do action
-  const string& action_string = action_names[action];
+  const string& action_string = vocab.action_names[action];
   const char ac = action_string[0];
   const char ac2 = action_string[1];
 
@@ -317,8 +316,7 @@ NeuralTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
     ComputationGraph* cg,
     const Sentence& raw_sent,
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-    const vector<unsigned>& correct_actions,
-    const vector<string>& action_names) {
+    const vector<unsigned>& correct_actions) {
   stack_lstm.new_graph(*cg);
   buffer_lstm.new_graph(*cg);
   action_lstm.new_graph(*cg);
@@ -423,9 +421,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
       }
       const vector<unsigned>& actions = corpus.correct_act_sent[order[si]];
       ComputationGraph hg;
-      LogProbTagger(&hg, sentence, tsentence, actions,
-                    corpus.vocab->actions, corpus.vocab->int_to_words,
-                    &correct);
+      LogProbTagger(&hg, sentence, tsentence, actions, &correct);
       double lp = as_scalar(hg.incremental_forward());
       if (lp < 0) {
         cerr << "Log prob < 0 on sentence " << order[si] << ": lp=" << lp
@@ -492,7 +488,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 ParseTree LSTMParser::Parse(const Sentence& sentence,
                             const CorpusVocabulary& vocab, bool labeled) {
   ComputationGraph cg;
-  vector<unsigned> pred = LogProbTagger(sentence, vocab, &cg);
+  vector<unsigned> pred = LogProbTagger(sentence, &cg);
   double lp = as_scalar(cg.incremental_forward());
   return RecoverParseTree(sentence, pred, labeled, lp);
 }
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index fed01df..867de4b 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -240,8 +240,7 @@ class LSTMParser : public NeuralTransitionTagger {
   virtual TaggerState* InitializeParserState(
       cnn::ComputationGraph* cg, const Sentence& raw_sent,
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions,
-      const std::vector<std::string>& action_names) override;
+      const std::vector<unsigned>& correct_actions) override;
 
   virtual void InitializeNetworkParameters() override;
 
@@ -251,15 +250,13 @@ class LSTMParser : public NeuralTransitionTagger {
   }
 
   virtual bool IsActionForbidden(const unsigned action,
-                                 const std::vector<std::string>& action_names,
                                  const TaggerState& state) const override;
 
   virtual cnn::expr::Expression GetActionProbabilities(const TaggerState& state)
       override;
 
-  virtual void DoAction(unsigned action,
-                        const std::vector<std::string>& action_names,
-                        TaggerState* state, cnn::ComputationGraph* cg) override;
+  virtual void DoAction(unsigned action, TaggerState* state,
+                        cnn::ComputationGraph* cg) override;
 
   inline unsigned ComputeCorrect(const ParseTree& ref,
                                  const ParseTree& hyp) const {
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index d629a5d..7a265ca 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -40,7 +40,7 @@ void NeuralTransitionTagger::FinalizeVocab() {
     return;
   InitializeNetworkParameters();
   // Give up memory we don't need.
-  vocab.actions.shrink_to_fit();
+  vocab.action_names.shrink_to_fit();
   vocab.actions_to_arc_labels.shrink_to_fit();
   vocab.int_to_chars.shrink_to_fit();
   vocab.int_to_pos.shrink_to_fit();
@@ -50,7 +50,7 @@ void NeuralTransitionTagger::FinalizeVocab() {
 }
 
 Sentence::SentenceMap NeuralTransitionTagger::ReplaceUnknowns(
-    const Sentence& sentence, const CorpusVocabulary& vocab) {
+    const Sentence& sentence) {
   Sentence::SentenceMap tsentence(sentence.words);  // sentence w/ OOVs replaced
   for (auto& index_and_id : tsentence) {
     // use reference to overwrite
@@ -63,14 +63,12 @@ Sentence::SentenceMap NeuralTransitionTagger::ReplaceUnknowns(
 }
 
 vector<unsigned> NeuralTransitionTagger::LogProbTagger(
-    const Sentence& sentence, const CorpusVocabulary& vocab,
-    ComputationGraph *cg, bool replace_unknowns,
+    const Sentence& sentence, ComputationGraph *cg, bool replace_unknowns,
     Expression* final_parser_state) {
   return LogProbTagger(
       cg, sentence,
-      replace_unknowns ? ReplaceUnknowns(sentence, vocab) : sentence.words,
-      vector<unsigned>(), vocab.actions, vocab.int_to_words, nullptr,
-      final_parser_state);
+      replace_unknowns ? ReplaceUnknowns(sentence) : sentence.words,
+      vector<unsigned>(), nullptr, final_parser_state);
 }
 
 
@@ -78,8 +76,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     ComputationGraph* cg,
     const Sentence& raw_sent,  // raw sentence
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-    const vector<unsigned>& correct_actions, const vector<string>& action_names,
-    const vector<string>& int_to_words, double* correct,
+    const vector<unsigned>& correct_actions, double* correct,
     Expression* final_parser_state) {
   assert(finalized);
   vector<unsigned> results;
@@ -91,8 +88,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
   }
 
   unique_ptr<TaggerState> state(InitializeParserState(cg, raw_sent, sent,
-                                                      correct_actions,
-                                                      action_names));
+                                                      correct_actions));
 
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
@@ -100,8 +96,8 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
   while (!ShouldTerminate(*state)) {
     // Get list of possible actions for the current parser state.
     vector<unsigned> current_valid_actions;
-    for (unsigned action = 0; action < action_names.size(); ++action) {
-      if (IsActionForbidden(action, action_names, *state))
+    for (unsigned action = 0; action < vocab.action_names.size(); ++action) {
+      if (IsActionForbidden(action, *state))
         continue;
       current_valid_actions.push_back(action);
     }
@@ -131,7 +127,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     log_probs.push_back(pick(adiste, action));
     results.push_back(action);
 
-    DoAction(action, action_names, state.get(), cg);
+    DoAction(action, state.get(), cg);
   }
 
   Expression tot_neglogprob = -sum(log_probs);
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index f6d08ff..baac0b4 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -25,8 +25,7 @@ class NeuralTransitionTagger {
 
   // Used for testing. Replaces OOV with UNK.
   std::vector<unsigned> LogProbTagger(
-      const Sentence& sentence, const CorpusVocabulary& vocab,
-      cnn::ComputationGraph *cg,
+      const Sentence& sentence, cnn::ComputationGraph *cg,
       bool replace_unknowns = true,
       cnn::expr::Expression* final_parser_state = nullptr);
 
@@ -56,8 +55,7 @@ class NeuralTransitionTagger {
   virtual TaggerState* InitializeParserState(
       cnn::ComputationGraph* hg, const Sentence& raw_sent,
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions,
-      const std::vector<std::string>& action_names) = 0;
+      const std::vector<unsigned>& correct_actions) = 0;
 
   virtual cnn::expr::Expression GetActionProbabilities(
       const TaggerState& state) = 0;
@@ -65,12 +63,10 @@ class NeuralTransitionTagger {
   virtual bool ShouldTerminate(const TaggerState& state) const = 0;
 
   virtual bool IsActionForbidden(const unsigned action,
-                                 const std::vector<std::string>& action_names,
                                  const TaggerState& state) const = 0;
 
-  virtual void DoAction(unsigned action,
-                        const std::vector<std::string>& action_names,
-                        TaggerState* state, cnn::ComputationGraph* cg) = 0;
+  virtual void DoAction(unsigned action, TaggerState* state,
+                        cnn::ComputationGraph* cg) = 0;
 
   virtual void DoSave(eos::portable_oarchive& archive) = 0;
 
@@ -89,12 +85,9 @@ class NeuralTransitionTagger {
       const Sentence& sentence, // raw sentence
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
       const std::vector<unsigned>& correct_actions,
-      const std::vector<std::string>& action_names,
-      const std::vector<std::string>& int_to_words, double* correct,
-      cnn::expr::Expression* final_parser_state = nullptr);
+      double* correct, cnn::expr::Expression* final_parser_state = nullptr);
 
-  Sentence::SentenceMap ReplaceUnknowns(const Sentence& sentence,
-                                        const CorpusVocabulary& vocab);
+  Sentence::SentenceMap ReplaceUnknowns(const Sentence& sentence);
 };
 
 } /* namespace lstm_parser */

From 33f51821cd8b7a9550a217a7459a669150128c5f Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 16 Mar 2017 14:22:23 -0400
Subject: [PATCH 50/88] More constness

---
 parser/lstm-parser.cc | 2 +-
 parser/lstm-parser.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index c5232f4..1754d8d 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -171,7 +171,7 @@ bool LSTMParser::IsActionForbidden(const unsigned action,
 
 ParseTree LSTMParser::RecoverParseTree(
     const Sentence& sentence, const vector<unsigned>& actions, double logprob,
-    bool labeled) {
+    bool labeled) const {
   ParseTree tree(sentence, labeled);
   vector<int> bufferi(sentence.Size() + 1);
   bufferi[0] = -999;
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 867de4b..ad4376f 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -188,7 +188,7 @@ class LSTMParser : public NeuralTransitionTagger {
   // take a vector of actions and return a parse tree
   ParseTree RecoverParseTree(
       const Sentence& sentence, const std::vector<unsigned>& actions,
-      double logprob = 0, bool labeled = false);
+      double logprob = 0, bool labeled = false) const;
 
   void Train(const ParserTrainingCorpus& corpus,
              const ParserTrainingCorpus& dev_corpus, const double unk_prob,

From 81f9ef798a575fe5eba91ae782b7b8ece41c4ff4 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Fri, 17 Mar 2017 00:40:02 -0400
Subject: [PATCH 51/88] Added copy/move constructors to ParseTree

---
 parser/lstm-parser.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index ad4376f..ba54586 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -78,7 +78,17 @@ class ParseTree {
       arc_labels(labeled ? new std::map<unsigned, std::string> : nullptr),
       sentence(sentence) {}
 
-  inline void SetParent(unsigned child_index, unsigned parent_index,
+  ParseTree(const ParseTree& other)
+      : logprob(other.logprob), parents(other.parents),
+        arc_labels(other.IsLabeled() ?
+            new std::map<unsigned, std::string>(*other.arc_labels) : nullptr),
+        sentence(other.sentence) {}
+
+  ParseTree(ParseTree&& other) = default;
+
+  ParseTree& operator=(ParseTree&& other) = default;
+
+  void SetParent(unsigned child_index, unsigned parent_index,
                       const std::string& arc_label="") {
     parents[child_index] = parent_index;
     if (IsLabeled()) {
@@ -90,7 +100,7 @@ class ParseTree {
     return sentence.get();
   }
 
-  const inline unsigned GetParent(unsigned child) const {
+  const unsigned GetParent(unsigned child) const {
     auto parent_iter = parents.find(child);
     if (parent_iter == parents.end()) {
       return Corpus::ROOT_TOKEN_ID; // This is the best guess we've got.
@@ -99,7 +109,7 @@ class ParseTree {
     }
   }
 
-  const inline std::string& GetArcLabel(unsigned child) const {
+  const std::string& GetArcLabel(unsigned child) const {
     if (!IsLabeled())
       return NO_LABEL;
     auto arc_label_iter = arc_labels->find(child);
@@ -112,7 +122,7 @@ class ParseTree {
 
   bool IsLabeled() const { return arc_labels.get(); }
 
-private:
+protected:
   std::map<unsigned, unsigned> parents;
   std::unique_ptr<std::map<unsigned, std::string>> arc_labels;
   std::reference_wrapper<const Sentence> sentence;

From b360d62edc223f1a53e171fa1fcd2f044877b938 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Fri, 17 Mar 2017 19:19:05 -0400
Subject: [PATCH 52/88] Const-ness change

---
 parser/lstm-parser.cc | 2 +-
 parser/lstm-parser.h  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 1754d8d..7c9a202 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -23,7 +23,7 @@ using namespace std;
 namespace lstm_parser {
 
 
-string ParseTree::NO_LABEL = "ERROR";
+const string ParseTree::NO_LABEL("ERROR");
 
 
 void LSTMParser::LoadPretrainedWords(const string& words_path) {
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index ba54586..7374df8 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -70,7 +70,8 @@ struct ParserOptions {
 // Barebones representation of a parse tree.
 class ParseTree {
 public:
-  static std::string NO_LABEL;
+  static const std::string NO_LABEL;
+
   double logprob;
 
   ParseTree(const Sentence& sentence, bool labeled = true) :

From 3e8a27503f1f3249e73a663c7ceaf0f456a0c19c Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Fri, 17 Mar 2017 21:03:08 -0400
Subject: [PATCH 53/88] Added root child to ParseTree representation

---
 parser/lstm-parser.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 7374df8..fd8eac5 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -77,13 +77,13 @@ class ParseTree {
   ParseTree(const Sentence& sentence, bool labeled = true) :
       logprob(0),
       arc_labels(labeled ? new std::map<unsigned, std::string> : nullptr),
-      sentence(sentence) {}
+      sentence(sentence), root_child(-1) {}
 
   ParseTree(const ParseTree& other)
       : logprob(other.logprob), parents(other.parents),
         arc_labels(other.IsLabeled() ?
             new std::map<unsigned, std::string>(*other.arc_labels) : nullptr),
-        sentence(other.sentence) {}
+        sentence(other.sentence), root_child(-1) {}
 
   ParseTree(ParseTree&& other) = default;
 
@@ -95,6 +95,9 @@ class ParseTree {
     if (IsLabeled()) {
       (*arc_labels)[child_index] = arc_label;
     }
+    if (parent_index == Corpus::ROOT_TOKEN_ID) {
+      root_child = child_index;
+    }
   }
 
   const Sentence& GetSentence() const {
@@ -121,12 +124,15 @@ class ParseTree {
     }
   }
 
+  const unsigned GetRootChild() const { return root_child; }
+
   bool IsLabeled() const { return arc_labels.get(); }
 
 protected:
   std::map<unsigned, unsigned> parents;
   std::unique_ptr<std::map<unsigned, std::string>> arc_labels;
   std::reference_wrapper<const Sentence> sentence;
+  unsigned root_child;
 };
 
 

From 7599db133a6ec4af40ec6ab5426bab7fcf83509e Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Fri, 17 Mar 2017 22:58:36 -0400
Subject: [PATCH 54/88] Relinquish CNN memory when replacing a model

---
 parser/corpus.cc                    |  2 +-
 parser/lstm-parser.cc               | 54 ++++++++++++++---------------
 parser/lstm-parser.h                | 12 +++----
 parser/neural-transition-tagger.cpp |  2 ++
 parser/neural-transition-tagger.h   |  6 ++--
 5 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/parser/corpus.cc b/parser/corpus.cc
index 6636824..d9291c9 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -204,7 +204,7 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
 
 void ParserTrainingCorpus::OracleParseTransitionsReader::LoadCorrectActions(
     const string& file, ParserTrainingCorpus* corpus) const {
-  cerr << "Loading " << (is_training ? "training" : "dev")
+  cerr << "Loading " << (is_training ? "training" : "dev/test")
        << " corpus from " << file << "..." << endl;
   ifstream actions_file(file);
   if (!actions_file) {
diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 7c9a202..f545f44 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -74,37 +74,37 @@ void LSTMParser::InitializeNetworkParameters() {
 
   if (!pretrained.empty()) {
     unsigned pretrained_dim = pretrained.begin()->second.size();
-    p_t = model.add_lookup_parameters(vocab_size, {pretrained_dim});
+    p_t = model->add_lookup_parameters(vocab_size, {pretrained_dim});
     for (const auto& it : pretrained)
       p_t->Initialize(it.first, it.second);
-    p_t2l = model.add_parameters({options.lstm_input_dim, pretrained_dim});
+    p_t2l = model->add_parameters({options.lstm_input_dim, pretrained_dim});
   } else {
     p_t = nullptr;
     p_t2l = nullptr;
   }
 
-  p_w = model.add_lookup_parameters(vocab_size, {options.input_dim});
-  p_a = model.add_lookup_parameters(action_size, {options.action_dim});
-  p_r = model.add_lookup_parameters(action_size, {options.rel_dim});
-  p_pbias = model.add_parameters({options.hidden_dim});
-  p_A = model.add_parameters({options.hidden_dim, options.hidden_dim});
-  p_B = model.add_parameters({options.hidden_dim, options.hidden_dim});
-  p_S = model.add_parameters({options.hidden_dim, options.hidden_dim});
-  p_H = model.add_parameters({options.lstm_input_dim, options.lstm_input_dim});
-  p_D = model.add_parameters({options.lstm_input_dim, options.lstm_input_dim});
-  p_R = model.add_parameters({options.lstm_input_dim, options.rel_dim});
-  p_w2l = model.add_parameters({options.lstm_input_dim, options.input_dim});
-  p_ib = model.add_parameters({options.lstm_input_dim});
-  p_cbias = model.add_parameters({options.lstm_input_dim});
-  p_p2a = model.add_parameters({action_size, options.hidden_dim});
-  p_action_start = model.add_parameters({options.action_dim});
-  p_abias = model.add_parameters({action_size});
-  p_buffer_guard = model.add_parameters({options.lstm_input_dim});
-  p_stack_guard = model.add_parameters({options.lstm_input_dim});
+  p_w = model->add_lookup_parameters(vocab_size, {options.input_dim});
+  p_a = model->add_lookup_parameters(action_size, {options.action_dim});
+  p_r = model->add_lookup_parameters(action_size, {options.rel_dim});
+  p_pbias = model->add_parameters({options.hidden_dim});
+  p_A = model->add_parameters({options.hidden_dim, options.hidden_dim});
+  p_B = model->add_parameters({options.hidden_dim, options.hidden_dim});
+  p_S = model->add_parameters({options.hidden_dim, options.hidden_dim});
+  p_H = model->add_parameters({options.lstm_input_dim, options.lstm_input_dim});
+  p_D = model->add_parameters({options.lstm_input_dim, options.lstm_input_dim});
+  p_R = model->add_parameters({options.lstm_input_dim, options.rel_dim});
+  p_w2l = model->add_parameters({options.lstm_input_dim, options.input_dim});
+  p_ib = model->add_parameters({options.lstm_input_dim});
+  p_cbias = model->add_parameters({options.lstm_input_dim});
+  p_p2a = model->add_parameters({action_size, options.hidden_dim});
+  p_action_start = model->add_parameters({options.action_dim});
+  p_abias = model->add_parameters({action_size});
+  p_buffer_guard = model->add_parameters({options.lstm_input_dim});
+  p_stack_guard = model->add_parameters({options.lstm_input_dim});
 
   if (options.use_pos) {
-    p_p = model.add_lookup_parameters(pos_size, {options.pos_dim});
-    p_p2l = model.add_parameters({options.lstm_input_dim, options.pos_dim});
+    p_p = model->add_lookup_parameters(pos_size, {options.pos_dim});
+    p_p2l = model->add_parameters({options.lstm_input_dim, options.pos_dim});
   } else {
     p_p = nullptr;
     p_p2l = nullptr;
@@ -117,11 +117,11 @@ LSTMParser::LSTMParser(const ParserOptions& poptions,
       options(poptions),
       kROOT_SYMBOL(vocab.GetOrAddWord(vocab.ROOT)),
       stack_lstm(options.layers, options.lstm_input_dim, options.hidden_dim,
-                 &model),
+                 model.get()),
       buffer_lstm(options.layers, options.lstm_input_dim, options.hidden_dim,
-                  &model),
+                  model.get()),
       action_lstm(options.layers, options.action_dim, options.hidden_dim,
-                  &model) {
+                  model.get()) {
   // First load words if needed before creating network parameters.
   // That will ensure that the vocab has the final number of words.
   if (!pretrained_words_path.empty()) {
@@ -373,8 +373,8 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
   bool softlink_created = false;
   int best_correct_heads = 0;
   unsigned status_every_i_iterations = 100;
-  SimpleSGDTrainer sgd(&model);
-  //MomentumSGDTrainer sgd(model);
+  SimpleSGDTrainer sgd(model.get());
+  //MomentumSGDTrainer sgd(model.get());
   sgd.eta_decay = 0.08;
   //sgd.eta_decay = 0.05;
   unsigned num_sentences = corpus.sentences.size();
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index fd8eac5..84e7285 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -299,7 +299,7 @@ class LSTMParser : public NeuralTransitionTagger {
     ar & options;
     ar & vocab;
     ar & pretrained;
-    ar & model;
+    ar & *model;
   }
 
   template<class Archive>
@@ -312,19 +312,19 @@ class LSTMParser : public NeuralTransitionTagger {
     ar & pretrained;
     // Don't finalize yet...we want to finalize once our model is initialized.
 
-    model = cnn::Model();
+    model.reset(new cnn::Model);
     // Reset the LSTMs *before* reading in the network model, to make sure the
     // model knows how big it's supposed to be.
     stack_lstm = cnn::LSTMBuilder(options.layers, options.lstm_input_dim,
-                                  options.hidden_dim, &model);
+                                  options.hidden_dim, model.get());
     buffer_lstm = cnn::LSTMBuilder(options.layers, options.lstm_input_dim,
-                                   options.hidden_dim, &model);
+                                   options.hidden_dim, model.get());
     action_lstm = cnn::LSTMBuilder(options.layers, options.action_dim,
-                                   options.hidden_dim, &model);
+                                   options.hidden_dim, model.get());
 
     FinalizeVocab(); // OK, now finalize. :)
 
-    ar & model;
+    ar & *model;
   }
   BOOST_SERIALIZATION_SPLIT_MEMBER();
 
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 7a265ca..b886c3b 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -38,6 +38,8 @@ void NeuralTransitionTagger::SaveModel(const string& model_fname,
 void NeuralTransitionTagger::FinalizeVocab() {
   if (finalized)
     return;
+  if (!model.get())
+    model.reset(new Model);
   InitializeNetworkParameters();
   // Give up memory we don't need.
   vocab.action_names.shrink_to_fit();
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index baac0b4..fd20ffa 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -17,8 +17,7 @@ namespace lstm_parser {
 
 class NeuralTransitionTagger {
 public:
-
-  NeuralTransitionTagger() : finalized(false) {}
+  NeuralTransitionTagger() : finalized(false), model(new cnn::Model) {}
   virtual ~NeuralTransitionTagger() {}
 
   void FinalizeVocab();
@@ -43,7 +42,8 @@ class NeuralTransitionTagger {
   bool finalized;
   std::map<cnn::Parameters*, cnn::expr::Expression> param_expressions;
 
-  cnn::Model model;
+  // Store the model as a smart ptr so we can call its destructor when needed.
+  std::unique_ptr<cnn::Model> model;
   CorpusVocabulary vocab;
 
   inline cnn::expr::Expression GetParamExpr(cnn::Parameters* params) {

From 415f0844d14ceb75214bc6b2e8297dd96d40c1f7 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 18 Mar 2017 16:54:17 -0400
Subject: [PATCH 55/88] Fixed memory leak

---
 parser/lstm-parser.h              | 6 +++---
 parser/neural-transition-tagger.h | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 84e7285..684f0b0 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -231,9 +231,9 @@ class LSTMParser : public NeuralTransitionTagger {
 
     ParserState(const Sentence& raw_sentence,
                 const Sentence::SentenceMap& sentence, Expression stack_guard)
-        : TaggerState {raw_sentence, sentence}, buffer(raw_sentence.Size() + 1),
-          bufferi(raw_sentence.Size() + 1), stack( {stack_guard}),
-          stacki( {-999}) {}
+        : TaggerState(raw_sentence, sentence), buffer(raw_sentence.Size() + 1),
+          bufferi(raw_sentence.Size() + 1), stack({stack_guard}),
+          stacki({-999}) {}
 
     ~ParserState() {
       assert(stack.size() == 2); // guard symbol, root
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index fd20ffa..0ef5514 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -35,8 +35,12 @@ class NeuralTransitionTagger {
 
 protected:
   struct TaggerState {
+    TaggerState(const Sentence& raw_sentence,
+                const Sentence::SentenceMap& sentence)
+        : raw_sentence(raw_sentence), sentence(sentence) {}
     const Sentence& raw_sentence;
     const Sentence::SentenceMap& sentence;
+    virtual ~TaggerState() {}
   };
 
   bool finalized;

From 0be98ffbaebe1d0d0100116d23ffadf530742244 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 18 Mar 2017 17:30:29 -0400
Subject: [PATCH 56/88] Patched memory leak

---
 cnn/cnn/tensor.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/cnn/cnn/tensor.h b/cnn/cnn/tensor.h
index 0516fe7..599b97a 100644
--- a/cnn/cnn/tensor.h
+++ b/cnn/cnn/tensor.h
@@ -7,6 +7,7 @@
 #include "cnn/dim.h"
 #include "cnn/random.h"
 #include "cnn/aligned-mem-pool.h"
+#include "devices.h"
 
 #if HAVE_CUDA
 #include <cuda.h>
@@ -26,6 +27,7 @@ namespace cnn {
 #define EIGEN_BACKEND 1
 
 typedef float real;
+extern Device* default_device; // for allocating memory on a load
 
 struct Tensor {
   Tensor() = default;
@@ -160,8 +162,12 @@ struct Tensor {
     float* vc = static_cast<float*>(std::malloc(d.size() * sizeof(float)));
     ar & boost::serialization::make_array(vc, d.size());
     CUDA_CHECK(cudaMemcpyAsync(v, vc, d.size() * sizeof(float), cudaMemcpyHostToDevice));
+    free(vc);
 #else
-    v = static_cast<float*>(_mm_malloc(d.size() * sizeof(float), 32));
+    // UGLY HACK to avoid memory leak: node values and gradients don't get
+    // stored to disk; only parameters. So allocate memory for loading from the
+    // parameters pool.
+    v = static_cast<float*>(default_device->ps->allocate(d.size() * sizeof(float)));
     ar & boost::serialization::make_array(v, d.size());
 #endif
   }

From e25679a940c00c6b13ff06c295c0e9eb9c81a5e7 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 18 Mar 2017 19:35:41 -0400
Subject: [PATCH 57/88] Fixed another memory leak

---
 cnn/cnn/model.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cnn/cnn/model.cc b/cnn/cnn/model.cc
index 4bd35d4..7179e4d 100644
--- a/cnn/cnn/model.cc
+++ b/cnn/cnn/model.cc
@@ -160,6 +160,7 @@ void LookupParameters::clear() {
 
 Model::~Model() {
   for (auto p : all_params) delete p;
+  default_device->mem->free(gradient_norm_scratch);
 }
 
 void Model::project_weights(float radius) {

From 5d2b8145e2670ff85551db04362f4c5e819064ce Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 18 Mar 2017 21:29:49 -0400
Subject: [PATCH 58/88] Added WordForToken convenience function to Sentence

---
 parser/corpus.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/parser/corpus.h b/parser/corpus.h
index ed239a3..5eac736 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -213,6 +213,12 @@ class Sentence {
     oss << *this;
     return oss.str();
   }
+
+  const std::string& WordForToken(unsigned token_id) const {
+    unsigned word_id = words.at(token_id);
+    return word_id == vocab.kUNK ? unk_surface_forms.at(token_id)
+                                 : vocab.int_to_words[word_id];
+  }
 };
 
 inline std::ostream& operator<<(std::ostream& os, const Sentence&sentence) {

From 9f74cb09c303989a8fe7ef3bcafedff95b254e84 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 18 Mar 2017 22:15:26 -0400
Subject: [PATCH 59/88] Made CNN initialization return the random seed

---
 cnn/cnn/init.cc | 4 +++-
 cnn/cnn/init.h  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/cnn/cnn/init.cc b/cnn/cnn/init.cc
index 4e0a1a3..915a246 100644
--- a/cnn/cnn/init.cc
+++ b/cnn/cnn/init.cc
@@ -30,7 +30,7 @@ static void RemoveArgs(int& argc, char**& argv, int& argi, int n) {
   assert(argc >= 0);
 }
 
-void Initialize(int& argc, char**& argv, unsigned random_seed, bool shared_parameters) {
+unsigned Initialize(int& argc, char**& argv, unsigned random_seed, bool shared_parameters) {
   vector<Device*> gpudevices;
 #if HAVE_CUDA
   cerr << "[cnn] initializing CUDA\n";
@@ -88,6 +88,8 @@ void Initialize(int& argc, char**& argv, unsigned random_seed, bool shared_param
   kSCALAR_ONE = default_device->kSCALAR_ONE;
   kSCALAR_ZERO = default_device->kSCALAR_ZERO;
   cerr << "[cnn] memory allocation done.\n";
+
+  return random_seed;
 }
 
 void Cleanup() {
diff --git a/cnn/cnn/init.h b/cnn/cnn/init.h
index e9e8fef..80a4b28 100644
--- a/cnn/cnn/init.h
+++ b/cnn/cnn/init.h
@@ -3,7 +3,7 @@
 
 namespace cnn {
 
-void Initialize(int& argc, char**& argv, unsigned random_seed = 0, bool shared_parameters = false);
+unsigned Initialize(int& argc, char**& argv, unsigned random_seed = 0, bool shared_parameters = false);
 void Cleanup();
 
 } // namespace cnn

From e27f33027adaf74c17ee536283601304a66cf679 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 18 Mar 2017 22:59:53 -0400
Subject: [PATCH 60/88] A bit of cleanup

---
 parser/corpus.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index 5eac736..c5a9d0a 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -10,7 +10,6 @@
 #include <map>
 #include <set>
 #include <string>
-#include <sstream>
 #include <utility>
 #include <vector>
 
@@ -208,12 +207,6 @@ class Sentence {
     return words.size();
   }
 
-  std::string AsString() const {
-    std::ostringstream oss;
-    oss << *this;
-    return oss.str();
-  }
-
   const std::string& WordForToken(unsigned token_id) const {
     unsigned word_id = words.at(token_id);
     return word_id == vocab.kUNK ? unk_surface_forms.at(token_id)
@@ -221,7 +214,7 @@ class Sentence {
   }
 };
 
-inline std::ostream& operator<<(std::ostream& os, const Sentence&sentence) {
+inline std::ostream& operator<<(std::ostream& os, const Sentence& sentence) {
   for (auto &index_and_word_id : sentence.words) {
     unsigned index = index_and_word_id.first;
     unsigned word_id = index_and_word_id.second;

From 5a2256753dcf2d52a223a5689da14b5beec004ce Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 20 Mar 2017 14:06:00 -0400
Subject: [PATCH 61/88] Minor memory management improvement

---
 cnn/cnn/model.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/cnn/cnn/model.h b/cnn/cnn/model.h
index 2e76194..ddb6258 100644
--- a/cnn/cnn/model.h
+++ b/cnn/cnn/model.h
@@ -103,6 +103,15 @@ struct LookupParameters : public ParametersBase {
 class Model {
  public:
   Model() : gradient_norm_scratch() {}
+  Model(const Model&) = delete;  // owns its parameters and scratch buffer
+  // Move: adopt m's parameters and scratch buffer; m keeps nothing to free.
+  Model(Model&& m) : gradient_norm_scratch(m.gradient_norm_scratch) {
+    all_params = std::move(m.all_params);
+    lookup_params = std::move(m.lookup_params);
+    params = std::move(m.params);
+    // Null m's pointer so its destructor doesn't free the adopted buffer.
+    m.gradient_norm_scratch = nullptr;
+  }
   ~Model();
   float gradient_l2_norm() const;
   void reset_gradient();

From 478e3ffe4a0212ae51857af2878d5d0b3aac48c5 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 20 Mar 2017 23:01:21 -0400
Subject: [PATCH 62/88] Possible minor memory management improvement

---
 cnn/cnn/exec.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cnn/cnn/exec.cc b/cnn/cnn/exec.cc
index bc8b799..4005ad4 100644
--- a/cnn/cnn/exec.cc
+++ b/cnn/cnn/exec.cc
@@ -10,6 +10,7 @@ ExecutionEngine::~ExecutionEngine() {}
 
 void SimpleExecutionEngine::invalidate() {
   num_nodes_evaluated = 0;
+  fxs->free();
 }
 
 const Tensor& SimpleExecutionEngine::forward() { 

From 9c2be25a2369c49117ac7eb16f55657dfe1ced98 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 20 Mar 2017 23:05:32 -0400
Subject: [PATCH 63/88] Made main LogProbTagger public

---
 parser/lstm-parser.cc             |  8 ++++----
 parser/neural-transition-tagger.h | 26 +++++++++++++-------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index f545f44..a76aa03 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -420,15 +420,15 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
         }
       }
       const vector<unsigned>& actions = corpus.correct_act_sent[order[si]];
-      ComputationGraph hg;
-      LogProbTagger(&hg, sentence, tsentence, actions, &correct);
-      double lp = as_scalar(hg.incremental_forward());
+      ComputationGraph cg;
+      LogProbTagger(&cg, sentence, tsentence, actions, &correct);
+      double lp = as_scalar(cg.incremental_forward());
       if (lp < 0) {
         cerr << "Log prob < 0 on sentence " << order[si] << ": lp=" << lp
              << endl;
         assert(lp >= 0.0);
       }
-      hg.backward();
+      cg.backward();
       sgd.update(1.0);
       llh += lp;
       ++si;
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index 0ef5514..e927edf 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -28,6 +28,19 @@ class NeuralTransitionTagger {
       bool replace_unknowns = true,
       cnn::expr::Expression* final_parser_state = nullptr);
 
+  // *** if correct_actions is empty, this runs greedy decoding ***
+  // returns actions for input sentence (in training just returns the reference)
+  // OOV handling: raw_sent will have the actual words
+  //               sent will have words replaced by appropriate UNK tokens
+  // this lets us use pretrained embeddings, when available, for words that were
+  // OOV in the training data.
+  std::vector<unsigned> LogProbTagger(
+      cnn::ComputationGraph* cg,
+      const Sentence& sentence, // raw sentence
+      const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
+      const std::vector<unsigned>& correct_actions,
+      double* correct, cnn::expr::Expression* final_parser_state = nullptr);
+
   const CorpusVocabulary& GetVocab() const { return vocab; }
 
   // TODO: arrange things such that we don't need to expose this?
@@ -78,19 +91,6 @@ class NeuralTransitionTagger {
 
   void SaveModel(const std::string& model_fname, bool softlink_created);
 
-  // *** if correct_actions is empty, this runs greedy decoding ***
-  // returns actions for input sentence (in training just returns the reference)
-  // OOV handling: raw_sent will have the actual words
-  //               sent will have words replaced by appropriate UNK tokens
-  // this lets us use pretrained embeddings, when available, for words that were
-  // OOV in the training data.
-  std::vector<unsigned> LogProbTagger(
-      cnn::ComputationGraph* hg,
-      const Sentence& sentence, // raw sentence
-      const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions,
-      double* correct, cnn::expr::Expression* final_parser_state = nullptr);
-
   Sentence::SentenceMap ReplaceUnknowns(const Sentence& sentence);
 };
 

From 4e470b9c29bcd4b3ef810c0b7ac348d5c911522f Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 20 Mar 2017 23:09:44 -0400
Subject: [PATCH 64/88] Minor parameter order change

---
 parser/lstm-parser.cc               | 2 +-
 parser/neural-transition-tagger.cpp | 2 +-
 parser/neural-transition-tagger.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index a76aa03..97ba3a6 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -488,7 +488,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
 ParseTree LSTMParser::Parse(const Sentence& sentence,
                             const CorpusVocabulary& vocab, bool labeled) {
   ComputationGraph cg;
-  vector<unsigned> pred = LogProbTagger(sentence, &cg);
+  vector<unsigned> pred = LogProbTagger(&cg, sentence);
   double lp = as_scalar(cg.incremental_forward());
   return RecoverParseTree(sentence, pred, labeled, lp);
 }
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index b886c3b..3409ede 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -65,7 +65,7 @@ Sentence::SentenceMap NeuralTransitionTagger::ReplaceUnknowns(
 }
 
 vector<unsigned> NeuralTransitionTagger::LogProbTagger(
-    const Sentence& sentence, ComputationGraph *cg, bool replace_unknowns,
+    ComputationGraph *cg, const Sentence& sentence, bool replace_unknowns,
     Expression* final_parser_state) {
   return LogProbTagger(
       cg, sentence,
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index e927edf..b3781da 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -24,7 +24,7 @@ class NeuralTransitionTagger {
 
   // Used for testing. Replaces OOV with UNK.
   std::vector<unsigned> LogProbTagger(
-      const Sentence& sentence, cnn::ComputationGraph *cg,
+      cnn::ComputationGraph *cg, const Sentence& sentence,
       bool replace_unknowns = true,
       cnn::expr::Expression* final_parser_state = nullptr);
 

From 24d9e5cc4670d0c9d0739884e3c4577a9f4628b3 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 20 Mar 2017 23:24:16 -0400
Subject: [PATCH 65/88] Sensible default params for LogProbTagger

---
 parser/neural-transition-tagger.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index b3781da..fd2f4a4 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -38,8 +38,9 @@ class NeuralTransitionTagger {
       cnn::ComputationGraph* cg,
       const Sentence& sentence, // raw sentence
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions,
-      double* correct, cnn::expr::Expression* final_parser_state = nullptr);
+      const std::vector<unsigned>& correct_actions = std::vector<unsigned>(),
+      double* correct = nullptr,
+      cnn::expr::Expression* final_parser_state = nullptr);
 
   const CorpusVocabulary& GetVocab() const { return vocab; }
 

From 88e1af0b3d202949104cdf6da387f8d522e4fdff Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 20 Mar 2017 23:36:10 -0400
Subject: [PATCH 66/88] Don't use reference actions in test, even if they're
 specified

---
 parser/lstm-parser.cc               |  2 +-
 parser/neural-transition-tagger.cpp | 15 +++------------
 parser/neural-transition-tagger.h   |  8 +++++++-
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 97ba3a6..095ac04 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -421,7 +421,7 @@ void LSTMParser::Train(const ParserTrainingCorpus& corpus,
       }
       const vector<unsigned>& actions = corpus.correct_act_sent[order[si]];
       ComputationGraph cg;
-      LogProbTagger(&cg, sentence, tsentence, actions, &correct);
+      LogProbTagger(&cg, sentence, tsentence, true, actions, &correct);
       double lp = as_scalar(cg.incremental_forward());
       if (lp < 0) {
         cerr << "Log prob < 0 on sentence " << order[si] << ": lp=" << lp
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 3409ede..487c078 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -64,25 +64,16 @@ Sentence::SentenceMap NeuralTransitionTagger::ReplaceUnknowns(
   return tsentence;
 }
 
-vector<unsigned> NeuralTransitionTagger::LogProbTagger(
-    ComputationGraph *cg, const Sentence& sentence, bool replace_unknowns,
-    Expression* final_parser_state) {
-  return LogProbTagger(
-      cg, sentence,
-      replace_unknowns ? ReplaceUnknowns(sentence) : sentence.words,
-      vector<unsigned>(), nullptr, final_parser_state);
-}
-
 
 vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     ComputationGraph* cg,
     const Sentence& raw_sent,  // raw sentence
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
+    bool training,
     const vector<unsigned>& correct_actions, double* correct,
     Expression* final_parser_state) {
   assert(finalized);
   vector<unsigned> results;
-  const bool build_training_graph = correct_actions.size() > 0;
 
   // variables in the computation graph representing the parameters
   for (Parameters *params : GetParameters()) {
@@ -117,8 +108,8 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
       }
     }
     unsigned action = best_a;
-    // If we have reference actions (for training), use the reference action.
-    if (build_training_graph) {
+    // If we're training, use the reference action.
+    if (training) {
       assert(action_count < correct_actions.size());
       action = correct_actions[action_count];
       if (correct && best_a == action) {
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index fd2f4a4..3e91d03 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -26,7 +26,12 @@ class NeuralTransitionTagger {
   std::vector<unsigned> LogProbTagger(
       cnn::ComputationGraph *cg, const Sentence& sentence,
       bool replace_unknowns = true,
-      cnn::expr::Expression* final_parser_state = nullptr);
+      cnn::expr::Expression* final_parser_state = nullptr) {
+    return LogProbTagger(
+        cg, sentence,
+        replace_unknowns ? ReplaceUnknowns(sentence) : sentence.words,
+        false, std::vector<unsigned>(), nullptr, final_parser_state);
+  }
 
   // *** if correct_actions is empty, this runs greedy decoding ***
   // returns actions for input sentence (in training just returns the reference)
@@ -38,6 +43,7 @@ class NeuralTransitionTagger {
       cnn::ComputationGraph* cg,
       const Sentence& sentence, // raw sentence
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
+      bool training = false,
       const std::vector<unsigned>& correct_actions = std::vector<unsigned>(),
       double* correct = nullptr,
       cnn::expr::Expression* final_parser_state = nullptr);

From c97a0f4b7287d58efabae580a0f10775ab2386d7 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 21 Mar 2017 00:07:26 -0400
Subject: [PATCH 67/88] *Do* still update the correct count if applicable, even
 in dev

---
 parser/neural-transition-tagger.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 487c078..e75fea9 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -72,6 +72,8 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     bool training,
     const vector<unsigned>& correct_actions, double* correct,
     Expression* final_parser_state) {
+  if (training)
+    assert(!correct_actions.empty());
   assert(finalized);
   vector<unsigned> results;
 
@@ -108,13 +110,16 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
       }
     }
     unsigned action = best_a;
-    // If we're training, use the reference action.
-    if (training) {
+
+    if (!correct_actions.empty()) {
       assert(action_count < correct_actions.size());
-      action = correct_actions[action_count];
-      if (correct && best_a == action) {
+      unsigned correct_action = correct_actions[action_count];
+      if (correct && best_a == correct_action) {
         (*correct)++;
       }
+      // If we're training, use the reference action.
+      if (training)
+        action = correct_action;
     }
     ++action_count;
     log_probs.push_back(pick(adiste, action));

From 0e065e035c87ac9ab0afe76f21a3d6fb6f5e70c8 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 21 Mar 2017 14:07:29 -0400
Subject: [PATCH 68/88] Can now shrink a memory pool back down w/o clearing
 entirely

---
 cnn/cnn/aligned-mem-pool.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/cnn/cnn/aligned-mem-pool.h b/cnn/cnn/aligned-mem-pool.h
index 9a087b0..fa616f0 100644
--- a/cnn/cnn/aligned-mem-pool.h
+++ b/cnn/cnn/aligned-mem-pool.h
@@ -8,6 +8,8 @@ namespace cnn {
 
 class AlignedMemoryPool {
  public:
+  typedef size_t PoolState;
+
   explicit AlignedMemoryPool(size_t cap, MemAllocator* a) : a(a) {
     sys_alloc(cap);
     zero_all();
@@ -36,6 +38,14 @@ class AlignedMemoryPool {
   bool is_shared() {
     return shared;
   }
+
+  PoolState get_state() const {
+    return used;
+  }
+
+  void restore_state(const PoolState& state) {
+    used = state;
+  }
  private:
   void sys_alloc(size_t cap) {
     capacity = a->round_up_align(cap);

From 3662e8ee504ed5e87ad0042d9a015416933390ea Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 19 Apr 2017 23:54:56 -0400
Subject: [PATCH 69/88] Attempted to get GPU compilation working

---
 CMakeLists.txt         |  6 +++---
 cnn/CMakeLists.txt     | 22 ++++++++++++++--------
 cnn/cnn/CMakeLists.txt | 14 ++++++++++----
 parser/CMakeLists.txt  | 19 +++++++++++++++----
 4 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a4e62ba..0ca9f46 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@ if(NOT CMAKE_BUILD_TYPE)
 endif(NOT CMAKE_BUILD_TYPE)
 
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++14")
 
 enable_testing()
 
@@ -30,6 +30,6 @@ include_directories(${EIGEN3_INCLUDE_DIR})
 
 #configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
 
-add_subdirectory(cnn/cnn)
+add_subdirectory(cnn)
 # add_subdirectory(cnn/examples)
-add_subdirectory(parser)
+add_subdirectory(parser)
\ No newline at end of file
diff --git a/cnn/CMakeLists.txt b/cnn/CMakeLists.txt
index 58173ae..17fc1ec 100644
--- a/cnn/CMakeLists.txt
+++ b/cnn/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
 
 if(NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "Debug")
     set(CMAKE_BUILD_TYPE RelWithDebInfo)
-endif(NOT CMAKE_BUILD_TYPE)
+endif(NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "Debug")
 
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
 
@@ -14,7 +14,7 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
 #   3. try compiler options like -march=native or other architecture
 #      flags (the compiler does not always make the best configuration
 #      decisions without help)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -Wall -std=c++11 -Ofast -g -DEIGEN_FAST_MATH -march=native")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -Wall -std=c++14 -Ofast -g -DEIGEN_FAST_MATH -march=native")
 
 enable_testing()
 
@@ -67,9 +67,11 @@ else()
 endif()
 
 if(BACKEND MATCHES "^eigen$")
-  set(WITH_EIGEN_BACKEND 1)
+  set(WITH_CUDA_BACKEND 0 CACHE INTERNAL "" FORCE)
+  set(WITH_EIGEN_BACKEND 1 CACHE INTERNAL "" FORCE)
 elseif(BACKEND MATCHES "^cuda$")
-  set(WITH_CUDA_BACKEND 1)
+  set(WITH_CUDA_BACKEND 1 CACHE INTERNAL "" FORCE)
+  set(WITH_EIGEN_BACKEND 0 CACHE INTERNAL "" FORCE)
 else()
   message(SEND_ERROR "BACKEND must be eigen or cuda")
 endif()
@@ -97,8 +99,12 @@ set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT})
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
+option(CNN_CORE_ONLY "If off, won't build extra dirs like tests and examples" ON)
+
 add_subdirectory(cnn)
-add_subdirectory(tests)
-add_subdirectory(examples)
-add_subdirectory(rnnlm)
-enable_testing()
+if(NOT CNN_CORE_ONLY)
+    add_subdirectory(tests)
+    add_subdirectory(examples)
+    add_subdirectory(rnnlm)
+    enable_testing()
+endif(NOT CNN_CORE_ONLY)
\ No newline at end of file
diff --git a/cnn/cnn/CMakeLists.txt b/cnn/cnn/CMakeLists.txt
index bfa85d0..6f66321 100644
--- a/cnn/cnn/CMakeLists.txt
+++ b/cnn/cnn/CMakeLists.txt
@@ -69,6 +69,8 @@ set(cnn_library_HDRS
     training.h
 )
 
+option(CNN_SHARED "Whether to build CNN shared libs" OFF)
+
 if(WITH_CUDA_BACKEND)
   list(APPEND cnn_library_SRCS
        cuda.cc)
@@ -99,20 +101,24 @@ file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc)
 # actual target:
 add_library(cnn STATIC ${cnn_library_SRCS} ${cnn_library_HDRS})
 target_link_libraries(cnn ${LIBS})
-if(WITH_CUDA_BACKEND)
+if(CNN_SHARED)
+  if(WITH_CUDA_BACKEND)
 	add_library(gcnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS})
 	target_link_libraries(gcnn_shared ${LIBS})
-else()
+  else()
 	add_library(cnn_shared SHARED ${cnn_library_SRCS} ${cnn_library_HDRS})
 	target_link_libraries(cnn_shared ${LIBS})
-endif(WITH_CUDA_BACKEND)
+  endif(WITH_CUDA_BACKEND)
+endif(CNN_SHARED)
 #add_library(cnn ${cnn_library_SRCS} ${cnn_library_HDRS} ${LIBS})
 if(WITH_CUDA_BACKEND)
   set(CUDA_SEPARABLE_COMPILATION ON)
   list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_20,code=sm_20;-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_52,code=compute_52;-std=c++11;-O2;-DVERBOSE;-Xcompiler;-fpic")
   SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
   cuda_add_library(cnncuda STATIC gpu-ops.cu)
-  cuda_add_library(cnncuda_shared SHARED gpu-ops.cu)
+  if(CNN_SHARED)
+    cuda_add_library(cnncuda_shared SHARED gpu-ops.cu)
+  endif(CNN_SHARED)
 endif(WITH_CUDA_BACKEND)
 
 install(FILES ${cnn_library_HDRS} DESTINATION include/cnn)
diff --git a/parser/CMakeLists.txt b/parser/CMakeLists.txt
index 0077cab..80fee68 100644
--- a/parser/CMakeLists.txt
+++ b/parser/CMakeLists.txt
@@ -1,8 +1,19 @@
 PROJECT(lstm-parser:parser)
 CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
 
-ADD_LIBRARY(lstm-parser-core lstm-parser.cc corpus.cc neural-transition-tagger.cpp)
-target_link_libraries(lstm-parser-core cnn ${Boost_LIBRARIES})
-
+add_library(lstm-parser-core STATIC lstm-parser.cc corpus.cc 
+            neural-transition-tagger.cpp)
 ADD_EXECUTABLE(lstm-parse lstm-parser-driver.cc)
-target_link_libraries(lstm-parse lstm-parser-core ${Boost_LIBRARIES})
+
+if(WITH_CUDA_BACKEND)
+  add_dependencies(lstm-parser-core cnncuda)
+  target_link_libraries(lstm-parser-core cnncuda)
+  CUDA_ADD_CUBLAS_TO_TARGET(lstm-parser-core)
+
+  add_dependencies(lstm-parse cnncuda)
+  target_link_libraries(lstm-parse cnncuda)
+  CUDA_ADD_CUBLAS_TO_TARGET(lstm-parse)
+endif(WITH_CUDA_BACKEND)
+
+target_link_libraries(lstm-parser-core cnn ${Boost_LIBRARIES})
+target_link_libraries(lstm-parse lstm-parser-core ${Boost_LIBRARIES})
\ No newline at end of file

From ae6f7c9777c5f53d13e2788d8ee0a5614eb719d2 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 20 Apr 2017 22:13:24 -0400
Subject: [PATCH 70/88] Unused final parser state -> expose arbitrary network
 states

---
 parser/lstm-parser.cc               | 15 +++++++++++++--
 parser/lstm-parser.h                |  8 +++++---
 parser/neural-transition-tagger.cpp | 12 +++++-------
 parser/neural-transition-tagger.h   | 14 ++++++++------
 4 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 095ac04..61f9705 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -236,7 +236,8 @@ Expression LSTMParser::GetActionProbabilities(const TaggerState& state) {
 
 
 void LSTMParser::DoAction(unsigned action, TaggerState* state,
-                          ComputationGraph* cg) {
+                          ComputationGraph* cg,
+                          vector<Expression>* states_to_expose) {
   ParserState* real_state = static_cast<ParserState*>(state);
   // add current action to action LSTM
   Expression action_e = lookup(*cg, p_a, action);
@@ -309,6 +310,11 @@ void LSTMParser::DoAction(unsigned action, TaggerState* state,
     real_state->stack.push_back(nlcomposed);
     real_state->stacki.push_back(headi);
   }
+
+  // After the last action, record the final tree state, if requested.
+  if (states_to_expose && ShouldTerminate(*real_state)) {
+    (*states_to_expose).back() = real_state->stack.back();
+  }
 }
 
 
@@ -316,7 +322,8 @@ NeuralTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
     ComputationGraph* cg,
     const Sentence& raw_sent,
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-    const vector<unsigned>& correct_actions) {
+    const vector<unsigned>& correct_actions,
+    vector<Expression>* states_to_expose) {
   stack_lstm.new_graph(*cg);
   buffer_lstm.new_graph(*cg);
   action_lstm.new_graph(*cg);
@@ -362,6 +369,10 @@ NeuralTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
   for (auto& b : state->buffer)
     buffer_lstm.add_input(b);
 
+  if (states_to_expose) {
+    states_to_expose->resize(1);
+  }
+
   return state;
 }
 
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 684f0b0..2ff5bba 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -257,7 +257,8 @@ class LSTMParser : public NeuralTransitionTagger {
   virtual TaggerState* InitializeParserState(
       cnn::ComputationGraph* cg, const Sentence& raw_sent,
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions) override;
+      const std::vector<unsigned>& correct_actions,
+      std::vector<cnn::expr::Expression>* states_to_expose) override;
 
   virtual void InitializeNetworkParameters() override;
 
@@ -272,8 +273,9 @@ class LSTMParser : public NeuralTransitionTagger {
   virtual cnn::expr::Expression GetActionProbabilities(const TaggerState& state)
       override;
 
-  virtual void DoAction(unsigned action, TaggerState* state,
-                        cnn::ComputationGraph* cg) override;
+  virtual void DoAction(
+      unsigned action, TaggerState* state, cnn::ComputationGraph* cg,
+      std::vector<cnn::expr::Expression>* states_to_expose) override;
 
   inline unsigned ComputeCorrect(const ParseTree& ref,
                                  const ParseTree& hyp) const {
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index e75fea9..37b09ba 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -71,7 +71,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
     bool training,
     const vector<unsigned>& correct_actions, double* correct,
-    Expression* final_parser_state) {
+    vector<Expression>* states_to_expose) {
   if (training)
     assert(!correct_actions.empty());
   assert(finalized);
@@ -82,8 +82,9 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     param_expressions[params] = parameter(*cg, params);
   }
 
-  unique_ptr<TaggerState> state(InitializeParserState(cg, raw_sent, sent,
-                                                      correct_actions));
+  unique_ptr<TaggerState> state(
+      InitializeParserState(cg, raw_sent, sent, correct_actions,
+                            states_to_expose));
 
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
@@ -125,15 +126,12 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     log_probs.push_back(pick(adiste, action));
     results.push_back(action);
 
-    DoAction(action, state.get(), cg);
+    DoAction(action, state.get(), cg, states_to_expose);
   }
 
   Expression tot_neglogprob = -sum(log_probs);
   assert(tot_neglogprob.pg != nullptr);
 
-  if (final_parser_state) {
-    *final_parser_state = p_t;
-  }
   param_expressions.clear();
   return results;
 }
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index 3e91d03..4e814db 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -26,11 +26,11 @@ class NeuralTransitionTagger {
   std::vector<unsigned> LogProbTagger(
       cnn::ComputationGraph *cg, const Sentence& sentence,
       bool replace_unknowns = true,
-      cnn::expr::Expression* final_parser_state = nullptr) {
+      std::vector<cnn::expr::Expression>* states_to_expose = nullptr) {
     return LogProbTagger(
         cg, sentence,
         replace_unknowns ? ReplaceUnknowns(sentence) : sentence.words,
-        false, std::vector<unsigned>(), nullptr, final_parser_state);
+        false, std::vector<unsigned>(), nullptr, states_to_expose);
   }
 
   // *** if correct_actions is empty, this runs greedy decoding ***
@@ -46,7 +46,7 @@ class NeuralTransitionTagger {
       bool training = false,
       const std::vector<unsigned>& correct_actions = std::vector<unsigned>(),
       double* correct = nullptr,
-      cnn::expr::Expression* final_parser_state = nullptr);
+      std::vector<cnn::expr::Expression>* states_to_expose = nullptr);
 
   const CorpusVocabulary& GetVocab() const { return vocab; }
 
@@ -79,7 +79,8 @@ class NeuralTransitionTagger {
   virtual TaggerState* InitializeParserState(
       cnn::ComputationGraph* hg, const Sentence& raw_sent,
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions) = 0;
+      const std::vector<unsigned>& correct_actions,
+      std::vector<cnn::expr::Expression>* states_to_expose) = 0;
 
   virtual cnn::expr::Expression GetActionProbabilities(
       const TaggerState& state) = 0;
@@ -89,8 +90,9 @@ class NeuralTransitionTagger {
   virtual bool IsActionForbidden(const unsigned action,
                                  const TaggerState& state) const = 0;
 
-  virtual void DoAction(unsigned action, TaggerState* state,
-                        cnn::ComputationGraph* cg) = 0;
+  virtual void DoAction(
+      unsigned action, TaggerState* state, cnn::ComputationGraph* cg,
+      std::vector<cnn::expr::Expression>* states_to_expose) = 0;
 
   virtual void DoSave(eos::portable_oarchive& archive) = 0;
 

From 43dd29a7d9aa4062742d73867b4367822f740f0d Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 20 Apr 2017 23:32:28 -0400
Subject: [PATCH 71/88] Switched to map for states to expose

---
 parser/lstm-parser.cc               | 11 +++--------
 parser/lstm-parser.h                |  5 ++---
 parser/neural-transition-tagger.cpp |  5 ++---
 parser/neural-transition-tagger.h   | 13 +++++++------
 4 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 61f9705..0f90e75 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -237,7 +237,7 @@ Expression LSTMParser::GetActionProbabilities(const TaggerState& state) {
 
 void LSTMParser::DoAction(unsigned action, TaggerState* state,
                           ComputationGraph* cg,
-                          vector<Expression>* states_to_expose) {
+                          map<string, Expression>* states_to_expose) {
   ParserState* real_state = static_cast<ParserState*>(state);
   // add current action to action LSTM
   Expression action_e = lookup(*cg, p_a, action);
@@ -313,7 +313,7 @@ void LSTMParser::DoAction(unsigned action, TaggerState* state,
 
   // After the last action, record the final tree state, if requested.
   if (states_to_expose && ShouldTerminate(*real_state)) {
-    (*states_to_expose).back() = real_state->stack.back();
+    (*states_to_expose)["Tree"] = real_state->stack.back();
   }
 }
 
@@ -322,8 +322,7 @@ NeuralTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
     ComputationGraph* cg,
     const Sentence& raw_sent,
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-    const vector<unsigned>& correct_actions,
-    vector<Expression>* states_to_expose) {
+    const vector<unsigned>& correct_actions) {
   stack_lstm.new_graph(*cg);
   buffer_lstm.new_graph(*cg);
   action_lstm.new_graph(*cg);
@@ -369,10 +368,6 @@ NeuralTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
   for (auto& b : state->buffer)
     buffer_lstm.add_input(b);
 
-  if (states_to_expose) {
-    states_to_expose->resize(1);
-  }
-
   return state;
 }
 
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 2ff5bba..90338e4 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -257,8 +257,7 @@ class LSTMParser : public NeuralTransitionTagger {
   virtual TaggerState* InitializeParserState(
       cnn::ComputationGraph* cg, const Sentence& raw_sent,
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions,
-      std::vector<cnn::expr::Expression>* states_to_expose) override;
+      const std::vector<unsigned>& correct_actions) override;
 
   virtual void InitializeNetworkParameters() override;
 
@@ -275,7 +274,7 @@ class LSTMParser : public NeuralTransitionTagger {
 
   virtual void DoAction(
       unsigned action, TaggerState* state, cnn::ComputationGraph* cg,
-      std::vector<cnn::expr::Expression>* states_to_expose) override;
+      std::map<std::string, cnn::expr::Expression>* states_to_expose) override;
 
   inline unsigned ComputeCorrect(const ParseTree& ref,
                                  const ParseTree& hyp) const {
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 37b09ba..83336d4 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -71,7 +71,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
     bool training,
     const vector<unsigned>& correct_actions, double* correct,
-    vector<Expression>* states_to_expose) {
+    map<string, Expression>* states_to_expose) {
   if (training)
     assert(!correct_actions.empty());
   assert(finalized);
@@ -83,8 +83,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
   }
 
   unique_ptr<TaggerState> state(
-      InitializeParserState(cg, raw_sent, sent, correct_actions,
-                            states_to_expose));
+      InitializeParserState(cg, raw_sent, sent, correct_actions));
 
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index 4e814db..553f6c1 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -24,9 +24,11 @@ class NeuralTransitionTagger {
 
   // Used for testing. Replaces OOV with UNK.
   std::vector<unsigned> LogProbTagger(
-      cnn::ComputationGraph *cg, const Sentence& sentence,
+      cnn::ComputationGraph *cg,
+      const Sentence& sentence,
       bool replace_unknowns = true,
-      std::vector<cnn::expr::Expression>* states_to_expose = nullptr) {
+      std::map<std::string, cnn::expr::Expression>* states_to_expose =
+          nullptr) {
     return LogProbTagger(
         cg, sentence,
         replace_unknowns ? ReplaceUnknowns(sentence) : sentence.words,
@@ -46,7 +48,7 @@ class NeuralTransitionTagger {
       bool training = false,
       const std::vector<unsigned>& correct_actions = std::vector<unsigned>(),
       double* correct = nullptr,
-      std::vector<cnn::expr::Expression>* states_to_expose = nullptr);
+      std::map<std::string, cnn::expr::Expression>* states_to_expose = nullptr);
 
   const CorpusVocabulary& GetVocab() const { return vocab; }
 
@@ -79,8 +81,7 @@ class NeuralTransitionTagger {
   virtual TaggerState* InitializeParserState(
       cnn::ComputationGraph* hg, const Sentence& raw_sent,
       const Sentence::SentenceMap& sent,  // sentence with OOVs replaced
-      const std::vector<unsigned>& correct_actions,
-      std::vector<cnn::expr::Expression>* states_to_expose) = 0;
+      const std::vector<unsigned>& correct_actions) = 0;
 
   virtual cnn::expr::Expression GetActionProbabilities(
       const TaggerState& state) = 0;
@@ -92,7 +93,7 @@ class NeuralTransitionTagger {
 
   virtual void DoAction(
       unsigned action, TaggerState* state, cnn::ComputationGraph* cg,
-      std::vector<cnn::expr::Expression>* states_to_expose) = 0;
+      std::map<std::string, cnn::expr::Expression>* states_to_expose) = 0;
 
   virtual void DoSave(eos::portable_oarchive& archive) = 0;
 

From 7a02eb52bb3d79dbfdefaa8a49a32bd4c1ee5ef3 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 20 Apr 2017 23:32:39 -0400
Subject: [PATCH 72/88] Code formatting

---
 parser/lstm-parser.cc | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 0f90e75..0cdae71 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -300,9 +300,11 @@ void LSTMParser::DoAction(unsigned action, TaggerState* state,
     real_state->stack.pop_back();
     real_state->stacki.pop_back();
     // composed = cbias + H * head + D * dep + R * relation
-    Expression composed = affine_transform({GetParamExpr(p_cbias),
-        GetParamExpr(p_H), head, GetParamExpr(p_D), dep, GetParamExpr(p_R),
-        relation});
+    Expression composed = affine_transform(
+        {GetParamExpr(p_cbias),
+         GetParamExpr(p_H), head,
+         GetParamExpr(p_D), dep,
+         GetParamExpr(p_R), relation});
     Expression nlcomposed = tanh(composed);
     stack_lstm.rewind_one_step();
     stack_lstm.rewind_one_step();
@@ -358,7 +360,8 @@ NeuralTransitionTagger::TaggerState* LSTMParser::InitializeParserState(
       args.push_back(GetParamExpr(p_t2l));
       args.push_back(t);
     }
-    state->buffer[sent.size() - added_to_buffer] = rectify(affine_transform(args));
+    state->buffer[sent.size() - added_to_buffer] = rectify(
+        affine_transform(args));
     state->bufferi[sent.size() - added_to_buffer] = token_index;
     added_to_buffer++;
   }

From 9124b54b8abadbf68f354e76a90ddfe1b6d31747 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Mon, 24 Apr 2017 18:18:35 -0400
Subject: [PATCH 73/88] Expose tree node embeddings

---
 parser/lstm-parser.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 0cdae71..9b32cbd 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -311,6 +311,11 @@ void LSTMParser::DoAction(unsigned action, TaggerState* state,
     stack_lstm.add_input(nlcomposed);
     real_state->stack.push_back(nlcomposed);
     real_state->stacki.push_back(headi);
+    if (states_to_expose) {
+      // Once something is attached as a dependent, it will never again be
+      // modified, so cache its expression.
+      (*states_to_expose)[to_string(depi)] = dep;
+    }
   }
 
   // After the last action, record the final tree state, if requested.

From 93e570833ab7551a76bd7320dc3be5417ba6e80e Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Wed, 26 Apr 2017 16:09:08 -0400
Subject: [PATCH 74/88] Exposed whether tagger is in training to subclass
 functions

---
 parser/neural-transition-tagger.cpp | 1 +
 parser/neural-transition-tagger.h   | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 83336d4..dcac0f3 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -72,6 +72,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     bool training,
     const vector<unsigned>& correct_actions, double* correct,
     map<string, Expression>* states_to_expose) {
+  in_training = training;
   if (training)
     assert(!correct_actions.empty());
   assert(finalized);
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index 553f6c1..502073f 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -17,7 +17,8 @@ namespace lstm_parser {
 
 class NeuralTransitionTagger {
 public:
-  NeuralTransitionTagger() : finalized(false), model(new cnn::Model) {}
+  NeuralTransitionTagger() : finalized(false), in_training(false),
+                             model(new cnn::Model) {}
   virtual ~NeuralTransitionTagger() {}
 
   void FinalizeVocab();
@@ -66,6 +67,7 @@ class NeuralTransitionTagger {
   };
 
   bool finalized;
+  bool in_training;  // expose to virtual fns whether we're doing training
   std::map<cnn::Parameters*, cnn::expr::Expression> param_expressions;
 
   // Store the model as a smart ptr so we can call its destructor when needed.

From bfcea60ef350d19c06d67b949e89993efb9fa457 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Thu, 27 Apr 2017 13:44:25 -0400
Subject: [PATCH 75/88] Minor formatting

---
 parser/lstm-parser.h                | 2 +-
 parser/neural-transition-tagger.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index 90338e4..ba72f7e 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -323,7 +323,7 @@ class LSTMParser : public NeuralTransitionTagger {
     action_lstm = cnn::LSTMBuilder(options.layers, options.action_dim,
                                    options.hidden_dim, model.get());
 
-    FinalizeVocab(); // OK, now finalize. :)
+    FinalizeVocab(); // OK, now finalize. :) (Also initializes network params.)
 
     ar & *model;
   }
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index dcac0f3..7e8c30a 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -51,6 +51,7 @@ void NeuralTransitionTagger::FinalizeVocab() {
   finalized = true;
 }
 
+
 Sentence::SentenceMap NeuralTransitionTagger::ReplaceUnknowns(
     const Sentence& sentence) {
   Sentence::SentenceMap tsentence(sentence.words);  // sentence w/ OOVs replaced
@@ -137,5 +138,4 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
 }
 
 
-
 } /* namespace lstm_parser */

From 5619f815210e36753ba85a9416e937e01fa95254 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 2 May 2017 13:03:24 -0400
Subject: [PATCH 76/88] Added optional ParseTree pointer to Sentence

---
 parser/corpus.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index c5a9d0a..e3ec3fd 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -191,17 +191,20 @@ class ConllUCorpusReader : public CorpusReader {
 class Sentence;
 inline std::ostream& operator<<(std::ostream& os, const Sentence& sentence);
 
+class ParseTree;  // forward declaration
+
 class Sentence {
 public:
   typedef std::map<unsigned, unsigned> SentenceMap;
   typedef std::map<unsigned, std::string> SentenceUnkMap;
 
-  Sentence(const CorpusVocabulary& vocab) : vocab(vocab) {}
+  Sentence(const CorpusVocabulary& vocab) : vocab(vocab), tree(nullptr) {}
 
   SentenceMap words;
   SentenceMap poses;
   SentenceUnkMap unk_surface_forms;
   const CorpusVocabulary& vocab;
+  ParseTree* tree;
 
   size_t Size() const {
     return words.size();

From d7902d911d6d980ef7447d21dc52353f6edd17a5 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 2 May 2017 22:45:52 -0400
Subject: [PATCH 77/88] Fixed assertion bug (action-count bound is now
 asserted only in training)

---
 parser/neural-transition-tagger.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 7e8c30a..afa9929 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -114,7 +114,7 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     unsigned action = best_a;
 
     if (!correct_actions.empty()) {
-      assert(action_count < correct_actions.size());
+      assert(action_count < correct_actions.size() || !training);
       unsigned correct_action = correct_actions[action_count];
       if (correct && best_a == correct_action) {
         (*correct)++;

From 5653ab730f28c2f8167fa4b4b505f04bf846471f Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 20 May 2017 22:33:18 -0400
Subject: [PATCH 78/88] Added num_values() to CNN LookupParameters

---
 cnn/cnn/model.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cnn/cnn/model.h b/cnn/cnn/model.h
index ddb6258..b27dec9 100644
--- a/cnn/cnn/model.h
+++ b/cnn/cnn/model.h
@@ -61,6 +61,7 @@ struct LookupParameters : public ParametersBase {
   void squared_l2norm(float* sqnorm) const override;
   void g_squared_l2norm(float* sqnorm) const override;
   size_t size() const override;
+  size_t num_values() const { return values.size(); }
   void Initialize(unsigned index, const std::vector<float>& val);
 
   void copy(const LookupParameters & val);

From 372cdea496b64e7f232ead5c3401a7ddae59b4c0 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 23 May 2017 14:22:55 -0400
Subject: [PATCH 79/88] Made all TaggerStates modifiable by overriding subclass
 member fns

---
 parser/lstm-parser.cc               | 8 ++++----
 parser/lstm-parser.h                | 8 ++++----
 parser/neural-transition-tagger.cpp | 6 +++---
 parser/neural-transition-tagger.h   | 6 +++---
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/parser/lstm-parser.cc b/parser/lstm-parser.cc
index 9b32cbd..e33ed09 100644
--- a/parser/lstm-parser.cc
+++ b/parser/lstm-parser.cc
@@ -137,9 +137,9 @@ LSTMParser::LSTMParser(const ParserOptions& poptions,
 
 
 bool LSTMParser::IsActionForbidden(const unsigned action,
-                                   const TaggerState& state) const {
+                                   TaggerState* state) const {
   const string& action_name = vocab.action_names[action];
-  const ParserState& real_state = static_cast<const ParserState&>(state);
+  const ParserState& real_state = static_cast<const ParserState&>(*state);
   unsigned ssize = real_state.stack.size();
   unsigned bsize = real_state.buffer.size();
 
@@ -221,7 +221,7 @@ ParseTree LSTMParser::RecoverParseTree(
 }
 
 
-Expression LSTMParser::GetActionProbabilities(const TaggerState& state) {
+Expression LSTMParser::GetActionProbabilities(TaggerState* state) {
   // p_t = pbias + S * slstm + B * blstm + A * alstm
   Expression p_t = affine_transform(
       {GetParamExpr(p_pbias), GetParamExpr(p_S), stack_lstm.back(),
@@ -319,7 +319,7 @@ void LSTMParser::DoAction(unsigned action, TaggerState* state,
   }
 
   // After the last action, record the final tree state, if requested.
-  if (states_to_expose && ShouldTerminate(*real_state)) {
+  if (states_to_expose && ShouldTerminate(real_state)) {
     (*states_to_expose)["Tree"] = real_state->stack.back();
   }
 }
diff --git a/parser/lstm-parser.h b/parser/lstm-parser.h
index ba72f7e..e6a7e09 100644
--- a/parser/lstm-parser.h
+++ b/parser/lstm-parser.h
@@ -261,15 +261,15 @@ class LSTMParser : public NeuralTransitionTagger {
 
   virtual void InitializeNetworkParameters() override;
 
-  virtual bool ShouldTerminate(const TaggerState& state) const override {
-    const ParserState& real_state = static_cast<const ParserState&>(state);
+  virtual bool ShouldTerminate(TaggerState* state) const override {
+    const ParserState& real_state = static_cast<const ParserState&>(*state);
     return real_state.stack.size() <= 2 && real_state.buffer.size() <= 1;
   }
 
   virtual bool IsActionForbidden(const unsigned action,
-                                 const TaggerState& state) const override;
+                                 TaggerState* state) const override;
 
-  virtual cnn::expr::Expression GetActionProbabilities(const TaggerState& state)
+  virtual cnn::expr::Expression GetActionProbabilities(TaggerState* state)
       override;
 
   virtual void DoAction(
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index afa9929..60f1557 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -90,16 +90,16 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
   Expression p_t; // declared outside to allow access later
-  while (!ShouldTerminate(*state)) {
+  while (!ShouldTerminate(state.get())) {
     // Get list of possible actions for the current parser state.
     vector<unsigned> current_valid_actions;
     for (unsigned action = 0; action < vocab.action_names.size(); ++action) {
-      if (IsActionForbidden(action, *state))
+      if (IsActionForbidden(action, state.get()))
         continue;
       current_valid_actions.push_back(action);
     }
 
-    Expression r_t = GetActionProbabilities(*state);
+    Expression r_t = GetActionProbabilities(state.get());
     // adist = log_softmax(r_t, current_valid_actions)
     Expression adiste = log_softmax(r_t, current_valid_actions);
     vector<float> adist = as_vector(cg->incremental_forward());
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index 502073f..7d4d375 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -86,12 +86,12 @@ class NeuralTransitionTagger {
       const std::vector<unsigned>& correct_actions) = 0;
 
   virtual cnn::expr::Expression GetActionProbabilities(
-      const TaggerState& state) = 0;
+      TaggerState* state) = 0;
 
-  virtual bool ShouldTerminate(const TaggerState& state) const = 0;
+  virtual bool ShouldTerminate(TaggerState* state) const = 0;
 
   virtual bool IsActionForbidden(const unsigned action,
-                                 const TaggerState& state) const = 0;
+                                 TaggerState* state) const = 0;
 
   virtual void DoAction(
       unsigned action, TaggerState* state, cnn::ComputationGraph* cg,

From c8d7685903b9a45193d8781eddc177db239fa39a Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 4 Jun 2017 01:55:53 -0400
Subject: [PATCH 80/88] Made Sentences swappable (also more efficient GetWord
 default)

---
 parser/corpus.h                     | 21 ++++++++++++++-------
 parser/neural-transition-tagger.cpp |  2 +-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index e3ec3fd..075810d 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -54,7 +54,7 @@ class CorpusVocabulary {
   inline unsigned GetWord(const std::string& word) const {
     auto word_iter = words_to_int.find(word);
     if (word_iter == words_to_int.end()) {
-      return words_to_int.find(CorpusVocabulary::UNK)->second;
+      return kUNK;
     } else {
       return word_iter->second;
     }
@@ -198,12 +198,12 @@ class Sentence {
   typedef std::map<unsigned, unsigned> SentenceMap;
   typedef std::map<unsigned, std::string> SentenceUnkMap;
 
-  Sentence(const CorpusVocabulary& vocab) : vocab(vocab), tree(nullptr) {}
+  Sentence(const CorpusVocabulary& vocab) : vocab(&vocab), tree(nullptr) {}
 
   SentenceMap words;
   SentenceMap poses;
   SentenceUnkMap unk_surface_forms;
-  const CorpusVocabulary& vocab;
+  const CorpusVocabulary* vocab;
   ParseTree* tree;
 
   size_t Size() const {
@@ -212,8 +212,8 @@ class Sentence {
 
   const std::string& WordForToken(unsigned token_id) const {
     unsigned word_id = words.at(token_id);
-    return word_id == vocab.kUNK ? unk_surface_forms.at(token_id)
-                                 : vocab.int_to_words[word_id];
+    return word_id == vocab->kUNK ? unk_surface_forms.at(token_id)
+                                  : vocab->int_to_words[word_id];
   }
 };
 
@@ -224,8 +224,8 @@ inline std::ostream& operator<<(std::ostream& os, const Sentence& sentence) {
     unsigned pos_id = sentence.poses.at(index);
     auto unk_iter = sentence.unk_surface_forms.find(index);
     os << (unk_iter == sentence.unk_surface_forms.end() ?
-            sentence.vocab.int_to_words.at(word_id) : unk_iter->second)
-       << '/' << sentence.vocab.int_to_pos.at(pos_id);
+            sentence.vocab->int_to_words.at(word_id) : unk_iter->second)
+       << '/' << sentence.vocab->int_to_pos.at(pos_id);
     if (index != sentence.words.rend()->first) {
       os << ' ';
     }
@@ -351,4 +351,11 @@ class ParserTrainingCorpus : public TrainingCorpus {
 
 } // namespace lstm_parser
 
+
+inline void swap(lstm_parser::Sentence& s1, lstm_parser::Sentence& s2) {
+  lstm_parser::Sentence tmp = std::move(s1);
+  s2 = std::move(s1);
+  s1 = std::move(tmp);
+}
+
 #endif
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 60f1557..016a6a3 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -23,7 +23,7 @@ void NeuralTransitionTagger::SaveModel(const string& model_fname,
   cerr << "Model saved." << endl;
   // Create a soft link to the most recent model in order to make it
   // easier to refer to it in a shell script.
-  if (!softlink_created) {
+  if (false) {
     string softlink = "latest_model.params";
 
     if (system((string("rm -f ") + softlink).c_str()) == 0

From 8b6d31561e99c9a9eb8ace3601947244355eb5c9 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 4 Jun 2017 15:33:06 -0400
Subject: [PATCH 81/88] Fixed Sentence swap function (was moving from s1 twice)

---
 parser/corpus.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index 075810d..2a6c1a9 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -354,8 +354,8 @@ class ParserTrainingCorpus : public TrainingCorpus {
 
 inline void swap(lstm_parser::Sentence& s1, lstm_parser::Sentence& s2) {
   lstm_parser::Sentence tmp = std::move(s1);
-  s2 = std::move(s1);
-  s1 = std::move(tmp);
+  s1 = std::move(s2);
+  s2 = std::move(tmp);
 }
 
 #endif

From 7d0746440eba38d478675425c35ca820a48399c2 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 16 Sep 2017 22:00:27 -0400
Subject: [PATCH 82/88] Updated test command line in README to latest flags

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 369808e..f0704ea 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ There is a pretrained model for English [here](http://www.cs.cmu.edu/~jdunietz/h
 
 Given a `test.conll` file formatted according to the [CoNLL data format](http://ilk.uvt.nl/conll/#dataformat):
 
-    parser/lstm-parse -m english_pos_2_32_100_20_100_12_20.params -t test.conll
+    parser/lstm-parse -m english_pos_2_32_100_20_100_12_20.params -T test.conll -s
 
 If you are not using the pretrained model, you will need to replace the `.params` argument with the name of your own trained model file.
 

From 741dad6807663ea9141a3041ba12f505ed8281ea Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 3 Oct 2017 00:34:58 -0400
Subject: [PATCH 83/88] Deleted dead variable declaration

---
 parser/neural-transition-tagger.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 016a6a3..f1a9739 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -89,7 +89,6 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
 
   vector<Expression> log_probs;
   unsigned action_count = 0;  // incremented at each prediction
-  Expression p_t; // declared outside to allow access later
   while (!ShouldTerminate(state.get())) {
     // Get list of possible actions for the current parser state.
     vector<unsigned> current_valid_actions;

From a4670beb8162fda00815f27efedd7ce2b052bed2 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Tue, 3 Oct 2017 12:22:15 -0400
Subject: [PATCH 84/88] Enabled GetActionProbabilities to signal that oracle
 action should be used

---
 parser/neural-transition-tagger.cpp | 54 +++++++++++++++++------------
 parser/neural-transition-tagger.h   |  4 +++
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index f1a9739..6c4ddfc 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -14,9 +14,12 @@ using namespace cnn::expr;
 
 namespace lstm_parser {
 
+const cnn::expr::Expression NeuralTransitionTagger::USE_ORACLE(
+    nullptr, cnn::VariableIndex(static_cast<unsigned>(-1)));
+
 
 void NeuralTransitionTagger::SaveModel(const string& model_fname,
-                                     bool softlink_created) {
+                                       bool softlink_created) {
   ofstream out_file(model_fname);
   eos::portable_oarchive archive(out_file);
   DoSave(archive);
@@ -99,31 +102,38 @@ vector<unsigned> NeuralTransitionTagger::LogProbTagger(
     }
 
     Expression r_t = GetActionProbabilities(state.get());
-    // adist = log_softmax(r_t, current_valid_actions)
-    Expression adiste = log_softmax(r_t, current_valid_actions);
-    vector<float> adist = as_vector(cg->incremental_forward());
-    double best_score = adist[current_valid_actions[0]];
-    unsigned best_a = current_valid_actions[0];
-    for (unsigned i = 1; i < current_valid_actions.size(); ++i) {
-      if (adist[current_valid_actions[i]] > best_score) {
-        best_score = adist[current_valid_actions[i]];
-        best_a = current_valid_actions[i];
+    unsigned action;
+    if (r_t.pg == USE_ORACLE.pg && r_t.i == USE_ORACLE.i) {
+      assert(!correct_actions.empty() && action_count < correct_actions.size());
+      action = correct_actions[action_count];
+      // cerr << "Using oracle action: " << vocab.action_names[action] << endl;
+    } else {
+      // adist = log_softmax(r_t, current_valid_actions)
+      Expression adiste = log_softmax(r_t, current_valid_actions);
+      vector<float> adist = as_vector(cg->incremental_forward());
+      double best_score = adist[current_valid_actions[0]];
+      unsigned best_a = current_valid_actions[0];
+      for (unsigned i = 1; i < current_valid_actions.size(); ++i) {
+        if (adist[current_valid_actions[i]] > best_score) {
+          best_score = adist[current_valid_actions[i]];
+          best_a = current_valid_actions[i];
+        }
       }
-    }
-    unsigned action = best_a;
-
-    if (!correct_actions.empty()) {
-      assert(action_count < correct_actions.size() || !training);
-      unsigned correct_action = correct_actions[action_count];
-      if (correct && best_a == correct_action) {
-        (*correct)++;
+      action = best_a;
+
+      if (!correct_actions.empty()) {
+        assert(action_count < correct_actions.size() || !training);
+        unsigned correct_action = correct_actions[action_count];
+        if (correct && best_a == correct_action) {
+          (*correct)++;
+        }
+        // If we're training, use the reference action.
+        if (training)
+          action = correct_action;
       }
-      // If we're training, use the reference action.
-      if (training)
-        action = correct_action;
+      log_probs.push_back(pick(adiste, action));
     }
     ++action_count;
-    log_probs.push_back(pick(adiste, action));
     results.push_back(action);
 
     DoAction(action, state.get(), cg, states_to_expose);
diff --git a/parser/neural-transition-tagger.h b/parser/neural-transition-tagger.h
index 7d4d375..b4afa5c 100644
--- a/parser/neural-transition-tagger.h
+++ b/parser/neural-transition-tagger.h
@@ -66,6 +66,10 @@ class NeuralTransitionTagger {
     virtual ~TaggerState() {}
   };
 
+  // Special network pseudo-node for signaling that an oracle action should
+  // be used.
+  static const cnn::expr::Expression USE_ORACLE;
+
   bool finalized;
   bool in_training;  // expose to virtual fns whether we're doing training
   std::map<cnn::Parameters*, cnn::expr::Expression> param_expressions;

From 53756b2813c2c75cb48bb07ba9acc7f0bc9a1295 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 13 Oct 2018 15:52:26 -0400
Subject: [PATCH 85/88] Allowed attaching metadata to Sentence objects

---
 parser/corpus.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/parser/corpus.h b/parser/corpus.h
index 2a6c1a9..ad5dca6 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -198,6 +198,9 @@ class Sentence {
   typedef std::map<unsigned, unsigned> SentenceMap;
   typedef std::map<unsigned, std::string> SentenceUnkMap;
 
+  // TODO: move correct_act_sent from corpus-level to here
+  struct SentenceMetadata { virtual ~SentenceMetadata() {} };  // base class
+
   Sentence(const CorpusVocabulary& vocab) : vocab(&vocab), tree(nullptr) {}
 
   SentenceMap words;
@@ -205,6 +208,7 @@ class Sentence {
   SentenceUnkMap unk_surface_forms;
   const CorpusVocabulary* vocab;
   ParseTree* tree;
+  std::unique_ptr<SentenceMetadata> metadata;
 
   size_t Size() const {
     return words.size();

From 13f4e6b70c1f49b79ad77c8e0ba70cb3ddeca771 Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sat, 13 Oct 2018 20:57:12 -0400
Subject: [PATCH 86/88] Fixed Sentence printer for non-training corpora

---
 parser/corpus.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index ad5dca6..0f1efc9 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -189,7 +189,7 @@ class ConllUCorpusReader : public CorpusReader {
 
 
 class Sentence;
-inline std::ostream& operator<<(std::ostream& os, const Sentence& sentence);
+inline std::ostream& operator<<(std::ostream& os, const Sentence& sent);
 
 class ParseTree;  // forward declaration
 
@@ -221,16 +221,17 @@ class Sentence {
   }
 };
 
-inline std::ostream& operator<<(std::ostream& os, const Sentence& sentence) {
-  for (auto &index_and_word_id : sentence.words) {
+inline std::ostream& operator<<(std::ostream& os, const Sentence& sent) {
+  for (auto &index_and_word_id : sent.words) {
     unsigned index = index_and_word_id.first;
     unsigned word_id = index_and_word_id.second;
-    unsigned pos_id = sentence.poses.at(index);
-    auto unk_iter = sentence.unk_surface_forms.find(index);
-    os << (unk_iter == sentence.unk_surface_forms.end() ?
-            sentence.vocab->int_to_words.at(word_id) : unk_iter->second)
-       << '/' << sentence.vocab->int_to_pos.at(pos_id);
-    if (index != sentence.words.rend()->first) {
+    unsigned pos_id = sent.poses.at(index);
+    auto unk_iter = sent.unk_surface_forms.find(index);
+    os << (unk_iter == sent.unk_surface_forms.end() || unk_iter->second == ""
+            ? sent.vocab->int_to_words.at(word_id)
+            : unk_iter->second)
+       << '/' << sent.vocab->int_to_pos.at(pos_id);
+    if (index != sent.words.rbegin()->first) {
       os << ' ';
     }
   }

From be9091d9a6cb109750f6542e5440efcf9f71043b Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 17 Nov 2019 00:05:44 -0500
Subject: [PATCH 87/88] Improved sentence metadata handling; create dir for
 model if needed

---
 CMakeLists.txt                      | 2 +-
 parser/corpus.cc                    | 4 +++-
 parser/corpus.h                     | 3 ++-
 parser/neural-transition-tagger.cpp | 7 +++++++
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ca9f46..aa4712a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,7 +20,7 @@ if(DEFINED ENV{BOOST_ROOT})
   set(Boost_NO_SYSTEM_PATHS ON)
 endif()
 set(Boost_REALPATH ON)
-find_package(Boost COMPONENTS program_options serialization iostreams regex REQUIRED)
+find_package(Boost COMPONENTS program_options serialization iostreams regex filesystem REQUIRED)
 include_directories(${Boost_INCLUDE_DIR})
 set(LIBS ${LIBS} ${Boost_LIBRARIES})
 
diff --git a/parser/corpus.cc b/parser/corpus.cc
index d9291c9..639963e 100644
--- a/parser/corpus.cc
+++ b/parser/corpus.cc
@@ -185,12 +185,14 @@ void TrainingCorpus::OracleTransitionsCorpusReader::RecordSentence(
     TrainingCorpus* corpus, Sentence::SentenceMap* words,
     Sentence::SentenceMap* sentence_pos,
     Sentence::SentenceUnkMap* sentence_unk_surface_forms,
-    vector<unsigned>* correct_actions) const {
+    vector<unsigned>* correct_actions,
+    Sentence::SentenceMetadata* metadata) const {
   // Store the sentence variables and clear them for the next sentence.
   corpus->sentences.emplace_back(*corpus->vocab);
   Sentence* sentence = &corpus->sentences.back();
   sentence->words.swap(*words);
   sentence->poses.swap(*sentence_pos);
+  sentence->metadata.reset(metadata);
   corpus->correct_act_sent.push_back({});
   corpus->correct_act_sent.back().swap(*correct_actions);
 
diff --git a/parser/corpus.h b/parser/corpus.h
index 0f1efc9..710fe2c 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -299,7 +299,8 @@ class TrainingCorpus : public Corpus {
     void RecordSentence(TrainingCorpus* corpus, Sentence::SentenceMap* words,
                         Sentence::SentenceMap* sentence_pos,
                         Sentence::SentenceUnkMap* sentence_unk_surface_forms,
-                        std::vector<unsigned>* correct_actions) const;
+                        std::vector<unsigned>* correct_actions,
+                        Sentence::SentenceMetadata* metadata = nullptr) const;
 
     static inline unsigned UTF8Len(unsigned char x) {
       if (x < 0x80) return 1;
diff --git a/parser/neural-transition-tagger.cpp b/parser/neural-transition-tagger.cpp
index 6c4ddfc..11f8ec1 100644
--- a/parser/neural-transition-tagger.cpp
+++ b/parser/neural-transition-tagger.cpp
@@ -1,5 +1,6 @@
 #include "neural-transition-tagger.h"
 
+#include <boost/filesystem.hpp>
 #include <fstream>
 #include <string>
 #include <memory>
@@ -20,6 +21,12 @@ const cnn::expr::Expression NeuralTransitionTagger::USE_ORACLE(
 
 void NeuralTransitionTagger::SaveModel(const string& model_fname,
                                        bool softlink_created) {
+  auto model_dir_path = boost::filesystem::path(model_fname).parent_path();
+  if (!model_dir_path.empty() &&  // empty for a bare filename: nothing to do
+      boost::filesystem::create_directories(model_dir_path)) {
+    cerr << "Created directory " << model_dir_path << endl;
+  }
+
   ofstream out_file(model_fname);
   eos::portable_oarchive archive(out_file);
   DoSave(archive);

From a951527ecf8f396eb77b2c450ec2a313a39237ba Mon Sep 17 00:00:00 2001
From: Jesse Dunietz <yavyash@gmail.com>
Date: Sun, 17 Nov 2019 18:18:47 -0500
Subject: [PATCH 88/88] Updated WordForToken to allow iterator hinting for
 speed

---
 parser/corpus.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/parser/corpus.h b/parser/corpus.h
index 710fe2c..b6aa496 100644
--- a/parser/corpus.h
+++ b/parser/corpus.h
@@ -215,7 +215,18 @@ class Sentence {
   }
 
   const std::string& WordForToken(unsigned token_id) const {
-    unsigned word_id = words.at(token_id);
+    return WordForToken(words.find(token_id), token_id);
+  }
+
+  // Hinted overload: words_iter should be the result of
+  // words.find(token_id), letting callers that already hold the iterator
+  // skip a second map lookup. If words_iter is words.end(), falls back to
+  // words.at(token_id), which throws std::out_of_range for an unknown token
+  // instead of dereferencing the end iterator.
+  const std::string& WordForToken(SentenceMap::const_iterator words_iter,
+                                  unsigned token_id) const {
+    unsigned word_id = words_iter == words.end() ? words.at(token_id)
+                                                 : words_iter->second;
     return word_id == vocab->kUNK ? unk_surface_forms.at(token_id)
                                   : vocab->int_to_words[word_id];
   }