diff --git a/.gitignore b/.gitignore
index c94ac18..a7cfbae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ Makefile~
 *.npy
 *.bz2
 *#*
+.idea
diff --git a/examples/export_word2vec_format.py b/examples/export_word2vec_format.py
new file mode 100644
index 0000000..39307da
--- /dev/null
+++ b/examples/export_word2vec_format.py
@@ -0,0 +1,22 @@
+"""
+Export a saved GloVe model to the word2vec .vec text format.
+
+Example command:
+    python export_word2vec_format.py -i model.model -o model.vec
+"""
+import argparse
+
+from glove import Glove
+
+
+if __name__ == '__main__':
+    # Set up command line parameters. Both paths are required so that a
+    # missing argument yields a usage error rather than Glove.load(None).
+    parser = argparse.ArgumentParser(description='Export model to word2vec format')
+    parser.add_argument("-i", "--input", type=str, required=True,
+                        help="input model")
+    parser.add_argument("-o", "--output", type=str, required=True,
+                        help="output model")
+    args = parser.parse_args()
+    glove = Glove.load(args.input)
+    glove.save_word2vec_format(args.output)
diff --git a/glove/glove.py b/glove/glove.py
index ec90ca3..f09a72d 100644
--- a/glove/glove.py
+++ b/glove/glove.py
@@ -218,6 +218,44 @@ def save(self, filename):
                         savefile,
                         protocol=pickle.HIGHEST_PROTOCOL)
 
+    def save_word2vec_format(self, filename):
+        """
+        Serialize model to filename in word2vec .vec format.
+
+        The first line of the file holds the number of exported words
+        and the vector dimensionality; each following line holds one
+        word followed by its vector components, separated by spaces.
+
+        Raises ValueError if word_vectors or dictionary is None. This
+        is checked before the file is opened, so a failed export does
+        not truncate an existing file.
+        """
+        if self.word_vectors is None:
+            raise ValueError('Model has no word vectors to export')
+        if self.dictionary is None:
+            raise ValueError('No word dictionary supplied')
+
+        cols = self.word_vectors.shape[1]
+
+        if hasattr(self.dictionary, 'iteritems'):
+            # Python 2 compat
+            items_iterator = self.dictionary.iteritems()
+        else:
+            items_iterator = self.dictionary.items()
+
+        # NOTE(review): the file is written with the platform default text
+        # encoding; confirm this is acceptable for non-ASCII vocabularies.
+        with open(filename, 'w') as savefile:
+            # The header counts the lines written below (one per dictionary
+            # entry), which need not equal the number of rows in word_vectors.
+            savefile.write("%d %d\n" % (len(self.dictionary), cols))
+
+            for word, idx in items_iterator:
+                # join builds the whole line in one pass instead of
+                # growing a string component by component.
+                fields = [word] + [str(val) for val in self.word_vectors[idx]]
+                savefile.write(" ".join(fields) + "\n")
+
     @classmethod
     def load(cls, filename):
         """