diff --git a/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3.json b/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3.json deleted file mode 100644 index b193c64..0000000 --- a/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3.json +++ /dev/null @@ -1 +0,0 @@ -{"layers": [{"layers": [{"layers": [{"truncate_gradient": -1, "name": "LSTM", "inner_activation": "hard_sigmoid", "activation": "tanh", "input_shape": [30, 300], "init": "glorot_uniform", "inner_init": "orthogonal", "input_dim": null, "return_sequences": false, "output_dim": 512, "forget_bias_init": "one", "input_length": null}], "name": "Sequential"}, {"layers": [{"dims": [4096], "name": "Reshape", "input_shape": [4096]}], "name": "Sequential"}], "mode": "concat", "name": "Merge", "concat_axis": 1}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "init": "uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1024}, {"beta": 0.1, "activation": "tanh", "name": "Activation", "target": 0}, {"p": 0.5, "name": "Dropout"}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "init": "uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1024}, {"beta": 0.1, "activation": "tanh", "name": "Activation", "target": 0}, {"p": 0.5, "name": "Dropout"}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "init": "uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1024}, {"beta": 0.1, "activation": "tanh", "name": "Activation", "target": 0}, {"p": 0.5, "name": "Dropout"}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "init": "glorot_uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1000}, {"beta": 0.1, "activation": "softmax", "name": "Activation", "target": 0}], "name": "Sequential"} \ No newline at end of file diff --git a/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1.json b/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1.json new file mode 100644 index 0000000..38058c5 --- /dev/null +++ b/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1.json @@ -0,0 +1 @@ +{"layers": [{"layers": [{"layers": [{"name": "LSTM", "inner_activation": "hard_sigmoid", "go_backwards": false, "output_dim": 512, "input_shape": [30, 300], "stateful": false, "cache_enabled": true, "init": "glorot_uniform", "inner_init": "orthogonal", "input_dim": 300, "return_sequences": false, "activation": "tanh", "forget_bias_init": "one", "input_length": null}], "name": "Sequential"}, {"layers": [{"cache_enabled": true, "dims": [4096], "name": "Reshape", "input_shape": [4096]}], "name": "Sequential"}], "name": "Merge", "concat_axis": 1, "dot_axes": -1, "cache_enabled": true, "mode": "concat"}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "cache_enabled": true, "init": "uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1024}, {"cache_enabled": true, "activation": "tanh", "name": "Activation"}, {"cache_enabled": true, "name": "Dropout", "p": 0.5}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "cache_enabled": true, "init": "uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1024}, {"cache_enabled": true, "activation": "tanh", "name": "Activation"}, {"cache_enabled": true, "name": "Dropout", "p": 0.5}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "cache_enabled": true, "init": "uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1024}, {"cache_enabled": true, "activation": "tanh", "name": "Activation"}, {"cache_enabled": true, "name": "Dropout", "p": 0.5}, {"b_constraint": null, "name": "Dense", "activity_regularizer": null, "W_constraint": null, "cache_enabled": true, "init": "glorot_uniform", "activation": "linear", "input_dim": null, "b_regularizer": null, "W_regularizer": null, "output_dim": 1000}, {"cache_enabled": true, "activation": "softmax", "name": "Activation"}], "name": "Sequential"} \ No newline at end of file diff --git a/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_epoch_070.hdf5 b/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1_epoch_199.hdf5 similarity index 76% rename from models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_epoch_070.hdf5 rename to models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1_epoch_199.hdf5 index a76440c..9521380 100644 Binary files a/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_epoch_070.hdf5 and b/models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1_epoch_199.hdf5 differ diff --git a/scripts/demo_batch.py b/scripts/demo_batch.py index a9d2315..47908dd 100644 --- a/scripts/demo_batch.py +++ b/scripts/demo_batch.py @@ -26,8 +26,8 @@ def main(): local_images = [ f for f in listdir(image_dir) if isfile(join(image_dir,f)) ] parser = argparse.ArgumentParser() - parser.add_argument('-model', type=str, default='../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3.json') - parser.add_argument('-weights', type=str, default='../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_epoch_070.hdf5') + parser.add_argument('-model', type=str, default='../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1.json') + parser.add_argument('-weights', type=str, default='../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1_epoch_199.hdf5') parser.add_argument('-sample_size', type=int, default=25) args = parser.parse_args() diff --git a/scripts/evaluateLSTM.py b/scripts/evaluateLSTM.py index 2367bb5..3a3adaa 100644 --- a/scripts/evaluateLSTM.py +++ b/scripts/evaluateLSTM.py @@ -3,6 +3,7 @@ from keras.models import model_from_json from spacy.en import English +import spacy import numpy as np import scipy.io from sklearn.externals import joblib @@ -16,6 +17,7 @@ def main(): parser.add_argument('-model', type=str, required=True) parser.add_argument('-weights', type=str, required=True) parser.add_argument('-results', type=str, required=True) + parser.add_argument('-word_vector', type=str, default='') args = parser.parse_args() model = model_from_json(open(args.model).read()) @@ -46,8 +48,13 @@ def main(): id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) - nlp = English() - print 'Loaded word2vec features' + if args.word_vector == 'glove': + nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors') + else: + nlp = English() + + print 'loaded ' + args.word_vector + ' word2vec features...' + nb_classes = 1000 y_predict_text = [] @@ -104,4 +111,4 @@ def main(): print 'Final Accuracy on the validation set is', correct_val/total if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/evaluateMLP.py b/scripts/evaluateMLP.py index 483836e..0db1cff 100644 --- a/scripts/evaluateMLP.py +++ b/scripts/evaluateMLP.py @@ -4,6 +4,7 @@ from keras.models import model_from_json from spacy.en import English +import spacy import numpy as np import scipy.io from sklearn.externals import joblib @@ -16,18 +17,16 @@ def main(): parser.add_argument('-model', type=str, required=True) parser.add_argument('-weights', type=str, required=True) parser.add_argument('-results', type=str, required=True) + parser.add_argument('-word_vector', type=str, default='') args = parser.parse_args() model = model_from_json(open(args.model).read()) model.load_weights(args.weights) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - questions_val = open('../data/preprocessed/questions_val2014.txt', - 'r').read().decode('utf8').splitlines() - answers_val = open('../data/preprocessed/answers_val2014_all.txt', - 'r').read().decode('utf8').splitlines() - images_val = open('../data/preprocessed/images_val2014.txt', - 'r').read().decode('utf8').splitlines() + questions_val = open('../data/preprocessed/questions_val2014.txt', 'r').read().decode('utf8').splitlines() + answers_val = open('../data/preprocessed/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines() + images_val = open('../data/preprocessed/images_val2014.txt', 'r').read().decode('utf8').splitlines() vgg_model_path = '../features/coco/vgg_feats.mat' print 'Model compiled, weights loaded...' @@ -42,19 +41,21 @@ def main(): id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) - nlp = English() - print 'loaded word2vec features' + if args.word_vector == 'glove': + nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors') + else: + nlp = English() + + print 'loaded ' + args.word_vector + ' word2vec features...' + nb_classes = 1000 y_predict_text = [] batchSize = 128 - widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'), - ' ', ETA()] + widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'), ' ', ETA()] pbar = ProgressBar(widgets=widgets) - for qu_batch,an_batch,im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), - grouper(answers_val, batchSize, fillvalue=answers_val[0]), - grouper(images_val, batchSize, fillvalue=images_val[0]))): + for qu_batch,an_batch,im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), grouper(answers_val, batchSize, fillvalue=answers_val[0]), grouper(images_val, batchSize, fillvalue=images_val[0]))): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) if 'language_only' in args.model: X_batch = X_q_batch diff --git a/scripts/trainLSTM_1.py b/scripts/trainLSTM_1.py index 1b205e3..35addf2 100644 --- a/scripts/trainLSTM_1.py +++ b/scripts/trainLSTM_1.py @@ -1,4 +1,5 @@ import numpy as np +import spacy import scipy.io import sys import argparse @@ -29,6 +30,7 @@ def main(): parser.add_argument('-num_epochs', type=int, default=100) parser.add_argument('-model_save_interval', type=int, default=5) parser.add_argument('-batch_size', type=int, default=128) + parser.add_argument('-word_vector', type=str, default='') #TODO Feature parser.add_argument('-resume_training', type=str) #TODO Feature parser.add_argument('-language_only', type=bool, default= False) args = parser.parse_args() @@ -94,8 +96,13 @@ def main(): id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) - nlp = English() - print 'loaded word2vec features...' + # Code to choose the word vectors, default is Goldberg but GLOVE is preferred + if args.word_vector == 'glove': + nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors') + else: + nlp = English() + + print 'loaded ' + args.word_vector + ' word2vec features...' ## training print 'Training started...' for k in xrange(args.num_epochs): @@ -110,7 +117,8 @@ def main(): X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch) - progbar.add(args.batch_size, values=[("train loss", loss)]) + # fix for the Keras v0.3 issue #9 + progbar.add(args.batch_size, values=[("train loss", loss[0])]) if k%args.model_save_interval == 0: @@ -119,4 +127,4 @@ def main(): model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k)) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/trainLSTM_language.py b/scripts/trainLSTM_language.py index 1c46648..c0e151c 100644 --- a/scripts/trainLSTM_language.py +++ b/scripts/trainLSTM_language.py @@ -3,6 +3,7 @@ import argparse import numpy as np +import spacy from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation @@ -23,6 +24,10 @@ def main(): parser.add_argument('-num_lstm_layers', type=int, default=2) parser.add_argument('-dropout', type=float, default=0.2) parser.add_argument('-activation', type=str, default='tanh') + parser.add_argument('-num_epochs', type=int, default=100) + parser.add_argument('-model_save_interval', type=int, default=5) + parser.add_argument('-batch_size', type=int, default=128) + parser.add_argument('-word_vector', type=str, default='') args = parser.parse_args() questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() @@ -60,29 +65,33 @@ def main(): print 'Compilation done...' #set up word vectors - nlp = English() - print 'loaded word2vec features...' + # Code to choose the word vectors, default is Goldberg but GLOVE is preferred + if args.word_vector == 'glove': + nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors') + else: + nlp = English() + + print 'loaded ' + args.word_vector + ' word2vec features...' ## training + # Moved few variables to args.parser (num_epochs, batch_size, model_save_interval) print 'Training started...' - numEpochs = 100 - model_save_interval = 5 - batchSize = 128 - for k in xrange(numEpochs): + for k in xrange(args.num_epochs): progbar = generic_utils.Progbar(len(questions_train)) - for qu_batch,an_batch,im_batch in zip(grouper(questions_train, batchSize, fillvalue=questions_train[0]), - grouper(answers_train, batchSize, fillvalue=answers_train[0]), - grouper(images_train, batchSize, fillvalue=images_train[0])): + for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[0]), + grouper(answers_train, args.batch_size, fillvalue=answers_train[0]), + grouper(images_train, args.batch_size, fillvalue=images_train[0])): timesteps = len(nlp(qu_batch[-1])) #questions sorted in descending order of length X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch(X_q_batch, Y_batch) - progbar.add(batchSize, values=[("train loss", loss)]) + # fix for the Keras v0.3 issue #9 + progbar.add(args.batch_size, values=[("train loss", loss[0])]) - if k%model_save_interval == 0: + if k%args.model_save_interval == 0: model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k+1)) diff --git a/scripts/trainMLP.py b/scripts/trainMLP.py index 42c7a4d..7e29865 100644 --- a/scripts/trainMLP.py +++ b/scripts/trainMLP.py @@ -3,6 +3,7 @@ import argparse import numpy as np +import spacy import scipy.io from keras.models import Sequential @@ -29,6 +30,7 @@ def main(): parser.add_argument('-num_epochs', type=int, default=100) parser.add_argument('-model_save_interval', type=int, default=10) parser.add_argument('-batch_size', type=int, default=128) + parser.add_argument('-word_vector', type=str, default='') args = parser.parse_args() questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() @@ -53,8 +55,13 @@ def main(): id_split = ids.split() id_map[id_split[0]] = int(id_split[1]) - nlp = English() - print 'loaded word2vec features...' + # Code to choose the word vectors, default is Goldberg but GLOVE is preferred + if args.word_vector == 'glove': + nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors') + else: + nlp = English() + + print 'loaded ' + args.word_vector + ' word2vec features...' img_dim = 4096 word_vec_dim = 300 @@ -105,7 +112,8 @@ def main(): X_batch = np.hstack((X_q_batch, X_i_batch)) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch(X_batch, Y_batch) - progbar.add(args.batch_size, values=[("train loss", loss)]) + # fix for the Keras v0.3 issue #9 + progbar.add(args.batch_size, values=[("train loss", loss[0])]) #print type(loss) if k%args.model_save_interval == 0: model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) @@ -113,4 +121,4 @@ def main(): model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) if __name__ == "__main__": - main() \ No newline at end of file + main()