diff --git a/models/entail1509.py b/models/entail1509.py new file mode 100644 index 0000000..de1286b --- /dev/null +++ b/models/entail1509.py @@ -0,0 +1,72 @@ +""" +A model that is similar to the one from +Rocktaschel et al. "Reasoning about entailment with neural attention." +approaches 2015-state-of-art results on the anssel-wang task (with +token flags). + +The implementation is inspired by https://github.com/shyamupa/snli-entailment/blob/master/amodel.py + + +""" + +from keras.layers.core import Layer +from keras.layers import GRU, Dropout, Lambda, Dense, RepeatVector, TimeDistributedDense, Activation, Reshape, Permute, Flatten +from keras.regularizers import l2 +from keras import backend as K +import pysts.kerasts.blocks as B + +spad=60 + +def config(c): + c['dropout'] = 4/5 + c['dropoutfix_inp'] = 0 + c['dropoutfix_rec'] = 0 + c['l2reg'] = 1e-4 + c['rnnact'] = 'tanh' + c['rnninit'] = 'glorot_uniform' + c['sdim'] = 2 + + +def get_last_time_dim(X): + ans=X[:, -1, :] + return ans + +def get_R(X): + Y, alpha = X.values() + ans=K.T.batched_dot(Y,alpha) + return ans + +def get_first_sentence(X): + spad = K.spad + return X[:, :spad, :] + +def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4, pfx=''): + setattr(K, 'spad',spad) + model.add_node(Lambda(get_last_time_dim, output_shape=(N,)), name=pfx+'h_n', input=inputs[1]) + model.add_node(Lambda(get_first_sentence, output_shape=(N,spad)), name=pfx+'Y', input=inputs[0]) + model.add_node(Permute((2,1)), name=pfx+"Yp", input=pfx+'Y') + model.add_node(Dense(N,W_regularizer=l2(l2reg)),name=pfx+'Wh_n', input=pfx+'h_n') + model.add_node(RepeatVector(spad), name=pfx+'Wh_n_cross_e', input=pfx+'Wh_n') + model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name=pfx+'WY', input=pfx+'Yp') + model.add_node(Activation('tanh'), name=pfx+'M', inputs=[pfx+'Wh_n_cross_e', pfx+'WY'], merge_mode='sum') + model.add_node(TimeDistributedDense(1,activation='linear'), name=pfx+'alpha', input=pfx+'M') + 
model.add_node(Lambda(get_R, output_shape=(N,1)), name=pfx+'_r', inputs=[pfx+'Yp',pfx+'alpha'], merge_mode='join') + model.add_node(Permute((2,1)), name=pfx+"_rp", input=pfx+'_r') + model.add_node(Flatten(input_shape = (N,1)),name=pfx+'r', input=pfx+'_rp') + model.add_node(Dense(N,W_regularizer=l2(l2reg)), name=pfx+'Wr', input=pfx+'r') + model.add_node(Dense(N,W_regularizer=l2(l2reg)), name=pfx+'Wh', input=pfx+'h_n') + outputs=[pfx+'Wr',pfx+'Wh'] + return outputs + + +def prep_model(model, N, s0pad, s1pad, c): + model.add_node(name="embmerge", inputs=['e0','e1'], merge_mode='concat', layer=Activation('linear')) + B.rnn_input(model, N, spad, dropout=c['dropout'], dropoutfix_inp=c['dropoutfix_inp'], dropoutfix_rec=c['dropoutfix_rec'], + sdim=2, rnnbidi=True, return_sequences=True, + rnn=GRU, rnnact='tanh', rnninit='glorot_uniform', rnnbidi_mode='sum', + rnnlevels=1, + inputs=['embmerge']) + rnn_outputs=['e0s_', 'e1s_'] + outputs = entailment_embedding(model, rnn_outputs,2*N,spad,c['l2reg']) + return outputs + diff --git a/pysts/kerasts/blocks.py b/pysts/kerasts/blocks.py index af4218e..3e1b8d6 100644 --- a/pysts/kerasts/blocks.py +++ b/pysts/kerasts/blocks.py @@ -87,35 +87,69 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0, rnn_input(model, N, spad, dropout=0, sdim=sdim, rnnbidi=rnnbidi, return_sequences=True, rnn=rnn, rnnact=rnnact, rnninit=rnninit, rnnbidi_mode=rnnbidi_mode, rnnlevels=1, inputs=deep_inputs, pfx=pfx+'L%d'%(i,)) - model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%de0s_'%(i,)], merge_mode='concat', layer=Activation('linear')) - model.add_node(name=pfx+'L%de1s_j'%(i,), inputs=[inputs[1], pfx+'L%de1s_'%(i,)], merge_mode='concat', layer=Activation('linear')) - deep_inputs = ['L%de0s_j'%(i,), 'L%de1s_j'%(i,)] - + if len(deep_inputs)>1 or rnnbidi: + model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%de0s_'%(i,)], merge_mode='concat', layer=Activation('linear')) + 
model.add_node(name=pfx+'L%de1s_j'%(i,), inputs=[inputs[1], pfx+'L%de1s_'%(i,)], merge_mode='concat', layer=Activation('linear')) + deep_inputs = ['L%de0s_j'%(i,), 'L%de1s_j'%(i,)] + else: + model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%d'%(i,)], merge_mode='concat', layer=Activation('linear')) + if rnnbidi: if rnnbidi_mode == 'concat': sdim /= 2 - model.add_shared_node(name=pfx+'rnnf', inputs=deep_inputs, outputs=[pfx+'e0sf', pfx+'e1sf'], - layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad, + rnnf_args={} + rnnb_args={} + rnnfa_args={} + rnnba_args={} + func = model.add_shared_node + if len(deep_inputs)>1: + rnnf_args['inputs']=deep_inputs + rnnb_args['inputs']=deep_inputs + rnnf_args['outputs']= [pfx+'e%dsf'%(i) for i in range(len(deep_inputs))] + rnnb_args['outputs']= [pfx+'e%dsb'%(i) for i in range(len(deep_inputs))] + rnnfa_args['merge_mode']=rnnbidi_mode + rnnba_args['merge_mode']=rnnbidi_mode + rnnfa_args['inputs']=[pfx+'e%dsf'%(i) for i in range(len(deep_inputs))] + rnnba_args['inputs']=[pfx+'e%dsb'%(i) for i in range(len(deep_inputs))] + else: + func = model.add_node + rnnf_args['input']=deep_inputs[0] + rnnb_args['input']=deep_inputs[0] + rnnfa_args['input']=pfx+'rnnf' + rnnba_args['input']=pfx+'rnnb' + + func(name=pfx+'rnnf', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad, init=rnninit, activation=rnnact, return_sequences=return_sequences, - dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec)) - model.add_shared_node(name=pfx+'rnnb', inputs=deep_inputs, outputs=[pfx+'e0sb', pfx+'e1sb'], - layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad, + dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnf_args) + func(name=pfx+'rnnb', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad, init=rnninit, activation=rnnact, return_sequences=return_sequences, go_backwards=True, - dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec)) - model.add_node(name=pfx+'e0s', inputs=[pfx+'e0sf', 
pfx+'e0sb'], merge_mode=rnnbidi_mode, layer=Activation('linear')) - model.add_node(name=pfx+'e1s', inputs=[pfx+'e1sf', pfx+'e1sb'], merge_mode=rnnbidi_mode, layer=Activation('linear')) + dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnb_args) + model.add_node(name=pfx+'e0s', layer=Activation('linear'), **rnnfa_args) + model.add_node(name=pfx+'e1s', layer=Activation('linear'), **rnnba_args) + model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'], + layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),))) else: - model.add_shared_node(name=pfx+'rnn', inputs=deep_inputs, outputs=[pfx+'e0s', pfx+'e1s'], - layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad, + rnns_args = {} + rnndrop_args = {} + if len(deep_inputs)>1: + rnns_args['inputs']=deep_inputs + rnns_args['outputs']=[pfx+'e0s', pfx+'e1s'] + rnndrop_args['inputs'] = rnns_args['outputs'] + rnndrop_args['outputs'] = [s+'_' for s in rnndrop_args['inputs']] + func=model.add_shared_node + else: + rnns_args['input']=deep_inputs[0] + rnndrop_args['input'] = pfx+'rnn' + func=model.add_node + func(name=pfx+'rnn', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad, init=rnninit, activation=rnnact, return_sequences=return_sequences, - dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec)) - - model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'], - layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)) + dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnns_args) + func(name=pfx+'rnndrop', + layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)), **rnndrop_args) def add_multi_node(model, name, inputs, outputs, layer_class, diff --git a/tasks/rte.py b/tasks/rte.py index fb14ea3..e7c85cc 100644 --- 
a/tasks/rte.py +++ b/tasks/rte.py @@ -62,28 +62,17 @@ def prep_model(self, module_prep_model): # Sentence-aggregate embeddings final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c) - # Measurement - - if self.c['ptscorer'] == '1': - # special scoring mode just based on the answer - # (assuming that the question match is carried over to the answer - # via attention or another mechanism) - ptscorer = B.cat_ptscorer - final_outputs = [final_outputs[1]] - else: - ptscorer = self.c['ptscorer'] - kwargs = dict() if ptscorer == B.mlp_ptscorer: kwargs['sum_mode'] = self.c['mlpsum'] kwargs['Dinit'] = self.c['Dinit'] model.add_node(name='scoreS0', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out0", **kwargs), - layer=Activation('sigmoid')) + layer=Activation('linear')) model.add_node(name='scoreS1', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out1", **kwargs), - layer=Activation('sigmoid')) + layer=Activation('linear')) model.add_node(name='scoreS2', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out2", **kwargs), - layer=Activation('sigmoid')) + layer=Activation('linear')) model.add_node(name='scoreV', inputs=['scoreS0', 'scoreS1', 'scoreS2'], merge_mode='concat', layer=Activation('softmax')) model.add_output(name='score', input='scoreV')