From 709b139e24bb7b7a88fd1f5869da35c5703ef0aa Mon Sep 17 00:00:00 2001
From: Tomas Vyskocil
Date: Sat, 20 Aug 2016 10:45:02 +0200
Subject: [PATCH 1/6] Added simple ptscorers for the entail1509 model

---
 models/entail1509.py    | 92 +++++++++++++++++++++++++++++++++++++++++
 pysts/kerasts/blocks.py | 23 +++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 models/entail1509.py

diff --git a/models/entail1509.py b/models/entail1509.py
new file mode 100644
index 0000000..a993a6a
--- /dev/null
+++ b/models/entail1509.py
@@ -0,0 +1,92 @@
+"""
+A model similar to the one from
+Rocktaschel et al., "Reasoning about entailment with neural attention";
+it approaches 2015 state-of-the-art results on the anssel-wang task
+(with token flags).
+
+The implementation is inspired by https://github.com/shyamupa/snli-entailment/blob/master/amodel.py
+
+
+"""
+
+
+from keras.layers import GRU, Dropout, Lambda, Dense, RepeatVector, TimeDistributedDense, Activation, Reshape, Permute, Flatten
+from keras.regularizers import l2
+from keras import backend as K
+import pysts.kerasts.blocks as B
+
+def config(c):
+    c['dropout'] = 4/5
+    c['dropoutfix_inp'] = 0
+    c['dropoutfix_rec'] = 0
+    c['l2reg'] = 1e-4
+    c['rnnact'] = 'tanh'
+    c['rnninit'] = 'glorot_uniform'
+    c['sdim'] = 2
+
+
+
+def get_H_n(X):
+    ans=X[:, -1, :]
+    return ans
+
+
+def generate_get_Y(size):
+    def get_Y_generator(X):
+        return get_Y(X,size)
+    return get_Y_generator
+
+def get_Y(X):
+    return X[:, :60, :]
+
+def get_H_0():
+    return X[:, 0, :]
+
+
+def get_R(X):
+    Y, alpha = X.values() # Y should be (None,L,k) and alpha should be (None,L,1) and ans should be (None, k,1)
+    ans=K.T.batched_dot(Y,alpha)
+    return ans
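+
+# Orientation notes for the attention readout below (a rough summary of the
+# intent, in the spirit of Rocktaschel et al.): 'alpha' carries one weight
+# per premise timestep, get_R computes r = Y^T alpha (the attention-weighted
+# sum of premise states), and the 'Wr'/'Wh' projections are meant to be
+# merged by a downstream scorer such as to_n_ptscorer (added to blocks.py in
+# this patch), which sums them and applies tanh, roughly tanh(W^r r + W^h h_N).
+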
+def rnn_input(model, N, spad, input,c):
+    model.add_node(name='forward', input=input,
+                   layer=GRU(input_dim=N, output_dim=N, input_length=2*spad,
+                             init=c['rnninit'], activation=c['rnnact'],
+                             return_sequences=True,
+                             dropout_W=c['dropoutfix_inp'], dropout_U=c['dropoutfix_rec']))
+
+    model.add_node(name='backward', input=input,
+                   layer=GRU(input_dim=N, output_dim=N, input_length=2*spad,
+                             init=c['rnninit'], activation=c['rnnact'],
+                             return_sequences=True, go_backwards=True,
+                             dropout_W=c['dropoutfix_inp'], dropout_U=c['dropoutfix_rec']))
+    outputs=['e0s_', 'e1s_']
+    model.add_node(name='rnndrop', inputs=['forward', 'backward'], merge_mode='concat' ,
+                   layer=Dropout(c['dropout'], input_shape=(2*spad, int(N*c['sdim'])) ))
+    return ['rnndrop']*2
+
+
+def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4):
+    model.add_node(Lambda(get_H_n, output_shape=(N,)), name='h_n', input=inputs[1])
+    model.add_node(Lambda(get_Y, output_shape=(spad, N)), name='Y', input=inputs[0])
+    model.add_node(Dense(N,W_regularizer=l2(l2reg)),name='Wh_n', input='h_n')
+    model.add_node(RepeatVector(spad), name='Wh_n_cross_e', input='Wh_n')
+    model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name='WY', input='Y')
+    model.add_node(Activation('tanh'), name='M', inputs=['Wh_n_cross_e', 'WY'], merge_mode='sum')
+    model.add_node(TimeDistributedDense(1,activation='softmax'), name='alpha', input='M')
+    model.add_node(Permute((2,1)), name="Yp", input='Y')
+    #model.add_node(name='_r',layer=B.dot_time_distributed_merge(model, ['Y','alpha'],
+    #                                                            cos_norm=False))
+    model.add_node(Lambda(get_R, output_shape=(N,1)), name='_r', inputs=['Yp','alpha'], merge_mode='join')
+    model.add_node(Flatten(input_shape = (N,1)),name='r', input='_r')
+    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name='Wr', input='r')
+    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name='Wh', input='h_n')
+    outputs=['Wr','Wh']
+    return outputs
+
+
+def prep_model(model, N, s0pad, s1pad, c):
+    model.add_node(name="embmerge", inputs=['e0','e1'], merge_mode='concat', layer=Activation('linear'))
+    rnn_outputs=rnn_input(model,N,s0pad,'embmerge',c)
+    output = entailment_embedding(model, rnn_outputs,2*N,s0pad,c['l2reg'])
+    return output
diff --git a/pysts/kerasts/blocks.py b/pysts/kerasts/blocks.py
index af4218e..e36ac08 100644
--- a/pysts/kerasts/blocks.py
+++ b/pysts/kerasts/blocks.py
@@ -117,6 +117,29 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0,
     model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
                           layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)))
 
+def prep_to_n_kwargs(inputs, extra_inp):
+    kwargs = {}
+    inputs = list(inputs)
+    if len(inputs)+len(extra_inp)==1:
+        if len(inputs)>len(extra_inp):
+            kwargs['input']=inputs[0]
+        else:
+            kwargs['input']=inputs[0]
+    else:
+        kwargs['inputs']=inputs+extra_inp
+        kwargs['merge_mode']='sum'
+    return kwargs
+
+def to_n_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
+    kwargs = prep_to_n_kwargs(inputs, extra_inp)
+    model.add_node(Activation('tanh'), name=pfx+'to_n_sum', **kwargs)
+    model.add_node(Dense(output_dim=output_dim,activation='softmax',W_regularizer=l2(l2reg)), name=pfx+'to_n_out', input=pfx+'to_n_sum')
+    return (pfx+"to_n_out")
+
+def to_n_simple_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
+    kwargs = prep_to_n_kwargs(inputs, extra_inp)
+    model.add_node(Dense(output_dim=output_dim,activation='linear',W_regularizer=l2(l2reg)), name=pfx+'_to_n_out', **kwargs)
+    return pfx+'_to_n_out'
 
 def add_multi_node(model, name, inputs, outputs, layer_class,
                    layer_args, siamese=True, **kwargs):
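
A minimal usage sketch of the to_n scorers introduced above, in the Graph API
style used throughout this repo (the node names and sizes here are made up
for illustration):

    import pysts.kerasts.blocks as B
    # model: a keras.models.Graph that already contains nodes 'Wr' and 'Wh'
    out = B.to_n_ptscorer(model, ['Wr', 'Wh'], Ddim=2, N=608, l2reg=1e-4,
                          pfx='entail_out', output_dim=3)
    # adds 'entail_outto_n_sum' = tanh('Wr' + 'Wh') and a 3-way softmax Dense
    # on top of it; returns the output node name, 'entail_outto_n_out'
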
From 186b007a63501bfb14115d3bf75fb1124ed11a4c Mon Sep 17 00:00:00 2001
From: Tomas Vyskocil
Date: Sat, 20 Aug 2016 16:34:53 +0200
Subject: [PATCH 2/6] Added entailment model implementation

---
 models/entail1509.py | 31 ++++++++++++++++++++++---------
 tasks/rte.py         |  6 +++---
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/models/entail1509.py b/models/entail1509.py
index a993a6a..0c33297 100644
--- a/models/entail1509.py
+++ b/models/entail1509.py
@@ -9,7 +9,7 @@
 """
 
-
+from keras.layers.core import Layer
 from keras.layers import GRU, Dropout, Lambda, Dense, RepeatVector, TimeDistributedDense, Activation, Reshape, Permute, Flatten
 from keras.regularizers import l2
 from keras import backend as K
 import pysts.kerasts.blocks as B
@@ -23,6 +23,7 @@ def config(c):
     c['rnnact'] = 'tanh'
     c['rnninit'] = 'glorot_uniform'
     c['sdim'] = 2
+    c['ptscorer']=B.to_n_simple_ptscorer
 
 
 
@@ -36,15 +37,28 @@ def get_Y_generator(X):
         return get_Y(X,size)
     return get_Y_generator
 
-def get_Y(X):
-    return X[:, :60, :]
+def get_Y(X, size):
+    return X[:, :size, :]
 
 def get_H_0():
     return X[:, 0, :]
 
+class SplitSequence(Layer):
+    def __init__(self, split_ind , **kwargs):
+        self.split_ind=split_ind
+        super(SplitSequence, self).__init__(**kwargs)
+
+    def call(self, x, mask=None):
+        return x[:, :self.split_ind, :]
+
+    def get_output_shape_for(self, input_shape):
+        input_shape[1]=self.split_ind
+        return input_shape
+
+
 def get_R(X):
-    Y, alpha = X.values() # Y should be (None,L,k) and alpha should be (None,L,1) and ans should be (None, k,1)
+    Y, alpha = X.values()
     ans=K.T.batched_dot(Y,alpha)
     return ans
@@ -68,15 +82,13 @@ def rnn_input(model, N, spad, input,c):
 
 def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4):
     model.add_node(Lambda(get_H_n, output_shape=(N,)), name='h_n', input=inputs[1])
-    model.add_node(Lambda(get_Y, output_shape=(spad, N)), name='Y', input=inputs[0])
+    model.add_node(SplitSequence(spad), name='Y', input=inputs[0])
     model.add_node(Dense(N,W_regularizer=l2(l2reg)),name='Wh_n', input='h_n')
     model.add_node(RepeatVector(spad), name='Wh_n_cross_e', input='Wh_n')
     model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name='WY', input='Y')
     model.add_node(Activation('tanh'), name='M', inputs=['Wh_n_cross_e', 'WY'], merge_mode='sum')
     model.add_node(TimeDistributedDense(1,activation='softmax'), name='alpha', input='M')
     model.add_node(Permute((2,1)), name="Yp", input='Y')
-    #model.add_node(name='_r',layer=B.dot_time_distributed_merge(model, ['Y','alpha'],
-    #                                                            cos_norm=False))
     model.add_node(Lambda(get_R, output_shape=(N,1)), name='_r', inputs=['Yp','alpha'], merge_mode='join')
     model.add_node(Flatten(input_shape = (N,1)),name='r', input='_r')
     model.add_node(Dense(N,W_regularizer=l2(l2reg)), name='Wr', input='r')
@@ -88,5 +100,6 @@ def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4):
 def prep_model(model, N, s0pad, s1pad, c):
     model.add_node(name="embmerge", inputs=['e0','e1'], merge_mode='concat', layer=Activation('linear'))
     rnn_outputs=rnn_input(model,N,s0pad,'embmerge',c)
-    output = entailment_embedding(model, rnn_outputs,2*N,s0pad,c['l2reg'])
-    return output
+    outputs = entailment_embedding(model, rnn_outputs,2*N,s0pad,c['l2reg'])
+    final_output = B.to_n_ptscorer(model, outputs, c['Ddim'], N, c['l2reg'], pfx="entail_out", output_dim=3)
+    return [final_output]
diff --git a/tasks/rte.py b/tasks/rte.py
index fb14ea3..91bc14f 100644
--- a/tasks/rte.py
+++ b/tasks/rte.py
@@ -79,11 +79,11 @@ def prep_model(self, module_prep_model):
         kwargs['Dinit'] = self.c['Dinit']
     model.add_node(name='scoreS0', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out0", **kwargs),
-                   layer=Activation('sigmoid'))
+                   layer=Activation('linear'))
     model.add_node(name='scoreS1', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out1", **kwargs),
-                   layer=Activation('sigmoid'))
+                   layer=Activation('linear'))
     model.add_node(name='scoreS2', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out2", **kwargs),
-                   layer=Activation('sigmoid'))
+                   layer=Activation('linear'))
     model.add_node(name='scoreV', inputs=['scoreS0', 'scoreS1', 'scoreS2'], merge_mode='concat', layer=Activation('softmax'))
     model.add_output(name='score', input='scoreV')

From 2feb4f9e8f683c2ec230b189072fbac59b13c3a7 Mon Sep 17 00:00:00 2001
From: Tomas Vyskocil
Date: Fri, 9 Sep 2016 12:01:55 +0200
Subject: [PATCH 3/6] Now using rnn from blocks in entail1509

---
 models/entail1509.py    | 52 ++++++++++++++++++++++-----------------
 pysts/kerasts/blocks.py | 57 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 77 insertions(+), 32 deletions(-)

diff --git a/models/entail1509.py b/models/entail1509.py
index 0c33297..ce49167 100644
--- a/models/entail1509.py
+++ b/models/entail1509.py
@@ -15,6 +15,8 @@
 from keras import backend as K
 import pysts.kerasts.blocks as B
 
+spad=60
+
 def config(c):
     c['dropout'] = 4/5
     c['dropoutfix_inp'] = 0
@@ -27,23 +29,12 @@ def config(c):
 
 
 
+
 def get_H_n(X):
     ans=X[:, -1, :]
     return ans
 
-def generate_get_Y(size):
-    def get_Y_generator(X):
-        return get_Y(X,size)
-    return get_Y_generator
-
-def get_Y(X, size):
-    return X[:, :size, :]
-
-def get_H_0():
-    return X[:, 0, :]
-
-
 class SplitSequence(Layer):
     def __init__(self, split_ind , **kwargs):
         self.split_ind=split_ind
@@ -53,6 +44,7 @@ def call(self, x, mask=None):
         return x[:, :self.split_ind, :]
 
     def get_output_shape_for(self, input_shape):
+        shape = input_shape
         input_shape[1]=self.split_ind
         return input_shape
 
@@ -64,33 +56,40 @@ def get_R(X):
 
 def rnn_input(model, N, spad, input,c):
     model.add_node(name='forward', input=input,
-                   layer=GRU(input_dim=N, output_dim=N, input_length=2*spad,
+                   layer=GRU(input_dim=N, output_dim=N, input_length=spad,
                              init=c['rnninit'], activation=c['rnnact'],
                              return_sequences=True,
                              dropout_W=c['dropoutfix_inp'], dropout_U=c['dropoutfix_rec']))
 
     model.add_node(name='backward', input=input,
-                   layer=GRU(input_dim=N, output_dim=N, input_length=2*spad,
+                   layer=GRU(input_dim=N, output_dim=N, input_length=spad,
                              init=c['rnninit'], activation=c['rnnact'],
                              return_sequences=True, go_backwards=True,
                              dropout_W=c['dropoutfix_inp'], dropout_U=c['dropoutfix_rec']))
     outputs=['e0s_', 'e1s_']
-    model.add_node(name='rnndrop', inputs=['forward', 'backward'], merge_mode='concat' ,
-                   layer=Dropout(c['dropout'], input_shape=(2*spad, int(N*c['sdim'])) ))
+    model.add_node(name='rnndrop', inputs=['forward', 'backward'], merge_mode='concat', concat_axis=1,
+                   layer=Dropout(c['dropout'], input_shape=( spad, N) ))
     return ['rnndrop']*2
 
+def get_Y(X):
+    spad = K.spad
+    return X[:, :spad, :]
 
 def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4):
+    setattr(K, 'spad',spad)
     model.add_node(Lambda(get_H_n, output_shape=(N,)), name='h_n', input=inputs[1])
-    model.add_node(SplitSequence(spad), name='Y', input=inputs[0])
+    model.add_node(Lambda(get_Y, output_shape=(N,spad)), name='Y', input=inputs[0])
+    #model.add_node(SplitSequence(spad), name='Yp', input=inputs[0])
+    model.add_node(Permute((2,1)), name="Yp", input='Y')
     model.add_node(Dense(N,W_regularizer=l2(l2reg)),name='Wh_n', input='h_n')
     model.add_node(RepeatVector(spad), name='Wh_n_cross_e', input='Wh_n')
-    model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name='WY', input='Y')
+    model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name='WY', input='Yp')
+    model.summary()
     model.add_node(Activation('tanh'), name='M', inputs=['Wh_n_cross_e', 'WY'], merge_mode='sum')
-    model.add_node(TimeDistributedDense(1,activation='softmax'), name='alpha', input='M')
-    model.add_node(Permute((2,1)), name="Yp", input='Y')
+    model.add_node(TimeDistributedDense(1,activation='linear'), name='alpha', input='M')
     model.add_node(Lambda(get_R, output_shape=(N,1)), name='_r', inputs=['Yp','alpha'], merge_mode='join')
-    model.add_node(Flatten(input_shape = (N,1)),name='r', input='_r')
+    model.add_node(Permute((2,1)), name="_rp", input='_r')
+    model.add_node(Flatten(input_shape = (N,1)),name='r', input='_rp')
     model.add_node(Dense(N,W_regularizer=l2(l2reg)), name='Wr', input='r')
     model.add_node(Dense(N,W_regularizer=l2(l2reg)), name='Wh', input='h_n')
     outputs=['Wr','Wh']
@@ -99,7 +98,14 @@ def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4):
 
 def prep_model(model, N, s0pad, s1pad, c):
     model.add_node(name="embmerge", inputs=['e0','e1'], merge_mode='concat', layer=Activation('linear'))
-    rnn_outputs=rnn_input(model,N,s0pad,'embmerge',c)
+    B.rnn_input(model, N, s0pad, dropout=c['dropout'], dropoutfix_inp=c['dropoutfix_inp'], dropoutfix_rec=c['dropoutfix_rec'],
+                sdim=2, rnnbidi=True, return_sequences=True,
+                rnn=GRU, rnnact='tanh', rnninit='glorot_uniform', rnnbidi_mode='sum',
+                rnnlevels=1,
+                inputs=['embmerge'])
+    rnn_outputs=['e0s_', 'e1s_']
+    #rnn_outputs=rnn_input(model,2*N,s0pad,'embmerge',c)
+    #rnn_outputs=['embmerge','embmerge']
     outputs = entailment_embedding(model, rnn_outputs,2*N,s0pad,c['l2reg'])
     final_output = B.to_n_ptscorer(model, outputs, c['Ddim'], N, c['l2reg'], pfx="entail_out", output_dim=3)
-    return [final_output]
+    return final_output
diff --git a/pysts/kerasts/blocks.py b/pysts/kerasts/blocks.py
index e36ac08..213115b 100644
--- a/pysts/kerasts/blocks.py
+++ b/pysts/kerasts/blocks.py
@@ -87,6 +87,7 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0,
         rnn_input(model, N, spad, dropout=0, sdim=sdim,
                   rnnbidi=rnnbidi, return_sequences=True, rnn=rnn, rnnact=rnnact, rnninit=rnninit,
                   rnnbidi_mode=rnnbidi_mode, rnnlevels=1, inputs=deep_inputs, pfx=pfx+'L%d'%(i,))
+
         model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%de0s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
         model.add_node(name=pfx+'L%de1s_j'%(i,), inputs=[inputs[1], pfx+'L%de1s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
         deep_inputs = ['L%de0s_j'%(i,), 'L%de1s_j'%(i,)]
@@ -94,26 +95,64 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0,
     if rnnbidi:
         if rnnbidi_mode == 'concat':
             sdim /= 2
-        model.add_shared_node(name=pfx+'rnnf', inputs=deep_inputs, outputs=[pfx+'e0sf', pfx+'e1sf'],
+        rnnfa_args={}
+        rnnba_args={}
+        if len(deep_inputs)>1:
+            model.add_shared_node(name=pfx+'rnnf', inputs = deep_inputs, outputs = [pfx+'e%dsf'%(i) for i in range(len(deep_inputs))],
                               layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
                                         init=rnninit, activation=rnnact,
                                         return_sequences=return_sequences,
-                                        dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec))
-        model.add_shared_node(name=pfx+'rnnb', inputs=deep_inputs, outputs=[pfx+'e0sb', pfx+'e1sb'],
+                                        dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnf_args)
+            model.add_shared_node(name=pfx+'rnnb', inputs=deep_inputs, outputs=[pfx+'e%dsb'%(i) for i in range(len(deep_inputs))],
                               layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
                                         init=rnninit, activation=rnnact,
                                         return_sequences=return_sequences, go_backwards=True,
-                                        dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec))
-        model.add_node(name=pfx+'e0s', inputs=[pfx+'e0sf', pfx+'e0sb'], merge_mode=rnnbidi_mode, layer=Activation('linear'))
-        model.add_node(name=pfx+'e1s', inputs=[pfx+'e1sf', pfx+'e1sb'], merge_mode=rnnbidi_mode, layer=Activation('linear'))
+                                        dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnb_args)
+            rnnfa_args['merge_mode']=rnnbidi_mode
+            rnnba_args['merge_mode']=rnnbidi_mode
+            rnnfa_args['inputs']=[pfx+'e%dsf'%(i) for i in range(len(deep_inputs))]
+            rnnba_args['inputs']=[pfx+'e%dsb'%(i) for i in range(len(deep_inputs))]
-    else:
-        model.add_shared_node(name=pfx+'rnn', inputs=deep_inputs, outputs=[pfx+'e0s', pfx+'e1s'],
+        else:
+            model.add_node(name=pfx+'rnnf', input=deep_inputs[0],
                           layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
                                     init=rnninit, activation=rnnact,
                                     return_sequences=return_sequences,
-                                    dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec))
+                                    dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnf_args)
+            model.add_node(name=pfx+'rnnb', deep_inputs[0],
+                           layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
+                                     init=rnninit, activation=rnnact,
+                                     return_sequences=return_sequences, go_backwards=True,
+                                     dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnb_args)
+            rnnfa_args['input']=pfx+'rnnf'
+            rnnba_args['input']=pfx+'rnnb'
+        model.add_node(name=pfx+'e0s', layer=Activation('linear'), **rnnfa_args)
+        model.add_node(name=pfx+'e1s', layer=Activation('linear'), **rnnba_args)
+    else:
+        rnns_args = {}
+        if len(deep_inputs)>1:
+            rnns_args['inputs']=deep_inputs
+            rnns_args['outputs']=[pfx+'e0s', pfx+'e1s']
+            model.add_shared_node(name=pfx+'rnn', inputs]=deep_inputs, outputs=[pfx+'e0s', pfx+'e1s'],
+                                  layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
+                                            init=rnninit, activation=rnnact,
+                                            return_sequences=return_sequences,
+                                            dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnns_args)
+        else:
+            model.add_shared_node(name=pfx+'rnn', input=deep_inputs[0],
+                                  layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
+                                            init=rnninit, activation=rnnact,
+                                            return_sequences=return_sequences,
+                                            dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnns_args)
+    rnndrop_args={}
+    if len(deep_inputs)>1:
+        rnndrop_args['inputs']=[pfx+'e0s', pfx+'e1s']
+        rnndrop_args['outputs']=[pfx+'e0s_', pfx+'e1s_']
+    else:
+        rnndrop_args['input']=[pfx+'e0s', pfx+'e1s']
+        rnndrop_args['output']=[pfx+'e0s_']
 
     model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
                           layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)))

From 6f08651962e7784d48fab32dc53464db10a571d1 Mon Sep 17 00:00:00 2001
From: Tomas Vyskocil
Date: Mon, 26 Sep 2016 18:49:45 +0200
Subject: [PATCH 4/6] Fixed rnn_input support for single input

---
 pysts/kerasts/blocks.py | 85 +++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 45 deletions(-)

diff --git a/pysts/kerasts/blocks.py b/pysts/kerasts/blocks.py
index 213115b..99c31eb 100644
--- a/pysts/kerasts/blocks.py
+++ b/pysts/kerasts/blocks.py
@@ -87,83 +87,78 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0,
         rnn_input(model, N, spad, dropout=0, sdim=sdim,
                   rnnbidi=rnnbidi, return_sequences=True, rnn=rnn, rnnact=rnnact, rnninit=rnninit,
                   rnnbidi_mode=rnnbidi_mode, rnnlevels=1, inputs=deep_inputs, pfx=pfx+'L%d'%(i,))
-
-        model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%de0s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
-        model.add_node(name=pfx+'L%de1s_j'%(i,), inputs=[inputs[1], pfx+'L%de1s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
-        deep_inputs = ['L%de0s_j'%(i,), 'L%de1s_j'%(i,)]
-
+        if len(deep_inputs)>1 or rnnbidi:
+            model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%de0s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
+            model.add_node(name=pfx+'L%de1s_j'%(i,), inputs=[inputs[1], pfx+'L%de1s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
+            deep_inputs = ['L%de0s_j'%(i,), 'L%de1s_j'%(i,)]
+        else:
+            model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%d'%(i,)], merge_mode='concat', layer=Activation('linear'))
+
     if rnnbidi:
         if rnnbidi_mode == 'concat':
             sdim /= 2
+        rnnf_args={}
+        rnnb_args={}
         rnnfa_args={}
         rnnba_args={}
+        func = model.add_shared_node
         if len(deep_inputs)>1:
-            model.add_shared_node(name=pfx+'rnnf', inputs = deep_inputs, outputs = [pfx+'e%dsf'%(i) for i in range(len(deep_inputs))],
-                                  layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
-                                            init=rnninit, activation=rnnact,
-                                            return_sequences=return_sequences,
-                                            dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnf_args)
-            model.add_shared_node(name=pfx+'rnnb', inputs=deep_inputs, outputs=[pfx+'e%dsb'%(i) for i in range(len(deep_inputs))],
-                                  layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
-                                            init=rnninit, activation=rnnact,
-                                            return_sequences=return_sequences, go_backwards=True,
-                                            dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnb_args)
+            rnnf_args['inputs']=deep_inputs
+            rnnb_args['inputs']=deep_inputs
+            rnnf_args['outputs']= [pfx+'e%dsf'%(i) for i in range(len(deep_inputs))]
+            rnnb_args['outputs']= [pfx+'e%dsb'%(i) for i in range(len(deep_inputs))]
             rnnfa_args['merge_mode']=rnnbidi_mode
             rnnba_args['merge_mode']=rnnbidi_mode
             rnnfa_args['inputs']=[pfx+'e%dsf'%(i) for i in range(len(deep_inputs))]
             rnnba_args['inputs']=[pfx+'e%dsb'%(i) for i in range(len(deep_inputs))]
         else:
-            model.add_node(name=pfx+'rnnf', input=deep_inputs[0],
-                           layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
+            func = model.add_node
+            rnnf_args['input']=deep_inputs[0]
+            rnnb_args['input']=deep_inputs[0]
+            rnnfa_args['input']=pfx+'rnnf'
+            rnnba_args['input']=pfx+'rnnb'
+
+        func(name=pfx+'rnnf', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
                                         init=rnninit, activation=rnnact,
                                         return_sequences=return_sequences,
-                                        dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnf_args)
-            model.add_node(name=pfx+'rnnb', deep_inputs[0],
-                           layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
+                                        dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnf_args)
+        func(name=pfx+'rnnb', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
                                         init=rnninit, activation=rnnact,
                                         return_sequences=return_sequences, go_backwards=True,
                                         dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnb_args)
-            rnnfa_args['input']=pfx+'rnnf'
-            rnnba_args['input']=pfx+'rnnb'
         model.add_node(name=pfx+'e0s', layer=Activation('linear'), **rnnfa_args)
         model.add_node(name=pfx+'e1s', layer=Activation('linear'), **rnnba_args)
+        model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
+                              layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)))
     else:
         rnns_args = {}
+        rnndrop_args = {}
         if len(deep_inputs)>1:
             rnns_args['inputs']=deep_inputs
             rnns_args['outputs']=[pfx+'e0s', pfx+'e1s']
-            model.add_shared_node(name=pfx+'rnn', inputs]=deep_inputs, outputs=[pfx+'e0s', pfx+'e1s'],
-                                  layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
-                                            init=rnninit, activation=rnnact,
-                                            return_sequences=return_sequences,
-                                            dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnns_args)
-        else:
-            model.add_shared_node(name=pfx+'rnn', input=deep_inputs[0],
-                                  layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
-                                            init=rnninit, activation=rnnact,
-                                            return_sequences=return_sequences,
-                                            dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnns_args)
+            rnndrop_args['inputs'] = rnns_args['outputs']
+            rnndrop_args['outputs'] = [s+'_' for s in rnndrop_args['inputs']]
+            func=model.add_shared_node
+        else:
+            rnns_args['input']=deep_inputs[0]
+            rnndrop_args['input'] = pfx+'rnn'
+            func=model.add_node
+        func(name=pfx+'rnn', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
                                        init=rnninit, activation=rnnact,
                                        return_sequences=return_sequences,
                                        dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnns_args)
-        rnndrop_args={}
-        if len(deep_inputs)>1:
-            rnndrop_args['inputs']=[pfx+'e0s', pfx+'e1s']
-            rnndrop_args['outputs']=[pfx+'e0s_', pfx+'e1s_']
-        else:
-            rnndrop_args['input']=[pfx+'e0s', pfx+'e1s']
-            rnndrop_args['output']=[pfx+'e0s_']
-
-        model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
-                              layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)))
+        func(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
+             layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)), **rnndrop_args)
 
 def prep_to_n_kwargs(inputs, extra_inp):
     kwargs = {}
     inputs = list(inputs)
     if len(inputs)+len(extra_inp)==1:
         if len(inputs)>len(extra_inp):
-            kwargs['input']=inputs[0]
+            kwargs['input']=inputs
         else:
-            kwargs['input']=inputs[0]
+            kwargs['input']=extra_inp[0]
     else:
         kwargs['inputs']=inputs+extra_inp
         kwargs['merge_mode']='sum'
@@ -173,11 +168,11 @@ def to_n_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
     kwargs = prep_to_n_kwargs(inputs, extra_inp)
     model.add_node(Activation('tanh'), name=pfx+'to_n_sum', **kwargs)
     model.add_node(Dense(output_dim=output_dim,activation='softmax',W_regularizer=l2(l2reg)), name=pfx+'to_n_out', input=pfx+'to_n_sum')
-    return (pfx+"to_n_out")
+    return pfx+"to_n_out"
 
 def to_n_simple_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
     kwargs = prep_to_n_kwargs(inputs, extra_inp)
-    model.add_node(Dense(output_dim=output_dim,activation='linear',W_regularizer=l2(l2reg)), name=pfx+'_to_n_out', **kwargs)
+    model.add_node(layer=Dense(output_dim=output_dim,activation='linear',W_regularizer=l2(l2reg)), name=pfx+"_to_n_out", **kwargs)
     return pfx+'_to_n_out'
 
 def add_multi_node(model, name, inputs, outputs, layer_class,
                    layer_args, siamese=True, **kwargs):
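
For reference, the batch-wise product that get_R relies on (a numpy rendition
of the K.T.batched_dot semantics assumed on the Theano backend; the sizes are
made up, cf. spad=60 and N=608 above):

    import numpy as np

    batch, L, k = 32, 60, 608              # batch size, premise length, width
    Yp = np.random.randn(batch, k, L)      # premise states permuted to (k, L)
    alpha = np.random.randn(batch, L, 1)   # one attention weight per timestep
    r = np.matmul(Yp, alpha)               # (batch, k, 1), cf. output_shape=(N, 1)
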
From 7ed2f29b41d02a8aa2ca083089ea7818dd3d2f79 Mon Sep 17 00:00:00 2001
From: Tomas Vyskocil
Date: Mon, 26 Sep 2016 18:52:06 +0200
Subject: [PATCH 5/6] Cleanup in entail model

---
 models/entail1509.py | 78 ++++++++++++--------------------------------
 1 file changed, 21 insertions(+), 57 deletions(-)

diff --git a/models/entail1509.py b/models/entail1509.py
index ce49167..fa49338 100644
--- a/models/entail1509.py
+++ b/models/entail1509.py
@@ -30,82 +30,46 @@ def config(c):
 
 
 
-def get_H_n(X):
+def get_last_time_dim(X):
     ans=X[:, -1, :]
     return ans
 
-
-class SplitSequence(Layer):
-    def __init__(self, split_ind , **kwargs):
-        self.split_ind=split_ind
-        super(SplitSequence, self).__init__(**kwargs)
-
-    def call(self, x, mask=None):
-        return x[:, :self.split_ind, :]
-
-    def get_output_shape_for(self, input_shape):
-        shape = input_shape
-        input_shape[1]=self.split_ind
-        return input_shape
-
-
 def get_R(X):
     Y, alpha = X.values()
     ans=K.T.batched_dot(Y,alpha)
     return ans
 
-def rnn_input(model, N, spad, input,c):
-    model.add_node(name='forward', input=input,
-                   layer=GRU(input_dim=N, output_dim=N, input_length=spad,
-                             init=c['rnninit'], activation=c['rnnact'],
-                             return_sequences=True,
-                             dropout_W=c['dropoutfix_inp'], dropout_U=c['dropoutfix_rec']))
-
-    model.add_node(name='backward', input=input,
-                   layer=GRU(input_dim=N, output_dim=N, input_length=spad,
-                             init=c['rnninit'], activation=c['rnnact'],
-                             return_sequences=True, go_backwards=True,
-                             dropout_W=c['dropoutfix_inp'], dropout_U=c['dropoutfix_rec']))
-    outputs=['e0s_', 'e1s_']
-    model.add_node(name='rnndrop', inputs=['forward', 'backward'], merge_mode='concat', concat_axis=1,
-                   layer=Dropout(c['dropout'], input_shape=( spad, N) ))
-    return ['rnndrop']*2
-
-def get_Y(X):
+def get_first_sentence(X):
     spad = K.spad
     return X[:, :spad, :]
 
-def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4):
+def entailment_embedding(model, inputs,N=608, spad=60, l2reg=1e-4, pfx=''):
     setattr(K, 'spad',spad)
-    model.add_node(Lambda(get_H_n, output_shape=(N,)), name='h_n', input=inputs[1])
-    model.add_node(Lambda(get_Y, output_shape=(N,spad)), name='Y', input=inputs[0])
-    #model.add_node(SplitSequence(spad), name='Yp', input=inputs[0])
-    model.add_node(Permute((2,1)), name="Yp", input='Y')
-    model.add_node(Dense(N,W_regularizer=l2(l2reg)),name='Wh_n', input='h_n')
-    model.add_node(RepeatVector(spad), name='Wh_n_cross_e', input='Wh_n')
-    model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name='WY', input='Yp')
-    model.summary()
-    model.add_node(Activation('tanh'), name='M', inputs=['Wh_n_cross_e', 'WY'], merge_mode='sum')
-    model.add_node(TimeDistributedDense(1,activation='linear'), name='alpha', input='M')
-    model.add_node(Lambda(get_R, output_shape=(N,1)), name='_r', inputs=['Yp','alpha'], merge_mode='join')
-    model.add_node(Permute((2,1)), name="_rp", input='_r')
-    model.add_node(Flatten(input_shape = (N,1)),name='r', input='_rp')
-    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name='Wr', input='r')
-    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name='Wh', input='h_n')
-    outputs=['Wr','Wh']
+    model.add_node(Lambda(get_last_time_dim, output_shape=(N,)), name=pfx+'h_n', input=inputs[1])
+    model.add_node(Lambda(get_first_sentence, output_shape=(N,spad)), name=pfx+'Y', input=inputs[0])
+    model.add_node(Permute((2,1)), name=pfx+"Yp", input=pfx+'Y')
+    model.add_node(Dense(N,W_regularizer=l2(l2reg)),name=pfx+'Wh_n', input=pfx+'h_n')
+    model.add_node(RepeatVector(spad), name=pfx+'Wh_n_cross_e', input=pfx+'Wh_n')
+    model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name=pfx+'WY', input=pfx+'Yp')
+    model.add_node(Activation('tanh'), name=pfx+'M', inputs=[pfx+'Wh_n_cross_e', pfx+'WY'], merge_mode='sum')
+    model.add_node(TimeDistributedDense(1,activation='linear'), name=pfx+'alpha', input=pfx+'M')
+    model.add_node(Lambda(get_R, output_shape=(N,1)), name=pfx+'_r', inputs=[pfx+'Yp',pfx+'alpha'], merge_mode='join')
+    model.add_node(Permute((2,1)), name=pfx+"_rp", input=pfx+'_r')
+    model.add_node(Flatten(input_shape = (N,1)),name=pfx+'r', input=pfx+'_rp')
+    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name=pfx+'Wr', input=pfx+'r')
+    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name=pfx+'Wh', input=pfx+'h_n')
+    outputs=[pfx+'Wr',pfx+'Wh']
     return outputs
 
 
 def prep_model(model, N, s0pad, s1pad, c):
     model.add_node(name="embmerge", inputs=['e0','e1'], merge_mode='concat', layer=Activation('linear'))
-    B.rnn_input(model, N, s0pad, dropout=c['dropout'], dropoutfix_inp=c['dropoutfix_inp'], dropoutfix_rec=c['dropoutfix_rec'],
+    B.rnn_input(model, N, spad, dropout=c['dropout'], dropoutfix_inp=c['dropoutfix_inp'], dropoutfix_rec=c['dropoutfix_rec'],
                 sdim=2, rnnbidi=True, return_sequences=True,
                 rnn=GRU, rnnact='tanh', rnninit='glorot_uniform', rnnbidi_mode='sum',
                 rnnlevels=1,
                 inputs=['embmerge'])
     rnn_outputs=['e0s_', 'e1s_']
-    #rnn_outputs=rnn_input(model,2*N,s0pad,'embmerge',c)
-    #rnn_outputs=['embmerge','embmerge']
-    outputs = entailment_embedding(model, rnn_outputs,2*N,s0pad,c['l2reg'])
-    final_output = B.to_n_ptscorer(model, outputs, c['Ddim'], N, c['l2reg'], pfx="entail_out", output_dim=3)
-    return final_output
+    outputs = entailment_embedding(model, rnn_outputs,2*N,spad,c['l2reg'])
+    return outputs
+

From 526789c04133e458fd2a58ec178e9ca6e0b13dc8 Mon Sep 17 00:00:00 2001
From: Tomas Vyskocil
Date: Fri, 7 Oct 2016 10:55:36 +0200
Subject: [PATCH 6/6] Changes according to PR comments: deleted simple_scorer

---
 models/entail1509.py    |  3 ---
 pysts/kerasts/blocks.py | 23 -----------------------
 tasks/rte.py            | 11 -----------
 3 files changed, 37 deletions(-)

diff --git a/models/entail1509.py b/models/entail1509.py
index fa49338..de1286b 100644
--- a/models/entail1509.py
+++ b/models/entail1509.py
@@ -25,9 +25,6 @@ def config(c):
     c['rnnact'] = 'tanh'
     c['rnninit'] = 'glorot_uniform'
     c['sdim'] = 2
-    c['ptscorer']=B.to_n_simple_ptscorer
-
-
 
 
 def get_last_time_dim(X):
diff --git a/pysts/kerasts/blocks.py b/pysts/kerasts/blocks.py
index 99c31eb..3e1b8d6 100644
--- a/pysts/kerasts/blocks.py
+++ b/pysts/kerasts/blocks.py
@@ -151,29 +151,6 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0,
     func(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
          layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)), **rnndrop_args)
 
-def prep_to_n_kwargs(inputs, extra_inp):
-    kwargs = {}
-    inputs = list(inputs)
-    if len(inputs)+len(extra_inp)==1:
-        if len(inputs)>len(extra_inp):
-            kwargs['input']=inputs
-        else:
-            kwargs['input']=extra_inp[0]
-    else:
-        kwargs['inputs']=inputs+extra_inp
-        kwargs['merge_mode']='sum'
-    return kwargs
-
-def to_n_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
-    kwargs = prep_to_n_kwargs(inputs, extra_inp)
-    model.add_node(Activation('tanh'), name=pfx+'to_n_sum', **kwargs)
-    model.add_node(Dense(output_dim=output_dim,activation='softmax',W_regularizer=l2(l2reg)), name=pfx+'to_n_out', input=pfx+'to_n_sum')
-    return pfx+"to_n_out"
-
-def to_n_simple_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[], output_dim=1):
-    kwargs = prep_to_n_kwargs(inputs, extra_inp)
-    model.add_node(layer=Dense(output_dim=output_dim,activation='linear',W_regularizer=l2(l2reg)), name=pfx+"_to_n_out", **kwargs)
-    return pfx+'_to_n_out'
 
 def add_multi_node(model, name, inputs, outputs, layer_class,
                    layer_args, siamese=True, **kwargs):
diff --git a/tasks/rte.py b/tasks/rte.py
index 91bc14f..e7c85cc 100644
--- a/tasks/rte.py
+++ b/tasks/rte.py
@@ -62,17 +62,6 @@ def prep_model(self, module_prep_model):
     # Sentence-aggregate embeddings
     final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)
 
-    # Measurement
-
-    if self.c['ptscorer'] == '1':
-        # special scoring mode just based on the answer
-        # (assuming that the question match is carried over to the answer
-        # via attention or another mechanism)
-        ptscorer = B.cat_ptscorer
-        final_outputs = [final_outputs[1]]
-    else:
-        ptscorer = self.c['ptscorer']
-
     kwargs = dict()
     if ptscorer == B.mlp_ptscorer:
         kwargs['sum_mode'] = self.c['mlpsum']
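
For orientation, this is roughly how the task side ends up driving the module
after this series (a hedged sketch; the sizes and the surrounding Graph setup
are assumed, cf. tasks/rte.py above):

    import models.entail1509 as M

    c = {}
    M.config(c)
    # model: a keras.models.Graph to which the task has already added the
    # embedding nodes 'e0' and 'e1'
    final_outputs = M.prep_model(model, N=304, s0pad=60, s1pad=60, c=c)
    # final_outputs == ['Wr', 'Wh']; tasks/rte.py then applies the configured
    # ptscorer three times (pfx 'out0'/'out1'/'out2') and softmaxes the
    # concatenated scores into the final 'score' output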