Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions models/entail1509.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
A model that is similar to the one from
Rocktaschel et al. "Reasoning about entailment with neural attention."
approaches 2015 state-of-the-art results on the anssel-wang task (with
token flags).

The implementation is inspired by https://github.com/shyamupa/snli-entailment/blob/master/amodel.py


"""

from keras.layers.core import Layer
from keras.layers import GRU, Dropout, Lambda, Dense, RepeatVector, TimeDistributedDense, Activation, Reshape, Permute, Flatten
from keras.regularizers import l2
from keras import backend as K
import pysts.kerasts.blocks as B

spad=60

def config(c):
    """Install this model's default hyperparameters into config dict *c*."""
    defaults = {
        'dropout': 4/5,
        'dropoutfix_inp': 0,
        'dropoutfix_rec': 0,
        'l2reg': 1e-4,
        'rnnact': 'tanh',
        'rnninit': 'glorot_uniform',
        'sdim': 2,
    }
    c.update(defaults)


def get_last_time_dim(X):
    """Return the final timestep slice of a (batch, time, features) tensor."""
    return X[:, -1, :]

def get_R(X):
    """Attention read-out: batched product of the context matrix Y with the
    per-timestep attention weights alpha (Lambda body for the '_r' node)."""
    # merge_mode='join' hands this Lambda a mapping of its two input nodes.
    # NOTE(review): unpacking .values() relies on the mapping preserving the
    # [Yp, alpha] insertion order — fragile; verify against the Keras Graph
    # 'join' merge implementation in use.
    Y, alpha = X.values()
    # K.T is the Theano module, so this Lambda is Theano-backend-only.
    # Presumably Y is (batch, N, spad) and alpha is (batch, spad, 1), making
    # this a weighted sum over timesteps — TODO confirm shapes.
    ans=K.T.batched_dot(Y,alpha)
    return ans

def get_first_sentence(X):
    """Slice the leading `spad` timesteps (the first sentence) out of X."""
    # HACK: `spad` is smuggled in as an attribute on the Keras backend module
    # by entailment_embedding()'s setattr(K, 'spad', spad), because Lambda
    # bodies take no extra arguments here — effectively global state.
    spad = K.spad
    return X[:, :spad, :]

def entailment_embedding(model, inputs, N=608, spad=60, l2reg=1e-4, pfx=''):
    """Wire the Rocktaschel-style attention read-out into a Keras Graph.

    inputs[0] is the RNN sequence output covering both sentences (attention
    context source); inputs[1] is the sequence whose last timestep acts as
    the query h_n.  N is the feature width, spad the per-sentence padded
    length, l2reg the weight-decay strength, pfx a node-name prefix.

    Returns the names of the two projected output nodes [Wr, Wh] for the
    downstream scorer.
    """
    # Publish spad for get_first_sentence() — see the HACK note there.
    setattr(K, 'spad', spad)
    # h_n: last hidden state of the second input sequence (the query).
    model.add_node(Lambda(get_last_time_dim, output_shape=(N,)), name=pfx+'h_n', input=inputs[1])
    # Y: first-sentence timesteps of the first input sequence.
    model.add_node(Lambda(get_first_sentence, output_shape=(N,spad)), name=pfx+'Y', input=inputs[0])
    # Yp: Y with (time, feature) axes swapped for the batched dot in get_R.
    model.add_node(Permute((2,1)), name=pfx+"Yp", input=pfx+'Y')
    # W*h_n, repeated across all spad timesteps so it can be summed with WY.
    model.add_node(Dense(N,W_regularizer=l2(l2reg)),name=pfx+'Wh_n', input=pfx+'h_n')
    model.add_node(RepeatVector(spad), name=pfx+'Wh_n_cross_e', input=pfx+'Wh_n')
    # Per-timestep projection of the context Y.
    model.add_node(TimeDistributedDense(N,W_regularizer=l2(l2reg)), name=pfx+'WY', input=pfx+'Yp')
    # M = tanh(WY + Wh_n ⊗ e): attention pre-activation per timestep.
    model.add_node(Activation('tanh'), name=pfx+'M', inputs=[pfx+'Wh_n_cross_e', pfx+'WY'], merge_mode='sum')
    # alpha: one scalar attention weight per timestep.
    model.add_node(TimeDistributedDense(1,activation='linear'), name=pfx+'alpha', input=pfx+'M')
    # _r = Yp · alpha (batched) — the attention-weighted context vector.
    model.add_node(Lambda(get_R, output_shape=(N,1)), name=pfx+'_r', inputs=[pfx+'Yp',pfx+'alpha'], merge_mode='join')
    model.add_node(Permute((2,1)), name=pfx+"_rp", input=pfx+'_r')
    # Flatten (N,1) down to a plain N-vector r.
    model.add_node(Flatten(input_shape = (N,1)),name=pfx+'r', input=pfx+'_rp')
    # Final linear projections of r and h_n; the caller merges/scores these.
    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name=pfx+'Wr', input=pfx+'r')
    model.add_node(Dense(N,W_regularizer=l2(l2reg)), name=pfx+'Wh', input=pfx+'h_n')
    outputs=[pfx+'Wr',pfx+'Wh']
    return outputs


def prep_model(model, N, s0pad, s1pad, c):
    """Assemble the full entailment graph on *model*.

    Concatenates the two sentence embedding nodes ('e0', 'e1') along the time
    axis, runs a bidirectional GRU over the joint sequence, and attaches the
    attention read-out.  Returns the names of the two output nodes for the
    task-level scorer.

    NOTE(review): uses the module-level `spad` constant, not the s0pad/s1pad
    arguments — confirm callers pad sentences to that length.
    """
    # Joint sequence: both sentences read by one RNN.
    model.add_node(name="embmerge", inputs=['e0', 'e1'], merge_mode='concat',
                   layer=Activation('linear'))
    # Consistency fix: honor the hyperparameters declared in config() instead
    # of hard-coding sdim/rnnact/rnninit (the defaults there are identical,
    # so default behavior is unchanged, but tuning now takes effect).
    B.rnn_input(model, N, spad, dropout=c['dropout'],
                dropoutfix_inp=c['dropoutfix_inp'],
                dropoutfix_rec=c['dropoutfix_rec'],
                sdim=c['sdim'], rnnbidi=True, return_sequences=True,
                rnn=GRU, rnnact=c['rnnact'], rnninit=c['rnninit'],
                rnnbidi_mode='sum', rnnlevels=1,
                inputs=['embmerge'])
    rnn_outputs = ['e0s_', 'e1s_']
    # Bidi-sum RNN output width is 2*N; feed it to the attention read-out.
    return entailment_embedding(model, rnn_outputs, 2*N, spad, c['l2reg'])

70 changes: 52 additions & 18 deletions pysts/kerasts/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,35 +87,69 @@ def rnn_input(model, N, spad, dropout=3/4, dropoutfix_inp=0, dropoutfix_rec=0,
rnn_input(model, N, spad, dropout=0, sdim=sdim, rnnbidi=rnnbidi, return_sequences=True,
rnn=rnn, rnnact=rnnact, rnninit=rnninit, rnnbidi_mode=rnnbidi_mode,
rnnlevels=1, inputs=deep_inputs, pfx=pfx+'L%d'%(i,))
model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%de0s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
model.add_node(name=pfx+'L%de1s_j'%(i,), inputs=[inputs[1], pfx+'L%de1s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
deep_inputs = ['L%de0s_j'%(i,), 'L%de1s_j'%(i,)]

if len(deep_inputs)>1 or rnnbidi:
model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%de0s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
model.add_node(name=pfx+'L%de1s_j'%(i,), inputs=[inputs[1], pfx+'L%de1s_'%(i,)], merge_mode='concat', layer=Activation('linear'))
deep_inputs = ['L%de0s_j'%(i,), 'L%de1s_j'%(i,)]
else:
model.add_node(name=pfx+'L%de0s_j'%(i,), inputs=[inputs[0], pfx+'L%d'%(i,)], merge_mode='concat', layer=Activation('linear'))

if rnnbidi:
if rnnbidi_mode == 'concat':
sdim /= 2
model.add_shared_node(name=pfx+'rnnf', inputs=deep_inputs, outputs=[pfx+'e0sf', pfx+'e1sf'],
layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
rnnf_args={}
rnnb_args={}
rnnfa_args={}
rnnba_args={}
func = model.add_shared_node
if len(deep_inputs)>1:
rnnf_args['inputs']=deep_inputs
rnnb_args['inputs']=deep_inputs
rnnf_args['outputs']= [pfx+'e%dsf'%(i) for i in range(len(deep_inputs))]
rnnb_args['outputs']= [pfx+'e%dsb'%(i) for i in range(len(deep_inputs))]
rnnfa_args['merge_mode']=rnnbidi_mode
rnnba_args['merge_mode']=rnnbidi_mode
rnnfa_args['inputs']=[pfx+'e%dsf'%(i) for i in range(len(deep_inputs))]
rnnba_args['inputs']=[pfx+'e%dsb'%(i) for i in range(len(deep_inputs))]
else:
func = model.add_node
rnnf_args['input']=deep_inputs[0]
rnnb_args['input']=deep_inputs[0]
rnnfa_args['input']=pfx+'rnnf'
rnnba_args['input']=pfx+'rnnb'

func(name=pfx+'rnnf', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
init=rnninit, activation=rnnact,
return_sequences=return_sequences,
dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec))
model.add_shared_node(name=pfx+'rnnb', inputs=deep_inputs, outputs=[pfx+'e0sb', pfx+'e1sb'],
layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnf_args)
func(name=pfx+'rnnb', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
init=rnninit, activation=rnnact,
return_sequences=return_sequences, go_backwards=True,
dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec))
model.add_node(name=pfx+'e0s', inputs=[pfx+'e0sf', pfx+'e0sb'], merge_mode=rnnbidi_mode, layer=Activation('linear'))
model.add_node(name=pfx+'e1s', inputs=[pfx+'e1sf', pfx+'e1sb'], merge_mode=rnnbidi_mode, layer=Activation('linear'))
dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnnb_args)
model.add_node(name=pfx+'e0s', layer=Activation('linear'), **rnnfa_args)
model.add_node(name=pfx+'e1s', layer=Activation('linear'), **rnnba_args)

model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)))
else:
model.add_shared_node(name=pfx+'rnn', inputs=deep_inputs, outputs=[pfx+'e0s', pfx+'e1s'],
layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
rnns_args = {}
rnndrop_args = {}
if len(deep_inputs)>1:
rnns_args['inputs']=deep_inputs
rnns_args['outputs']=[pfx+'e0s', pfx+'e1s']
rnndrop_args['inputs'] = rnns_args['outputs']
rnndrop_args[outputs] = [s+'_' for s in rnndrop_args['inputs']]
func=model.add_shared_node
else:
rnns_args['input']=deep_inputs[0]
rnndrop_args['input'] = pfx+'rnn'
func=model.add_node
func(name=pfx+'rnn', layer=rnn(input_dim=N, output_dim=int(N*sdim), input_length=spad,
init=rnninit, activation=rnnact,
return_sequences=return_sequences,
dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec))

model.add_shared_node(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)))
dropout_W=dropoutfix_inp, dropout_U=dropoutfix_rec), **rnns_args)
func(name=pfx+'rnndrop', inputs=[pfx+'e0s', pfx+'e1s'], outputs=[pfx+'e0s_', pfx+'e1s_'],
layer=Dropout(dropout, input_shape=(spad, int(N*sdim)) if return_sequences else (int(N*sdim),)), **rnndrop_args)


def add_multi_node(model, name, inputs, outputs, layer_class,
Expand Down
17 changes: 3 additions & 14 deletions tasks/rte.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,28 +62,17 @@ def prep_model(self, module_prep_model):
# Sentence-aggregate embeddings
final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

# Measurement

if self.c['ptscorer'] == '1':
# special scoring mode just based on the answer
# (assuming that the question match is carried over to the answer
# via attention or another mechanism)
ptscorer = B.cat_ptscorer
final_outputs = [final_outputs[1]]
else:
ptscorer = self.c['ptscorer']

kwargs = dict()
if ptscorer == B.mlp_ptscorer:
kwargs['sum_mode'] = self.c['mlpsum']
kwargs['Dinit'] = self.c['Dinit']

model.add_node(name='scoreS0', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out0", **kwargs),
layer=Activation('sigmoid'))
layer=Activation('linear'))
model.add_node(name='scoreS1', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out1", **kwargs),
layer=Activation('sigmoid'))
layer=Activation('linear'))
model.add_node(name='scoreS2', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out2", **kwargs),
layer=Activation('sigmoid'))
layer=Activation('linear'))

model.add_node(name='scoreV', inputs=['scoreS0', 'scoreS1', 'scoreS2'], merge_mode='concat', layer=Activation('softmax'))
model.add_output(name='score', input='scoreV')
Expand Down