This repository was archived by the owner on Jan 24, 2024. It is now read-only.
65 changes: 39 additions & 26 deletions examples/sentiment_classification/models.py
@@ -16,12 +16,13 @@
from paddle.fluid.dygraph.base import to_variable
import numpy as np
from hapi.model import Model
from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
from hapi.text.test import BOWEncoder, CNNEncoder, GRUEncoder
from hapi.text.text import GRUEncoder
from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
from hapi.text.test import BOWEncoder, CNNEncoder


class CNN(Model):
def __init__(self, dict_dim, batch_size, seq_len):
def __init__(self, dict_dim, batch_size, seq_len):
super(CNN, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
@@ -36,15 +36,19 @@ def __init__(self, dict_dim, batch_size, seq_len):
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
seq_len=self.seq_len,
filter_size= self.win_size,
num_filters= self.hid_dim,
hidden_dim= self.hid_dim,
filter_size=self.win_size,
num_filters=self.hid_dim,
hidden_dim=self.hid_dim,
padding_idx=None,
act='tanh')
self._fc1 = Linear(input_dim = self.hid_dim*self.seq_len, output_dim=self.fc_hid_dim, act="softmax")
self._fc_prediction = Linear(input_dim = self.fc_hid_dim,
output_dim = self.class_dim,
act="softmax")
self._fc1 = Linear(
input_dim=self.hid_dim * self.seq_len,
output_dim=self.fc_hid_dim,
act="softmax")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")

def forward(self, inputs):
conv_3 = self._encoder(inputs)
@@ -69,11 +74,14 @@ def __init__(self, dict_dim, batch_size, seq_len):
padding_idx=None,
bow_dim=self.hid_dim,
seq_len=self.seq_len)
self._fc1 = Linear(input_dim = self.hid_dim, output_dim=self.hid_dim, act="tanh")
self._fc2 = Linear(input_dim = self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim = self.fc_hid_dim,
output_dim = self.class_dim,
act="softmax")
self._fc1 = Linear(
input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh")
self._fc2 = Linear(
input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")

def forward(self, inputs):
bow_1 = self._encoder(inputs)
@@ -94,10 +102,12 @@ def __init__(self, dict_dim, batch_size, seq_len):
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._fc1 = Linear(
input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = GRUEncoder(
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
@@ -112,7 +122,7 @@ def forward(self, inputs):
prediction = self._fc_prediction(fc_1)
return prediction


class BiGRU(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(BiGRU, self).__init__()
@@ -130,11 +140,13 @@ def __init__(self, dict_dim, batch_size, seq_len):
is_sparse=False)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
self._fc1 = Linear(input_dim = self.hid_dim, output_dim=self.hid_dim*3)
self._fc2 = Linear(input_dim = self.hid_dim*2, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
self._fc2 = Linear(
input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(
input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = BiGRUEncoder(
grnn_hidden_dim=self.hid_dim,
input_dim=self.hid_dim * 3,
@@ -144,7 +156,8 @@ def __init__(self, dict_dim, batch_size, seq_len):

def forward(self, inputs):
emb = self.embedding(inputs)
emb = fluid.layers.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
emb = fluid.layers.reshape(
emb, shape=[self.batch_size, -1, self.hid_dim])
fc_1 = self._fc1(emb)
encoded_vector = self._encoder(fc_1)
encoded_vector = fluid.layers.tanh(encoded_vector)
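
All four sentiment models above end in the same two-layer head: a tanh hidden layer followed by a softmax output (the CNN variant uses softmax for both layers). For reference, here is a minimal standalone sketch of that head against core paddle.fluid.dygraph 1.x APIs; the layer sizes and the random input are illustrative assumptions, not values taken from this example.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable

HID_DIM, FC_HID_DIM, CLASS_DIM = 128, 96, 2  # illustrative sizes

with fluid.dygraph.guard():
    # tanh hidden layer followed by a softmax output, as in the models above
    fc1 = Linear(input_dim=HID_DIM, output_dim=FC_HID_DIM, act="tanh")
    fc_prediction = Linear(input_dim=FC_HID_DIM, output_dim=CLASS_DIM, act="softmax")

    feats = to_variable(np.random.rand(4, HID_DIM).astype("float32"))
    probs = fc_prediction(fc1(feats))
    print(probs.numpy().shape)  # (4, 2): per-class probabilities
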
11 changes: 6 additions & 5 deletions examples/transformer/transformer.py
@@ -18,7 +18,7 @@

import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
from hapi.model import Model, CrossEntropy, Loss
from hapi.text import TransformerBeamSearchDecoder, DynamicDecode
@@ -606,17 +606,18 @@ def forward(self, src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
return predict


class TransfomerCell(object):
class TransformerCell(Layer):
"""
Let inputs=(trg_word, trg_pos) and states=cache so that the Transformer
decoder can be used as an RNNCell.
"""

def __init__(self, decoder):
super(TransformerCell, self).__init__()
self.decoder = decoder

def __call__(self, inputs, states, trg_src_attn_bias, enc_output,
static_caches):
def forward(self, inputs, states, trg_src_attn_bias, enc_output,
static_caches):
trg_word, trg_pos = inputs
for cache, static_cache in zip(states, static_caches):
cache.update(static_cache)
@@ -657,7 +658,7 @@ def __init__(self,
self.beam_size = args.pop("beam_size")
self.max_out_len = args.pop("max_out_len")
super(InferTransformer, self).__init__(**args)
cell = TransfomerCell(self.decoder)
cell = TransformerCell(self.decoder)
self.beam_search_decoder = DynamicDecode(
TransformerBeamSearchDecoder(
cell, bos_id, eos_id, beam_size, var_dim_in_state=2),
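
As the docstring notes, TransformerCell follows an RNNCell-style contract: it consumes (inputs, states) and returns outputs together with updated states, which is what lets TransformerBeamSearchDecoder step the decoder one position at a time. The sketch below is a toy cell with the same contract, written against paddle.fluid.dygraph only; the class name, sizes, and step logic are illustrative assumptions rather than the hapi implementation. Deriving from Layer and implementing forward (instead of defining __call__ on a plain object) means sub-layers and their parameters are registered with the cell, and Layer.__call__ dispatches to forward.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer, Linear
from paddle.fluid.dygraph.base import to_variable


class ToyCell(Layer):
    """Minimal (inputs, states) -> (outputs, new_states) cell."""

    def __init__(self, hidden_size):
        super(ToyCell, self).__init__()
        self.step = Linear(hidden_size, hidden_size, act="tanh")

    def forward(self, inputs, states):
        new_states = self.step(states)   # advance the recurrent state
        outputs = inputs + new_states    # combine the current input with the new state
        return outputs, new_states


with fluid.dygraph.guard():
    cell = ToyCell(8)
    x = to_variable(np.ones((2, 8), dtype="float32"))
    h = to_variable(np.zeros((2, 8), dtype="float32"))
    out, h = cell(x, h)  # Layer.__call__ dispatches to forward
    print(out.shape)     # [2, 8]
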
21 changes: 11 additions & 10 deletions hapi/model.py
@@ -38,7 +38,7 @@
from hapi.distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
from hapi.metrics import Metric
from hapi.callbacks import config_callbacks
from hapi.utils import to_list, to_numpy, flatten_list, restore_flatten_list
from hapi.utils import to_list, to_numpy, flatten_list, restore_flatten_list, extract_args

__all__ = [
'Model',
@@ -495,14 +495,15 @@ def train_batch(self, inputs, labels=None):
if labels is not None:
labels = [to_variable(l) for l in to_list(labels)]
if self._nranks > 1:
outputs = self.ddp_model.forward(*[to_variable(x) for x in inputs])
outputs = self.ddp_model.forward(
* [to_variable(x) for x in inputs])
losses = self.model._loss_function(outputs, labels)
final_loss = fluid.layers.sum(losses)
final_loss = self.ddp_model.scale_loss(final_loss)
final_loss.backward()
self.ddp_model.apply_collective_grads()
else:
outputs = self.model.forward(*[to_variable(x) for x in inputs])
outputs = self.model.forward(* [to_variable(x) for x in inputs])
losses = self.model._loss_function(outputs, labels)
final_loss = fluid.layers.sum(losses)
final_loss.backward()
@@ -511,9 +512,9 @@ def train_batch(self, inputs, labels=None):
self.model.clear_gradients()
metrics = []
for metric in self.model._metrics:
metric_outs = metric.add_metric_op(*(
to_list(outputs) + to_list(labels)))
m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list(
labels)))
m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)])
metrics.append(m)

return ([to_numpy(l) for l in losses], metrics) \
@@ -525,7 +526,7 @@ def eval_batch(self, inputs, labels=None):
inputs = to_list(inputs)
if labels is not None:
labels = [to_variable(l) for l in to_list(labels)]
outputs = self.model.forward(*[to_variable(x) for x in inputs])
outputs = self.model.forward(* [to_variable(x) for x in inputs])
if self.model._loss_function:
losses = self.model._loss_function(outputs, labels)
else:
@@ -551,9 +552,9 @@ def eval_batch(self, inputs, labels=None):
self._merge_count[self.mode + '_total'] += samples
self._merge_count[self.mode + '_batch'] = samples

metric_outs = metric.add_metric_op(*(
to_list(outputs) + to_list(labels)))
m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list(
labels)))
m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)])
metrics.append(m)

# To be consistent with static graph
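
Both train_batch and eval_batch feed model outputs and labels first through metric.add_metric_op, which runs on graph variables, and then through metric.update, which receives the numpy arrays produced by to_numpy. A minimal custom metric written against that assumed contract is sketched below; the class name and the exact hapi.metrics.Metric method set are inferred from the calls visible in this diff, not taken from the library source.

import paddle.fluid as fluid
from hapi.metrics import Metric


class BatchAccuracy(Metric):
    """Accuracy sketch following the add_metric_op/update split used above."""

    def __init__(self, name="acc"):
        super(BatchAccuracy, self).__init__()
        self._name = name
        self.reset()

    def add_metric_op(self, pred, label):
        # runs on graph variables inside train_batch/eval_batch;
        # assumes pred has shape [batch, num_classes]
        pred_label = fluid.layers.argmax(pred, axis=1)
        return pred_label, label

    def update(self, pred_label, label):
        # runs on the numpy arrays produced by to_numpy()
        correct = int((pred_label.reshape(-1) == label.reshape(-1)).sum())
        self.correct += correct
        self.total += label.reshape(-1).shape[0]
        return float(self.correct) / max(self.total, 1)

    def reset(self):
        self.correct, self.total = 0, 0

    def accumulate(self):
        return float(self.correct) / max(self.total, 1)

    def name(self):
        return self._name
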