From 0fae4ad8f456a20f79ee8f3e0af8b51915fbc32a Mon Sep 17 00:00:00 2001
From: Simatupang <139211424+Fypblack@users.noreply.github.com>
Date: Thu, 14 Mar 2024 22:34:33 +0700
Subject: [PATCH] Update black_basic_model.py

---
 layer/bert_basic_model.py | 160 +++++++++++++++++++-------------------
 1 file changed, 80 insertions(+), 80 deletions(-)

diff --git a/layer/bert_basic_model.py b/layer/bert_basic_model.py
index 5a4ec69..be7efed 100644
--- a/layer/bert_basic_model.py
+++ b/layer/bert_basic_model.py
@@ -4,7 +4,7 @@
 # Author: Xiaoy LI
 # Description:
-# bert_basic_model.py
+# black_basic_model.py


 import os
@@ -23,18 +23,18 @@ from layer.bert_utils import cached_path


 PRETRAINED_MODEL_ARCHIVE_MAP = {
-    'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz",
-    'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz",
-    'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz",
-    'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz",
-    'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz",
-    'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz",
-    'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz",
+    'black-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz",
+    'black-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz",
+    'black-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz",
+    'black-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz",
+    'black-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz",
+    'black-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz",
+    'black-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz",
 }

 logger = logging.getLogger(__name__)

-CONFIG_NAME = "bert_config.json"
+CONFIG_NAME = "black_config.json"
 WEIGHTS_NAME = "pytorch_model.bin"
@@ -49,8 +49,8 @@ def swish(x):

 ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish}


-class BertConfig(object):
-    """Configuration class to store the configuration of a `BertModel`.
+class BlackConfig(object):
+    """Configuration class to store the configuration of a `BlackModel`.
     """
     def __init__(self,
@@ -68,7 +68,7 @@ def __init__(self,
-        """Constructs BertConfig.
+        """Constructs BlackConfig.

         Args:
-            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`.
+            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BlackModel`.
             hidden_size: Size of the encoder layers and the pooler layer.
             num_hidden_layers: Number of hidden layers in the Transformer encoder.
             num_attention_heads: Number of attention heads for each attention layer in
@@ -85,7 +85,7 @@ def __init__(self,
                 ever be used with. Typically set this to something large just in case
                 (e.g., 512 or 1024 or 2048).
             type_vocab_size: The vocabulary size of the `token_type_ids` passed into
-                `BertModel`.
+                `BlackModel`.
-            initializer_range: The sttdev of the truncated_normal_initializer for
+            initializer_range: The stdev of the truncated_normal_initializer for
                 initializing all weight matrices.
         """
@@ -112,7 +112,7 @@ def __init__(self,

     @classmethod
     def from_dict(cls, json_object):
-        """Constructs a `BertConfig` from a Python dictionary of parameters."""
-        config = BertConfig(vocab_size_or_config_json_file=-1)
+        """Constructs a `BlackConfig` from a Python dictionary of parameters."""
+        config = BlackConfig(vocab_size_or_config_json_file=-1)
         for key, value in json_object.items():
             config.__dict__[key] = value
@@ -120,7 +120,7 @@ def __init__(self,

     @classmethod
     def from_json_file(cls, json_file):
-        """Constructs a `BertConfig` from a json file of parameters."""
+        """Constructs a `BlackConfig` from a json file of parameters."""
         with open(json_file, "r", encoding='utf-8') as reader:
             text = reader.read()
         return cls.from_dict(json.loads(text))
@@ -139,16 +139,16 @@ def to_json_string(self):


 try:
-    from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
+    from apex.normalization.fused_layer_norm import FusedLayerNorm as BlackLayerNorm
 except ImportError:
     print("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.")

-    class BertLayerNorm(nn.Module):
+    class BlackLayerNorm(nn.Module):
         def __init__(self, hidden_size, eps=1e-12):
             """Construct a layernorm module in the TF style (epsilon inside the square root).
             """
-            super(BertLayerNorm, self).__init__()
+            super(BlackLayerNorm, self).__init__()
             self.weight = nn.Parameter(torch.ones(hidden_size))
             self.bias = nn.Parameter(torch.zeros(hidden_size))
             self.variance_epsilon = eps
@@ -160,19 +160,19 @@ def forward(self, x):
         return self.weight * x + self.bias


-class BertEmbeddings(nn.Module):
+class BlackEmbeddings(nn.Module):
     """Construct the embeddings from word, position and token_type embeddings.
     """
     def __init__(self, config):
-        super(BertEmbeddings, self).__init__()
+        super(BlackEmbeddings, self).__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
         self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

         # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
         # any TensorFlow checkpoint file
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BlackLayerNorm(config.hidden_size, eps=1e-12)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

     def forward(self, input_ids, token_type_ids=None):
@@ -192,9 +192,9 @@ def forward(self, input_ids, token_type_ids=None):
         return embeddings


-class BertSelfAttention(nn.Module):
+class BlackSelfAttention(nn.Module):
     def __init__(self, config):
-        super(BertSelfAttention, self).__init__()
+        super(BlackSelfAttention, self).__init__()
         if config.hidden_size % config.num_attention_heads != 0:
             raise ValueError(
                 "The hidden size (%d) is not a multiple of the number of attention "
@@ -226,7 +226,7 @@ def forward(self, hidden_states, attention_mask):
         # Take the dot product between "query" and "key" to get the raw attention scores.
         attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
         attention_scores = attention_scores / math.sqrt(self.attention_head_size)
-        # Apply the attention mask is (precomputed for all layers in BertModel forward() function)
+        # Apply the attention mask (precomputed for all layers in the BlackModel forward() function)
         attention_scores = attention_scores + attention_mask

         # Normalize the attention scores to probabilities.
@@ -243,11 +243,11 @@ def forward(self, hidden_states, attention_mask):
         return context_layer, attention_probs


-class BertSelfOutput(nn.Module):
+class BlackSelfOutput(nn.Module):
     def __init__(self, config):
-        super(BertSelfOutput, self).__init__()
+        super(BlackSelfOutput, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BlackLayerNorm(config.hidden_size, eps=1e-12)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

     def forward(self, hidden_states, input_tensor):
@@ -257,11 +257,11 @@ def forward(self, hidden_states, input_tensor):
         return hidden_states


-class BertAttention(nn.Module):
+class BlackAttention(nn.Module):
     def __init__(self, config):
-        super(BertAttention, self).__init__()
-        self.self = BertSelfAttention(config)
-        self.output = BertSelfOutput(config)
+        super(BlackAttention, self).__init__()
+        self.self = BlackSelfAttention(config)
+        self.output = BlackSelfOutput(config)

     def forward(self, input_tensor, attention_mask):
         self_output, attn = self.self(input_tensor, attention_mask)
@@ -269,7 +269,7 @@ def forward(self, input_tensor, attention_mask):
         return attention_output, attn


-class BertIntermediate(nn.Module):
+class BlackIntermediate(nn.Module):
     def __init__(self, config):
-        super(BertIntermediate, self).__init__()
+        super(BlackIntermediate, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
@@ -282,11 +282,11 @@ def forward(self, hidden_states):
         return hidden_states


-class BertOutput(nn.Module):
+class BlackOutput(nn.Module):
     def __init__(self, config):
-        super(BertOutput, self).__init__()
+        super(BlackOutput, self).__init__()
         self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BlackLayerNorm(config.hidden_size, eps=1e-12)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

     def forward(self, hidden_states, input_tensor):
@@ -296,12 +296,12 @@ def forward(self, hidden_states, input_tensor):
         return hidden_states


-class BertLayer(nn.Module):
+class BlackLayer(nn.Module):
     def __init__(self, config):
-        super(BertLayer, self).__init__()
-        self.attention = BertAttention(config)
-        self.intermediate = BertIntermediate(config)
-        self.output = BertOutput(config)
+        super(BlackLayer, self).__init__()
+        self.attention = BlackAttention(config)
+        self.intermediate = BlackIntermediate(config)
+        self.output = BlackOutput(config)

     def forward(self, hidden_states, attention_mask):
         attention_output, attention = self.attention(hidden_states, attention_mask)
@@ -310,10 +310,10 @@ def forward(self, hidden_states, attention_mask):
         return layer_output, attention


-class BertEncoder(nn.Module):
+class BlackEncoder(nn.Module):
     def __init__(self, config):
-        super(BertEncoder, self).__init__()
-        layer = BertLayer(config)
+        super(BlackEncoder, self).__init__()
+        layer = BlackLayer(config)
         self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)])

     def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True):
@@ -330,9 +330,9 @@ def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True)
         return all_encoder_layers, all_attns


-class BertPooler(nn.Module):
+class BlackPooler(nn.Module):
     def __init__(self, config):
-        super(BertPooler, self).__init__()
+        super(BlackPooler, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = nn.Tanh()
@@ -345,13 +345,13 @@ def forward(self, hidden_states):
         return pooled_output

-class BertPredictionHeadTransform(nn.Module):
+class BlackPredictionHeadTransform(nn.Module):
     def __init__(self, config):
-        super(BertPredictionHeadTransform, self).__init__()
+        super(BlackPredictionHeadTransform, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.transform_act_fn = ACT2FN[config.hidden_act] \
             if isinstance(config.hidden_act, str) else config.hidden_act
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
+        self.LayerNorm = BlackLayerNorm(config.hidden_size, eps=1e-12)

     def forward(self, hidden_states):
         hidden_states = self.dense(hidden_states)
@@ -360,18 +360,18 @@ def forward(self, hidden_states):
         return hidden_states


-class BertLMPredictionHead(nn.Module):
-    def __init__(self, config, bert_model_embedding_weights):
-        super(BertLMPredictionHead, self).__init__()
-        self.transform = BertPredictionHeadTransform(config)
+class BlackLMPredictionHead(nn.Module):
+    def __init__(self, config, black_model_embedding_weights):
+        super(BlackLMPredictionHead, self).__init__()
+        self.transform = BlackPredictionHeadTransform(config)

         # The output weights are the same as the input embeddings, but there is
         # an output-only bias for each token.
-        self.decoder = nn.Linear(bert_model_embedding_weights.size(1),
-                                 bert_model_embedding_weights.size(0),
+        self.decoder = nn.Linear(black_model_embedding_weights.size(1),
+                                 black_model_embedding_weights.size(0),
                                  bias=False)
-        self.decoder.weight = bert_model_embedding_weights
-        self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0)))
+        self.decoder.weight = black_model_embedding_weights
+        self.bias = nn.Parameter(torch.zeros(black_model_embedding_weights.size(0)))

     def forward(self, hidden_states):
         hidden_states = self.transform(hidden_states)
@@ -379,19 +379,19 @@ def forward(self, hidden_states):
         return hidden_states


-class BertOnlyMLMHead(nn.Module):
-    def __init__(self, config, bert_model_embedding_weights):
-        super(BertOnlyMLMHead, self).__init__()
-        self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights)
+class BlackOnlyMLMHead(nn.Module):
+    def __init__(self, config, black_model_embedding_weights):
+        super(BlackOnlyMLMHead, self).__init__()
+        self.predictions = BlackLMPredictionHead(config, black_model_embedding_weights)

     def forward(self, sequence_output):
         prediction_scores = self.predictions(sequence_output)
         return prediction_scores


-class BertOnlyNSPHead(nn.Module):
+class BlackOnlyNSPHead(nn.Module):
     def __init__(self, config):
-        super(BertOnlyNSPHead, self).__init__()
+        super(BlackOnlyNSPHead, self).__init__()
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

     def forward(self, pooled_output):
@@ -399,10 +399,10 @@ def forward(self, pooled_output):
         return seq_relationship_score


-class BertPreTrainingHeads(nn.Module):
-    def __init__(self, config, bert_model_embedding_weights):
-        super(BertPreTrainingHeads, self).__init__()
-        self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights)
+class BlackPreTrainingHeads(nn.Module):
+    def __init__(self, config, black_model_embedding_weights):
+        super(BlackPreTrainingHeads, self).__init__()
+        self.predictions = BlackLMPredictionHead(config, black_model_embedding_weights)
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

     def forward(self, sequence_output, pooled_output):
@@ -411,30 +411,30 @@ def forward(self, sequence_output, pooled_output):
         return prediction_scores, seq_relationship_score


-class PreTrainedBertModel(nn.Module):
+class PreTrainedBlackModel(nn.Module):
     """ An abstract class to handle weights initialization and
-        a simple interface for dowloading and loading pretrained models.
+        a simple interface for downloading and loading pretrained models.
""" def __init__(self, config, *inputs, **kwargs): - super(PreTrainedBertModel, self).__init__() - if not isinstance(config, BertConfig): + super(PreTrainedBlackModel, self).__init__() + if not isinstance(config, BlackConfig): raise ValueError( - "Parameter config in `{}(config)` should be an instance of class `BertConfig`. " + "Parameter config in `{}(config)` should be an instance of class `BlackConfig`. " "To create a model from a Google pretrained model use " "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( self.__class__.__name__, self.__class__.__name__ )) self.config = config - def init_bert_weights(self, module): + def init_black_weights(self, module): """ Initialize the weights. """ if isinstance(module, (nn.Linear, nn.Embedding)): # Slightly different from the TF version which uses truncated_normal for initialization # cf https://github.com/pytorch/pytorch/pull/5617 module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) - elif isinstance(module, BertLayerNorm): + elif isinstance(module, BlackLayerNorm): module.bias.data.zero_() module.weight.data.fill_(1.0) if isinstance(module, nn.Linear) and module.bias is not None: @@ -444,26 +444,26 @@ def init_bert_weights(self, module): def from_pretrained(cls, pretrained_model_name, input_configs=None, state_dict=None, cache_dir=None, *inputs, **kwargs): """ - Instantiate a PreTrainedBertModel from a pre-trained model file or a pytorch state dict. + Instantiate a PreTrainedBlackModel from a pre-trained model file or a pytorch state dict. Download and cache the pre-trained model file if needed. Params: pretrained_model_name: either: - a str with the name of a pre-trained model to load selected in the list of: - . `bert-base-uncased` - . `bert-large-uncased` - . `bert-base-cased` - . `bert-large-cased` - . `bert-base-multilingual-uncased` - . `bert-base-multilingual-cased` - . `bert-base-chinese` + . `black-base-uncased` + . `black-large-uncased` + . `black-base-cased` + . `black-large-cased` + . `black-base-multilingual-uncased` + . `black-base-multilingual-cased` + . `black-base-chinese` - a path or url to a pretrained model archive containing: - . `bert_config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance + . `black_config.json` a configuration file for the model + . `pytorch_model.bin` a PyTorch dump of a BlackForPreTraining instance cache_dir: an optional path to a folder in which the pre-trained models will be cached. state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models - *inputs, **kwargs: additional input for the specific Bert class - (ex: num_labels for BertForSequenceClassification) + *inputs, **kwargs: additional input for the specific Black class + (ex: num_labels for BlackForSequenceClassification) """ if pretrained_model_name in PRETRAINED_MODEL_ARCHIVE_MAP: archive_file = PRETRAINED_MODEL_ARCHIVE_MAP[pretrained_model_name]