diff --git a/bert.cpp b/bert.cpp
index 2e033ba..f04c6c5 100644
--- a/bert.cpp
+++ b/bert.cpp
@@ -23,6 +23,7 @@ struct bert_hparams
     int32_t n_intermediate = 1536;
     int32_t n_head = 12;
     int32_t n_layer = 6;
+    int32_t n_vocab_size = 2;
     int32_t f16 = 1;
 };
 
@@ -364,6 +365,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
         fin.read((char *)&hparams.n_intermediate, sizeof(hparams.n_intermediate));
         fin.read((char *)&hparams.n_head, sizeof(hparams.n_head));
         fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer));
+        fin.read((char *)&hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
         fin.read((char *)&hparams.f16, sizeof(hparams.f16));
 
         printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
@@ -372,6 +374,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
         printf("%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
         printf("%s: n_head = %d\n", __func__, hparams.n_head);
         printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
+        printf("%s: n_vocab_size = %d\n", __func__, hparams.n_vocab_size);
         printf("%s: f16 = %d\n", __func__, hparams.f16);
     }
 
@@ -489,11 +492,13 @@ struct bert_ctx * bert_load_from_file(const char *fname)
         const int n_intermediate = hparams.n_intermediate;
         const int n_max_tokens = hparams.n_max_tokens;
         const int n_vocab = hparams.n_vocab;
+        const int n_vocab_size = hparams.n_vocab_size;
+
 
         model.layers.resize(n_layer);
 
         model.word_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab);
-        model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, 2);
+        model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab_size);
         model.position_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_max_tokens);
 
         model.ln_e_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
diff --git a/models/convert-to-ggml.py b/models/convert-to-ggml.py
index 7ef5b80..c05ee25 100644
--- a/models/convert-to-ggml.py
+++ b/models/convert-to-ggml.py
@@ -61,6 +61,7 @@
 fout.write(struct.pack("i", hparams["intermediate_size"]))
 fout.write(struct.pack("i", hparams["num_attention_heads"]))
 fout.write(struct.pack("i", hparams["num_hidden_layers"]))
+fout.write(struct.pack("i", hparams["type_vocab_size"]))
 fout.write(struct.pack("i", ftype))
 
 for i in range(hparams["vocab_size"]):
diff --git a/models/quantize.cpp b/models/quantize.cpp
index 22411a1..83f2fd2 100644
--- a/models/quantize.cpp
+++ b/models/quantize.cpp
@@ -20,6 +20,7 @@ struct bert_hparams
     int32_t n_intermediate = 1536;
     int32_t n_head = 12;
     int32_t n_layer = 6;
+    int32_t n_vocab_size = 2;
     int32_t f16 = 1;
 };
 
@@ -74,6 +75,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
     finp.read((char *) &hparams.n_intermediate, sizeof(hparams.n_intermediate));
     finp.read((char *) &hparams.n_head, sizeof(hparams.n_head));
     finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer));
+    finp.read((char *) &hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
     finp.read((char *) &hparams.f16, sizeof(hparams.f16));
 
     printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
@@ -82,6 +84,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
     printf("%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
     printf("%s: n_head = %d\n", __func__, hparams.n_head);
     printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
+    printf("%s: n_vocab_size = %d\n", __func__, hparams.n_vocab_size);
     printf("%s: f16 = %d\n", __func__, hparams.f16);
 
     fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab));
@@ -90,6 +93,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
     fout.write((char *) &hparams.n_intermediate, sizeof(hparams.n_intermediate));
     fout.write((char *) &hparams.n_head, sizeof(hparams.n_head));
     fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer));
+    fout.write((char *) &hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
     fout.write((char *) &itype, sizeof(hparams.f16));
 }
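This patch inserts one int32 into the serialized header, between `num_hidden_layers` and the `ftype` flag, so the converter, loader, and quantizer must all agree on the new layout: models converted before this change will be misread by the updated loader and need to be re-converted. Below is a minimal header sanity check, a sketch rather than part of this PR. It assumes the leading fields keep their existing order (magic, vocab size, max tokens, embedding size), that the file starts with the ggml magic `0x67676d6c` checked by `bert_load_from_file`, and a little-endian host to match `struct.pack("i", ...)` in the converter; the script name and `read_header` helper are hypothetical.

```python
# check_header.py -- hypothetical helper, not part of this PR.
# Reads back the 9 int32 header fields that convert-to-ggml.py writes,
# assuming the pre-existing field order with type_vocab_size inserted
# between num_hidden_layers and ftype.
import struct
import sys

FIELDS = ["magic", "n_vocab", "n_max_tokens", "n_embd",
          "n_intermediate", "n_head", "n_layer", "n_vocab_size", "f16"]

def read_header(path):
    with open(path, "rb") as f:
        values = struct.unpack("<9i", f.read(9 * 4))  # 9 little-endian int32s
    return dict(zip(FIELDS, values))

if __name__ == "__main__":
    hdr = read_header(sys.argv[1])
    assert hdr["magic"] == 0x67676d6c, "missing ggml magic (assumed value)"
    for name in FIELDS[1:]:
        print(f"{name:15s} = {hdr[name]}")
```

For a standard BERT checkpoint, `n_vocab_size` should print as 2 (`type_vocab_size` in the Hugging Face config), matching the second dimension now given to `token_type_embeddings` instead of the old hard-coded `2`.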