From 1342ef60836bd480995501df2be2d3a5c5750ba2 Mon Sep 17 00:00:00 2001
From: sfc-gh-zhwang
Date: Fri, 15 Sep 2023 00:38:40 -0700
Subject: [PATCH] commit

---
 .vscode/settings.json                              | 28 ++++++++++++++++++++--
 .../models/llama/LlamaDecoderLayerWeight.cc        |  2 ++
 .../models/llama/LlamaWeight.cc                    |  2 ++
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6f535da99..6df8277d0 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -67,6 +67,30 @@
         "unordered_set": "cpp",
         "future": "cpp",
         "cfenv": "cpp",
-        "typeindex": "cpp"
+        "typeindex": "cpp",
+        "__bit_reference": "cpp",
+        "__bits": "cpp",
+        "__config": "cpp",
+        "__debug": "cpp",
+        "__errc": "cpp",
+        "__hash_table": "cpp",
+        "__locale": "cpp",
+        "__mutex_base": "cpp",
+        "__node_handle": "cpp",
+        "__split_buffer": "cpp",
+        "__threading_support": "cpp",
+        "__tree": "cpp",
+        "__tuple": "cpp",
+        "__verbose_abort": "cpp",
+        "bit": "cpp",
+        "ios": "cpp",
+        "locale": "cpp",
+        "queue": "cpp",
+        "stack": "cpp",
+        "variant": "cpp",
+        "__nullptr": "cpp",
+        "__string": "cpp",
+        "compare": "cpp",
+        "concepts": "cpp"
     }
-}
\ No newline at end of file
+}
diff --git a/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc b/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc
index 34ad480cf..0850303dc 100644
--- a/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc
+++ b/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc
@@ -193,6 +193,7 @@ void LlamaDecoderLayerWeight<T>::loadModel(std::string dir_path, FtCudaDataType
 {
     FT_CHECK(is_maintain_buffer == true);
     const std::string rank_spec = std::to_string(tensor_para_rank_);
+    FT_LOG_INFO("loading llama model weight rank %s from %s", rank_spec.c_str(), dir_path.c_str());
 
     // fill all bias to zeros
     deviceFill(weights_ptr[0], (size_t)hidden_units_, (T)0.0);
@@ -285,6 +286,7 @@ void LlamaDecoderLayerWeight<T>::loadModel(std::string dir_path, FtCudaDataType
                                           model_file_type);
     }
 
+    FT_LOG_INFO("finished loading llama model weight rank %s from %s", rank_spec.c_str(), dir_path.c_str());
 }
 
 template
diff --git a/src/fastertransformer/models/llama/LlamaWeight.cc b/src/fastertransformer/models/llama/LlamaWeight.cc
index a1bda4053..92f7354ed 100644
--- a/src/fastertransformer/models/llama/LlamaWeight.cc
+++ b/src/fastertransformer/models/llama/LlamaWeight.cc
@@ -254,6 +254,7 @@ void LlamaWeight<T>::loadModel(std::string dir_path)
 {
     FtCudaDataType model_file_type = getModelFileType(dir_path + "/config.ini", "llama");
     FT_CHECK(is_maintain_buffer == true);
+    FT_LOG_INFO("loading llama model from %s", dir_path.c_str());
 
     loadWeightFromBin(
         weights_ptr[0], {(size_t)(vocab_size_ * hidden_units_)}, dir_path + "/model.wte.weight.bin", model_file_type);
@@ -292,6 +293,7 @@ void LlamaWeight<T>::loadModel(std::string dir_path)
             decoder_layer_weights[l]->loadModel(dir_path + "/model.layers." + std::to_string(l), model_file_type);
         }
     }
+    FT_LOG_INFO("finished loading llama model from %s", dir_path.c_str());
 }
 
 template