From 1342ef60836bd480995501df2be2d3a5c5750ba2 Mon Sep 17 00:00:00 2001
From: sfc-gh-zhwang
Date: Fri, 15 Sep 2023 00:38:40 -0700
Subject: [PATCH] commit

---
 .vscode/settings.json                              | 28 ++++++++++++++++++++--
 .../models/llama/LlamaDecoderLayerWeight.cc        |  2 ++
 .../models/llama/LlamaWeight.cc                    |  2 ++
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6f535da99..6df8277d0 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -67,6 +67,30 @@
         "unordered_set": "cpp",
         "future": "cpp",
         "cfenv": "cpp",
-        "typeindex": "cpp"
+        "typeindex": "cpp",
+        "__bit_reference": "cpp",
+        "__bits": "cpp",
+        "__config": "cpp",
+        "__debug": "cpp",
+        "__errc": "cpp",
+        "__hash_table": "cpp",
+        "__locale": "cpp",
+        "__mutex_base": "cpp",
+        "__node_handle": "cpp",
+        "__split_buffer": "cpp",
+        "__threading_support": "cpp",
+        "__tree": "cpp",
+        "__tuple": "cpp",
+        "__verbose_abort": "cpp",
+        "bit": "cpp",
+        "ios": "cpp",
+        "locale": "cpp",
+        "queue": "cpp",
+        "stack": "cpp",
+        "variant": "cpp",
+        "__nullptr": "cpp",
+        "__string": "cpp",
+        "compare": "cpp",
+        "concepts": "cpp"
     }
-}
\ No newline at end of file
+}
diff --git a/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc b/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc
index 34ad480cf..0850303dc 100644
--- a/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc
+++ b/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc
@@ -193,6 +193,7 @@ void LlamaDecoderLayerWeight<T>::loadModel(std::string dir_path, FtCudaDataType
 {
     FT_CHECK(is_maintain_buffer == true);
     const std::string rank_spec = std::to_string(tensor_para_rank_);
+    FT_LOG_INFO("loading llama model weight rank %s from %s", rank_spec.c_str(), dir_path.c_str());
 
     // fill all bias to zeros
     deviceFill(weights_ptr[0], (size_t)hidden_units_, (T)0.0);
@@ -285,6 +286,7 @@ void LlamaDecoderLayerWeight<T>::loadModel(std::string dir_path, FtCudaDataType
                                           model_file_type);
     }
 
+    FT_LOG_INFO("finished loading llama model weight rank %s from %s", rank_spec.c_str(), dir_path.c_str());
 }
 
 template
diff --git a/src/fastertransformer/models/llama/LlamaWeight.cc b/src/fastertransformer/models/llama/LlamaWeight.cc
index a1bda4053..92f7354ed 100644
--- a/src/fastertransformer/models/llama/LlamaWeight.cc
+++ b/src/fastertransformer/models/llama/LlamaWeight.cc
@@ -254,6 +254,7 @@ void LlamaWeight<T>::loadModel(std::string dir_path)
 {
     FtCudaDataType model_file_type = getModelFileType(dir_path + "/config.ini", "llama");
     FT_CHECK(is_maintain_buffer == true);
+    FT_LOG_INFO("loading llama model from %s", dir_path.c_str());
 
     loadWeightFromBin(
         weights_ptr[0], {(size_t)(vocab_size_ * hidden_units_)}, dir_path + "/model.wte.weight.bin", model_file_type);
@@ -292,6 +293,7 @@ void LlamaWeight<T>::loadModel(std::string dir_path)
             decoder_layer_weights[l]->loadModel(dir_path + "/model.layers." + std::to_string(l), model_file_type);
         }
     }
+    FT_LOG_INFO("finished loading llama model from %s", dir_path.c_str());
 }
 
 template