From ffc302b0fd1ac5f0a3dbaedcb899df0f9f615f44 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Thu, 28 Sep 2023 22:44:29 -0700 Subject: [PATCH 1/3] commit --- examples/cpp/llama/llama_config.ini | 4 ++-- examples/cpp/llama/llama_example.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/cpp/llama/llama_config.ini b/examples/cpp/llama/llama_config.ini index ef789d35d..ea1a860f5 100644 --- a/examples/cpp/llama/llama_config.ini +++ b/examples/cpp/llama/llama_config.ini @@ -6,7 +6,7 @@ tensor_para_size=1 pipeline_para_size=1 model_name=llama_7b -model_dir=/notebooks/llama-2-70b-hf-ft-tp-1_llama_decoder/1/1-gpu/ +model_dir=/notebooks/code-llama-ft/code-llama-34b/code-llama-34b_llama_decoder/1/1-gpu/ [request] beam_width=1 # beam width for beam search @@ -24,7 +24,7 @@ request_output_len=32 # determine by the request head_num = 64 kv_head_num = 8 size_per_head = 128 -inter_size = 28672 +inter_size = 22016 num_layer = 3 rotary_embedding = 128 layernorm_eps = 1e-05 diff --git a/examples/cpp/llama/llama_example.cc b/examples/cpp/llama/llama_example.cc index 84a0b54aa..00fa5bad5 100644 --- a/examples/cpp/llama/llama_example.cc +++ b/examples/cpp/llama/llama_example.cc @@ -44,7 +44,7 @@ int main(int argc, char* argv[]) ini_name = std::string(argv[1]); } else { - ini_name = "/notebooks/FasterTransformer/examples/cpp/llama/llama_config.ini"; + ini_name = "/notebooks/tmp/FasterTransformer/examples/cpp/llama/llama_config.ini"; } INIReader reader = INIReader(ini_name); From 69b87ae9135a7978943a40ff489f848b86e7a634 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Thu, 28 Sep 2023 22:49:30 -0700 Subject: [PATCH 2/3] commit --- examples/cpp/llama/llama_config.ini | 2 +- examples/cpp/llama/start_ids.csv | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/examples/cpp/llama/llama_config.ini b/examples/cpp/llama/llama_config.ini index ea1a860f5..0c272e358 100644 --- a/examples/cpp/llama/llama_config.ini +++ b/examples/cpp/llama/llama_config.ini @@ -17,7 +17,7 @@ repetition_penalty=1.0 ; Use for sampling presence_penalty=0.0 ; Only one of repetition_penalty and presence_penalty are allowed. len_penalty=0.0 beam_search_diversity_rate=0.0 -request_batch_size=8 # determine by the request +request_batch_size=1 # determine by the request request_output_len=32 # determine by the request [llama_7b] diff --git a/examples/cpp/llama/start_ids.csv b/examples/cpp/llama/start_ids.csv index 6b8b9c375..612c85964 100644 --- a/examples/cpp/llama/start_ids.csv +++ b/examples/cpp/llama/start_ids.csv @@ -1,8 +1 @@ 1, 18637, 29892, 526, 366, 1136, 455, 2470, 29973, 1815, 366, 5193, 304, 592, 29973 -1, 18637 -1, 18637, 29892, 526, 366, 1136, 455, 2470, 29973, 1815, 366, 5193, 304, 592, 29973 -1, 18637, 29892, 526, 366, 1136, 455, 2470, 29973, 1815, 366, 5193, 304, 592, 29973 -1, 18637, 29892, 526, 366, 1136, 455, 2470, 29973, 1815, 366, 5193, 304, 592, 29973 -1, 18637, 29892, 526, 366, 1136, 455, 2470, 29973, 1815, 366, 5193, 304, 592, 29973 -1, 18637, 29892, 526, 366, 1136, 455, 2470, 29973, 1815, 366, 5193, 304, 592, 29973 -1, 18637, 29892, 526, 366, 1136 From bbf6c70205701d8d9dd15cd5cbe0071e3e9e0fb2 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Thu, 28 Sep 2023 22:53:02 -0700 Subject: [PATCH 3/3] commit --- examples/cpp/llama/llama_config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cpp/llama/llama_config.ini b/examples/cpp/llama/llama_config.ini index 0c272e358..3e9ea8c92 100644 --- a/examples/cpp/llama/llama_config.ini +++ b/examples/cpp/llama/llama_config.ini @@ -6,7 +6,7 @@ tensor_para_size=1 pipeline_para_size=1 model_name=llama_7b -model_dir=/notebooks/code-llama-ft/code-llama-34b/code-llama-34b_llama_decoder/1/1-gpu/ +model_dir=/notebooks/code-llama-ft/code-llama-34b/code-llama-34b_llama_decoder/1/1-gpu [request] beam_width=1 # beam width for beam search