From 2201642cb16a5454a045b2f6deef36820bd1ae19 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Wed, 19 Jul 2023 09:36:19 -0700 Subject: [PATCH 01/19] abc --- CMakeLists.txt | 2 +- src/libfastertransformer.cc | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9c6c5c..d539453 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,7 +110,7 @@ if (EXISTS ${FT_DIR}) else() FetchContent_Declare( repo-ft - GIT_REPOSITORY https://github.com/NVIDIA/FasterTransformer.git + GIT_REPOSITORY https://github.com/neevaco/FasterTransformer.git GIT_TAG main GIT_SHALLOW ON ) diff --git a/src/libfastertransformer.cc b/src/libfastertransformer.cc index a870aa0..f2bbc0e 100644 --- a/src/libfastertransformer.cc +++ b/src/libfastertransformer.cc @@ -49,6 +49,7 @@ // FT's libraries have dependency with triton's lib #include "src/fastertransformer/triton_backend/bert/BertTritonModel.h" +#include "src/fastertransformer/triton_backend/deberta/DebertaTritonModel.h" #include "src/fastertransformer/triton_backend/gptj/GptJTritonModel.h" #include "src/fastertransformer/triton_backend/gptj/GptJTritonModelInstance.h" #include "src/fastertransformer/triton_backend/gptneox/GptNeoXTritonModel.h" @@ -327,6 +328,22 @@ std::shared_ptr ModelState::ModelFactory( } else if (data_type == "bf16") { ft_model = std::make_shared<BertTritonModel<__nv_bfloat16>>( tp, pp, custom_ar, model_dir, int8_mode, is_sparse, remove_padding); +#endif + } + } else if (model_type == "deberta") { + const int is_sparse = param_get_bool(param,"is_sparse", false); + const int remove_padding = param_get_bool(param,"is_remove_padding", false); + + if (data_type == "fp16") { + ft_model = std::make_shared<DebertaTritonModel<half>>( + tp, pp, custom_ar, model_dir, is_sparse, remove_padding); + } else if (data_type == "fp32") { + ft_model = std::make_shared<DebertaTritonModel<float>>( + tp, pp, custom_ar, model_dir, is_sparse, remove_padding); +#ifdef ENABLE_BF16 + } else if (data_type == "bf16") { + ft_model = std::make_shared<DebertaTritonModel<__nv_bfloat16>>( + tp, pp, 
custom_ar, model_dir, is_sparse, remove_padding); #endif } } else { From 968920ad849522e401ef321252723a7b0847fa68 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Fri, 18 Aug 2023 23:59:17 -0700 Subject: [PATCH 02/19] commit --- src/libfastertransformer.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/libfastertransformer.cc b/src/libfastertransformer.cc index a870aa0..a3b459f 100644 --- a/src/libfastertransformer.cc +++ b/src/libfastertransformer.cc @@ -53,6 +53,7 @@ #include "src/fastertransformer/triton_backend/gptj/GptJTritonModelInstance.h" #include "src/fastertransformer/triton_backend/gptneox/GptNeoXTritonModel.h" #include "src/fastertransformer/triton_backend/gptneox/GptNeoXTritonModelInstance.h" +#include "src/fastertransformer/triton_backend/llama/LlamaTritonModel.h" #include "src/fastertransformer/triton_backend/multi_gpu_gpt/ParallelGptTritonModel.h" #include "src/fastertransformer/triton_backend/multi_gpu_gpt/ParallelGptTritonModelInstance.h" #include "src/fastertransformer/triton_backend/t5/T5TritonModel.h" @@ -327,6 +328,21 @@ std::shared_ptr ModelState::ModelFactory( } else if (data_type == "bf16") { ft_model = std::make_shared<BertTritonModel<__nv_bfloat16>>( tp, pp, custom_ar, model_dir, int8_mode, is_sparse, remove_padding); +#endif + } + } else if (model_type == "llama") { + const int int8_mode = param_get_int(param, "int8_mode"); + + if (data_type == "fp16") { + ft_model = std::make_shared<LlamaTritonModel<half>>( + tp, pp, custom_ar, model_dir, int8_mode); + } else if (data_type == "fp32") { + ft_model = std::make_shared<LlamaTritonModel<float>>( + tp, pp, custom_ar, model_dir, int8_mode); +#ifdef ENABLE_BF16 + } else if (data_type == "bf16") { + ft_model = std::make_shared<LlamaTritonModel<__nv_bfloat16>>( + tp, pp, custom_ar, model_dir, int8_mode); #endif } } else { From 1d8b7fd88df97cd9d2ee0f1642c57418a2a691f9 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Sat, 19 Aug 2023 00:00:31 -0700 Subject: [PATCH 03/19] commit --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index e9c6c5c..bd0fe84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,8 +110,8 @@ if (EXISTS ${FT_DIR}) else() FetchContent_Declare( repo-ft - GIT_REPOSITORY https://github.com/NVIDIA/FasterTransformer.git - GIT_TAG main + GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer + GIT_TAG e770ddf2bc66217034b6e9e3b0c3256ebf1c1b40 GIT_SHALLOW ON ) endif() From 123933b93a97488e6bf8ad28d15670ed18bb7637 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Sat, 19 Aug 2023 22:24:08 -0700 Subject: [PATCH 04/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bd0fe84..afb42dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG e770ddf2bc66217034b6e9e3b0c3256ebf1c1b40 + GIT_TAG d7ac0faef8b31ad207dd80a57deb65405992f7f9 GIT_SHALLOW ON ) endif() From ab152e2d3c797d18d4c22f1673be215df2826758 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Mon, 21 Aug 2023 13:26:33 -0700 Subject: [PATCH 05/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index afb42dd..7d8ba3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG d7ac0faef8b31ad207dd80a57deb65405992f7f9 + GIT_TAG 7555256b6d861345886e09120083cae82c61a1fc GIT_SHALLOW ON ) endif() From 5db164bc56df46825c28353233f43d4a398a60fb Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Mon, 21 Aug 2023 13:47:52 -0700 Subject: [PATCH 06/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d8ba3e..8aba851 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() 
FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG 7555256b6d861345886e09120083cae82c61a1fc + GIT_TAG 0cda56c9830ebba575477457258e1046c121fdea GIT_SHALLOW ON ) endif() From 27b9a3660076fb6621a84bbec3e08318c7322782 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Wed, 23 Aug 2023 20:17:42 -0700 Subject: [PATCH 07/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8aba851..b88d2ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG 0cda56c9830ebba575477457258e1046c121fdea + GIT_TAG 5db164bc56df46825c28353233f43d4a398a60fb GIT_SHALLOW ON ) endif() From 314cc9d0435fd58387dcbe8e75fa32ddc1f90ef7 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Wed, 23 Aug 2023 20:19:19 -0700 Subject: [PATCH 08/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b88d2ee..411b120 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG 5db164bc56df46825c28353233f43d4a398a60fb + GIT_TAG 5a28f0e460ac005545c893acd265329c884d24e8 GIT_SHALLOW ON ) endif() From ec8ba9476923b6b14acc4e3734f36227aed5f864 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Wed, 23 Aug 2023 20:22:49 -0700 Subject: [PATCH 09/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 411b120..139b1ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG 5a28f0e460ac005545c893acd265329c884d24e8 + GIT_TAG f6e340308d71ec0ddb2a9e009d3df568505a1612 
GIT_SHALLOW ON ) endif() From 63cb0b67a192df6bca0a853480cb3bf20db775ab Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Wed, 23 Aug 2023 23:47:08 -0700 Subject: [PATCH 10/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 139b1ff..0ef201e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG f6e340308d71ec0ddb2a9e009d3df568505a1612 + GIT_TAG 2d332b3a3766a56df18c9b981841ead3236415ba GIT_SHALLOW ON ) endif() From 25fbeb902e1c53939af7a7f7df471d08fdb48c8d Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Thu, 24 Aug 2023 14:43:11 -0700 Subject: [PATCH 11/19] commit --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ef201e..f7b8f3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,8 +110,8 @@ if (EXISTS ${FT_DIR}) else() FetchContent_Declare( repo-ft - GIT_REPOSITORY https://github.com/sfc-gh-zhwang/FasterTransformer - GIT_TAG 2d332b3a3766a56df18c9b981841ead3236415ba + GIT_REPOSITORY https://github.com/neevaco/FasterTransformer + GIT_TAG 9c7b9934db47ba6d8034e3c54294288a165f520a GIT_SHALLOW ON ) endif() From 35b938849c522058079e84bfa89da78f32e2f8a0 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Thu, 24 Aug 2023 14:54:47 -0700 Subject: [PATCH 12/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f7b8f3f..b03ee1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/neevaco/FasterTransformer - GIT_TAG 9c7b9934db47ba6d8034e3c54294288a165f520a + GIT_TAG 86c24253bae05e507f784cd28e5726dbb86cdea9 GIT_SHALLOW ON ) endif() From d259efc5c299238cb6b854761b50fff3f243dc71 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: 
Thu, 24 Aug 2023 15:04:37 -0700 Subject: [PATCH 13/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b03ee1f..572c8df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/neevaco/FasterTransformer - GIT_TAG 86c24253bae05e507f784cd28e5726dbb86cdea9 + GIT_TAG f1e61e6429f7342ab5aa42b56e9feddf708c26dc GIT_SHALLOW ON ) endif() From e0562b113af64c1a149489a7a8aef2ef14882f76 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Thu, 24 Aug 2023 17:36:57 -0700 Subject: [PATCH 14/19] commit --- src/libfastertransformer.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/libfastertransformer.cc b/src/libfastertransformer.cc index a3b459f..f478c22 100644 --- a/src/libfastertransformer.cc +++ b/src/libfastertransformer.cc @@ -49,6 +49,7 @@ // FT's libraries have dependency with triton's lib #include "src/fastertransformer/triton_backend/bert/BertTritonModel.h" +#include "src/fastertransformer/triton_backend/deberta/DebertaTritonModel.h" #include "src/fastertransformer/triton_backend/gptj/GptJTritonModel.h" #include "src/fastertransformer/triton_backend/gptj/GptJTritonModelInstance.h" #include "src/fastertransformer/triton_backend/gptneox/GptNeoXTritonModel.h" @@ -343,6 +344,22 @@ std::shared_ptr ModelState::ModelFactory( } else if (data_type == "bf16") { ft_model = std::make_shared<LlamaTritonModel<__nv_bfloat16>>( tp, pp, custom_ar, model_dir, int8_mode); +#endif + } + } else if (model_type == "deberta") { + const int is_sparse = param_get_bool(param,"is_sparse", false); + const int remove_padding = param_get_bool(param,"is_remove_padding", false); + + if (data_type == "fp16") { + ft_model = std::make_shared<DebertaTritonModel<half>>( + tp, pp, custom_ar, model_dir, is_sparse, remove_padding); + } else if (data_type == "fp32") { + ft_model = std::make_shared<DebertaTritonModel<float>>( + tp, pp, custom_ar, model_dir, is_sparse, remove_padding); +#ifdef ENABLE_BF16 + } 
else if (data_type == "bf16") { + ft_model = std::make_shared<DebertaTritonModel<__nv_bfloat16>>( + tp, pp, custom_ar, model_dir, is_sparse, remove_padding); #endif } } else { From 2d60bbec0dd8e8aa9b2c6257129dbb1e5c5507d0 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Mon, 28 Aug 2023 14:15:51 -0700 Subject: [PATCH 15/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 572c8df..23919e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/neevaco/FasterTransformer - GIT_TAG f1e61e6429f7342ab5aa42b56e9feddf708c26dc + GIT_TAG affa1ef1c175d03db8ff5b14824cc58dd2c52c2b GIT_SHALLOW ON ) endif() From 31babb05d8052699b340b6d42f4571f818bc2c9b Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Mon, 28 Aug 2023 14:19:20 -0700 Subject: [PATCH 16/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 23919e4..9ff166f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,7 +110,7 @@ if (EXISTS ${FT_DIR}) else() FetchContent_Declare( repo-ft - GIT_REPOSITORY https://github.com/neevaco/FasterTransformer + GIT_REPOSITORY https://github.com/neevaco/FasterTransformer.git GIT_TAG affa1ef1c175d03db8ff5b14824cc58dd2c52c2b GIT_SHALLOW ON ) From 08625c3aafb375f1da80c84174062208cbf6ae97 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Tue, 29 Aug 2023 23:17:44 -0700 Subject: [PATCH 17/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ff166f..06a780b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/neevaco/FasterTransformer.git - GIT_TAG affa1ef1c175d03db8ff5b14824cc58dd2c52c2b + GIT_TAG 6dad842e87f72bd074867d1c11ddca0121862e85 GIT_SHALLOW ON ) endif() From ffd06a78527ef910cdb5be86453731455f31c71e 
Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Wed, 30 Aug 2023 15:40:28 -0700 Subject: [PATCH 18/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06a780b..96e733a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/neevaco/FasterTransformer.git - GIT_TAG 6dad842e87f72bd074867d1c11ddca0121862e85 + GIT_TAG 031c8c433cb84c091eeb1839c18c089d15de381d GIT_SHALLOW ON ) endif() From 58b54ecab0b0a4447c61cab28d11f2caf54100f0 Mon Sep 17 00:00:00 2001 From: sfc-gh-zhwang Date: Wed, 30 Aug 2023 16:15:51 -0700 Subject: [PATCH 19/19] commit --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 96e733a..7fe767f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ else() FetchContent_Declare( repo-ft GIT_REPOSITORY https://github.com/neevaco/FasterTransformer.git - GIT_TAG 031c8c433cb84c091eeb1839c18c089d15de381d + GIT_TAG 7bb372317da21dc7a898cb0e6e0ce7c11b0b38ec GIT_SHALLOW ON ) endif()