From 00a4adb8bcaee98a625a3edb074a1eaabcc5c67f Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Wed, 27 Dec 2023 05:15:19 -0600
Subject: [PATCH 01/16] Intel Macs don't support
 [AMX](https://www.intel.com/content/www/us/en/products/docs/accelerator-engines/what-is-intel-amx.html)
 anyway per
 https://github.com/intel/intel-extension-for-transformers/discussions/1081#discussioncomment-7952888

Signed-off-by: Luke Nezda
---
 .../llm/library/jblas/jblas/jit_blas_utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intel_extension_for_transformers/llm/library/jblas/jblas/jit_blas_utils.h b/intel_extension_for_transformers/llm/library/jblas/jblas/jit_blas_utils.h
index eed0c3bdaad..37069e6c267 100644
--- a/intel_extension_for_transformers/llm/library/jblas/jblas/jit_blas_utils.h
+++ b/intel_extension_for_transformers/llm/library/jblas/jblas/jit_blas_utils.h
@@ -366,7 +366,7 @@ inline float get_mxfp_maxnorm(const JBLAS_DTYPE t, int ebits, int mantissa_bits)
   return max_norm;
 }
 
-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(__APPLE__)
 static void request_perm_xtile_data() {
   unsigned long bitmask;
   long rc;

From 5f4e0393e0073f5d63412f85a3e5665f5693e5fd Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Wed, 27 Dec 2023 05:16:48 -0600
Subject: [PATCH 02/16] =?UTF-8?q?gcc-13=20said=20`reinterpret=5Fcast(nullptr)`=20was=20ambiguous=20=C2=AF\=5F(=E3=83=84)=5F/?=
 =?UTF-8?q?=C2=AF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Luke Nezda
---
 .../llm/library/jblas/jblas/kernel_jit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h b/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h
index 4a711736e9d..70faa236c7c 100644
--- a/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h
+++ b/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h
@@ -1228,7 +1228,7 @@ class PaddingTransInterleaveCvt : protected xbyak::JitAvx512f {
       jmp(ptr[reg_tmp + reg_tmp2 * sizeof(void*)], T_NEAR);  // switch(rows-iterrow) ...
       align(sizeof(intptr_t));
       L(l_tail_tbl);
-      db(reinterpret_cast(nullptr), sizeof(intptr_t));  // case 0 should never occur
+      db(nullptr, sizeof(intptr_t));  // case 0 should never occur
       for (int i = 1; i < trans_cell; ++i) putL(l_tail_case[i]);
 
       for (int m_tail = 1; m_tail < trans_cell; ++m_tail) {  // case (m_tail):

From 1b143041b108f4283ec9d75599e023d4a691c6c9 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Wed, 27 Dec 2023 17:33:30 -0600
Subject: [PATCH 03/16] fix docker command

Signed-off-by: Luke Nezda
---
 docker/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/README.md b/docker/README.md
index 7ca3499c1bb..08fe1dc50d5 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -24,7 +24,7 @@ docker compose build
 OR
 ```bash
 docker pull intel/ai-tools:itrex-1.3.0
-docker pull intel/ai-tools:itrex-devel-1.3.0
+docker pull intel/ai-tools:itrex-1.3.0-devel
 ```
 
 ## Use Docker Image

From f02ed60fbfcd3cc6a553a4837ee094a7ac7873f8 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Wed, 27 Dec 2023 22:34:13 -0600
Subject: [PATCH 04/16] disable the build of qbits and the deprecated executor
 on darwin per
 https://github.com/intel/intel-extension-for-transformers/discussions/1081#discussioncomment-7961621

Signed-off-by: Luke Nezda
---
 setup.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/setup.py b/setup.py
index 583a24221ff..0fe02d4a7bc 100644
--- a/setup.py
+++ b/setup.py
@@ -234,21 +234,24 @@ def check_submodules():
         end = time.time()
         print(f' --- Submodule initialization took {end - start:.2f} sec')
     except Exception:
-        print(' --- Submodule initalization failed')
+        print(' --- Submodule initialization failed')
         print('Please run:\n\tgit submodule update --init --recursive')
         sys.exit(1)
 
 
 if __name__ == '__main__':
-    ext_modules = [CMakeExtension(
-        "intel_extension_for_transformers.qbits", 'intel_extension_for_transformers/llm/operator/csrc', lib_only=True)]
+    ext_modules = []
+    if sys.platform != "darwin":
+        ext_modules.append(CMakeExtension("intel_extension_for_transformers.qbits",
+                                          "intel_extension_for_transformers/llm/operator/csrc", lib_only=True))
     if not SKIP_RUNTIME:
         check_submodules()
-        ext_modules.extend([
-            CMakeExtension("intel_extension_for_transformers.neural_engine_py", "intel_extension_for_transformers/llm/runtime/deprecated/"),
-            CMakeExtension("intel_extension_for_transformers.llm.runtime.graph.mpt_cpp", "intel_extension_for_transformers/llm/runtime/graph/"),
-        ])
-    cmdclass={'build_ext': CMakeBuild}
+        ext_modules.append(CMakeExtension("intel_extension_for_transformers.llm.runtime.graph.mpt_cpp",
+                                          "intel_extension_for_transformers/llm/runtime/graph/"))
+        if sys.platform != "darwin":
+            ext_modules.append(CMakeExtension("intel_extension_for_transformers.neural_engine_py",
+                                              "intel_extension_for_transformers/llm/runtime/deprecated/"))
+    cmdclass = {'build_ext': CMakeBuild}
 
     setup(
         name="intel-extension-for-transformers",

From 356fa1e55402fbd643275df77b4ace351813e032 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Wed, 27 Dec 2023 22:35:25 -0600
Subject: [PATCH 05/16] attempting to address error: 'sysctlbyname' was not
 declared in this scope; did you mean 'SYS_sysctlbyname'?
ld: library not found for -lrt
collect2: error: ld returned 1 exit status
[2/164] Building CXX object models/mpt/CMakeFiles/mpt.dir/__/model_utils/util.cpp.o
FAILED: models/mpt/CMakeFiles/mpt.dir/__/model_utils/util.cpp.o
/usr/local/opt/gcc/bin/g++-13 -DNE_GELU_USE_VEC -DNE_SIMD_VEC_DOT_F16 -I/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph -I/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/core/. -I/Users/nezda/code/itrex/intel_extension_for_transformers/llm/library/jblas -O3 -DNDEBUG -std=c++17 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk -fPIC -mf16c -mfma -mavx -mavx2 -fopenmp -MD -MT models/mpt/CMakeFiles/mpt.dir/__/model_utils/util.cpp.o -MF models/mpt/CMakeFiles/mpt.dir/__/model_utils/util.cpp.o.d -o models/mpt/CMakeFiles/mpt.dir/__/model_utils/util.cpp.o -c /Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp
/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp: In function 'int32_t get_num_physical_cores()':
/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp:36:16: error: 'sysctlbyname' was not declared in this scope; did you mean 'SYS_sysctlbyname'?
   36 |   int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
      |                ^~~~~~~~~~~~
      |                SYS_sysctlbyname
[3/164] Building CXX object models/llama/CMakeFiles/llama.dir/__/model_utils/util.cpp.o
FAILED: models/llama/CMakeFiles/llama.dir/__/model_utils/util.cpp.o
/usr/local/opt/gcc/bin/g++-13 -DNE_GELU_USE_VEC -DNE_SIMD_VEC_DOT_F16 -I/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph -I/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/core/. -I/Users/nezda/code/itrex/intel_extension_for_transformers/llm/library/jblas -O3 -DNDEBUG -std=c++17 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk -fPIC -mf16c -mfma -mavx -mavx2 -fopenmp -MD -MT models/llama/CMakeFiles/llama.dir/__/model_utils/util.cpp.o -MF models/llama/CMakeFiles/llama.dir/__/model_utils/util.cpp.o.d -o models/llama/CMakeFiles/llama.dir/__/model_utils/util.cpp.o -c /Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp
/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp: In function 'int32_t get_num_physical_cores()':
/Users/nezda/code/itrex/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp:36:16: error: 'sysctlbyname' was not declared in this scope; did you mean 'SYS_sysctlbyname'?
   36 |   int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
      |                ^~~~~~~~~~~~
      |                SYS_sysctlbyname

Signed-off-by: Luke Nezda
---
 .../llm/runtime/graph/models/model_utils/util.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp b/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp
index 936030174de..be86f3ff8a6 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp
+++ b/intel_extension_for_transformers/llm/runtime/graph/models/model_utils/util.cpp
@@ -33,11 +33,11 @@ int32_t get_num_physical_cores() {
 #elif defined(__APPLE__) && defined(__MACH__)
   int32_t num_physical_cores;
   size_t len = sizeof(num_physical_cores);
-  int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
+  int result = syscall(SYS_sysctlbyname, "hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
   if (result == 0) {
     return num_physical_cores;
   }
-  result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
+  result = syscall(SYS_sysctlbyname, "hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
   if (result == 0) {
     return num_physical_cores;
   }

From 46b64bd81b60b0e6a014d10b34a37911475a6475 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Thu, 28 Dec 2023 06:36:02 -0600
Subject: [PATCH 06/16] don't link to -rt for APPLE either per
 https://github.com/intel/intel-extension-for-transformers/discussions/1081#discussioncomment-7962061

Signed-off-by: Luke Nezda
---
 .../llm/runtime/graph/core/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt b/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt
index bcf34a9ca4b..e443458dfaa 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt
+++ b/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt
@@ -31,7 +31,7 @@ else ()
   target_link_libraries(ne_layers PUBLIC Threads::Threads jblas::jblas ne_vec)
 endif()
 
-if(NOT WIN32)
+if(NOT WIN32 AND NOT APPLE)
   target_link_libraries(ne_layers PUBLIC rt)
 endif()
 
@@ -54,7 +54,7 @@ function(add_test_target src)
     target_link_options(${test_target} PRIVATE -fsanitize=address)
     target_include_directories(${test_target} PUBLIC .)
     target_link_libraries(${test_target} PUBLIC Threads::Threads jblas::jblas ne_vec)
-    if(NOT WIN32)
+    if(NOT WIN32 AND NOT APPLE)
       target_link_libraries(${test_target} PUBLIC rt)
     endif()
     add_test(NAME ${test_target} COMMAND ${test_target})

From 8f99e30e14a2de58f4d09f2462082be4e3194eb4 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Thu, 28 Dec 2023 09:03:05 -0600
Subject: [PATCH 07/16] typos

Signed-off-by: Luke Nezda
---
 .../question-answering/pruning/group_lasso/modeling.py | 4 ++--
 .../llm/runtime/graph/__init__.py                      | 4 ++--
 .../llm/runtime/graph/developer_document.md            | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/huggingface/pytorch/question-answering/pruning/group_lasso/modeling.py b/examples/huggingface/pytorch/question-answering/pruning/group_lasso/modeling.py
index ff0397a2368..8731dcd2184 100644
--- a/examples/huggingface/pytorch/question-answering/pruning/group_lasso/modeling.py
+++ b/examples/huggingface/pytorch/question-answering/pruning/group_lasso/modeling.py
@@ -601,7 +601,7 @@ def forward(self, sequence_output, pooled_output):
 class BertPreTrainedModel(nn.Module):
     """ An abstract class to handle weights initialization and
-        a simple interface for dowloading and loading pretrained models.
+        a simple interface for downloading and loading pretrained models.
     """
     def __init__(self, config, *inputs, **kwargs):
         super(BertPreTrainedModel, self).__init__()
@@ -663,7 +663,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d
                 . `model.chkpt` a TensorFlow checkpoint
             from_tf: should we load the weights from a locally saved TensorFlow checkpoint
             cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
+            state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
             *inputs, **kwargs: additional input for the specific Bert class
                 (ex: num_labels for BertForSequenceClassification)
         """

diff --git a/intel_extension_for_transformers/llm/runtime/graph/__init__.py b/intel_extension_for_transformers/llm/runtime/graph/__init__.py
index af3c79e47fd..85ba32b911e 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/__init__.py
+++ b/intel_extension_for_transformers/llm/runtime/graph/__init__.py
@@ -65,7 +65,7 @@ def __import_package(self, model_type):
         elif model_type == "mistral":
             import intel_extension_for_transformers.llm.runtime.graph.mistral_cpp as cpp_model
         else:
-            raise TypeError("Unspported model type {}!".format(model_type))
+            raise TypeError("Unsupported model type {}!".format(model_type))
         self.module = cpp_model
 
     @staticmethod
@@ -212,7 +212,7 @@ def eos_token_id(self):
         if self.model_type == 'qwen':
             return self.tokenizer.special_tokens['<|endoftext|>']
         return self.tokenizer.eos_token_id
-    
+
     def pad_token_id(self):
         if self.tokenizer.pad_token_id == None:
             if self.batch_size == 1:

diff --git a/intel_extension_for_transformers/llm/runtime/graph/developer_document.md b/intel_extension_for_transformers/llm/runtime/graph/developer_document.md
index fef2fc4702b..628437f8e14 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/developer_document.md
+++ b/intel_extension_for_transformers/llm/runtime/graph/developer_document.md
@@ -79,8 +79,8 @@ graph LR;
 We need to implement corresponding serialization methods from pytorch format, which is mainly divided into the following three steps.
 
-## 1.1. Hyperparamters
-The term **"hyperparamters"** describes a value that is used to configure the behavior of a large language model; this is in contrast to the model's parameters, which are the weight that were derived in the training process that was used to create the model. Each model defines its own hyperparameter structure that defines the hyperparameter values accepted by that model. Valid ITREX graph files must list these values in the correct order, and each value must be represented using the correct data type. Although hyperparameters are different across models, some attributes appear in the hyperparameters for most models:
+## 1.1. Hyperparameters
+The term **"hyperparameters"** describes a value that is used to configure the behavior of a large language model; this is in contrast to the model's parameters, which are the weight that were derived in the training process that was used to create the model. Each model defines its own hyperparameter structure that defines the hyperparameter values accepted by that model. Valid ITREX graph files must list these values in the correct order, and each value must be represented using the correct data type. Although hyperparameters are different across models, some attributes appear in the hyperparameters for most models:
 - n_vocab: the size of the model's vocabulary
 - n_embd: the size of the model's " embedding layer", which is used during prompt ingestion.
 - n_layer: the number of layers in the model; each layer represents a set of weights.
@@ -328,7 +328,7 @@ Most of our model examples only support single prompt processing. You need to ad
 +set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 +target_link_libraries(${TARGET} PUBLIC ne_layers jblas::jblas)
 ```
- and and new_model to [models_CMakeLists.txt](https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/llm/runtime/graph/models/CMakeLists.txt).
+ and new_model to [models_CMakeLists.txt](https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/llm/runtime/graph/models/CMakeLists.txt).
 ```diff
 add_subdirectory(opt)
 add_subdirectory(bloom)

From 9087bfbaf9beedf8f3df4bfdee556c4716697c52 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Thu, 28 Dec 2023 23:01:27 -0600
Subject: [PATCH 08/16] trying to make Intel Mac work... (this didn't help?)

Signed-off-by: Luke Nezda
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0fe02d4a7bc..2f1a816d839 100644
--- a/setup.py
+++ b/setup.py
@@ -71,7 +71,7 @@ class CMakeBuild(build_ext):
 
     @staticmethod
     def _is_target_file(file_name: str) -> bool:
-        if file_name.endswith(".dll") or file_name.endswith(".exe") or file_name.endswith(".pyd"):
+        if file_name.endswith(".dll") or file_name.endswith(".exe") or file_name.endswith(".pyd") or file_name.endswith(".dylib"):
             return True
         if file_name.endswith(".so") or ".so." in file_name:
             return True

From 5fe2be0cb5082eeaf6b1d2a957dfec285cdaafe3 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Thu, 28 Dec 2023 23:17:17 -0600
Subject: [PATCH 09/16] hacked requirements to strip "+cpu" suffix from
 torch==2.1.0 requirements

Signed-off-by: Luke Nezda
---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 291bbba5ace..77b22f3523d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ py-cpuinfo
 setuptools>=65
 setuptools_scm[toml]>=6.2
 --extra-index-url https://download.pytorch.org/whl/cpu
-torch==2.1.0+cpu
+#LN HACKEDtorch==2.1.0+cpu
+torch==2.1.0
 accelerate
 optimum-intel

From aae98433fe2383d8c68db322d5037ff183bcf580 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Thu, 28 Dec 2023 23:17:23 -0600
Subject: [PATCH 10/16] hacked requirements to strip "+cpu" suffix from
 torch==2.1.0 requirements

Signed-off-by: Luke Nezda
---
 .../llm/runtime/graph/scripts/requirements/common.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt b/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt
index 441da4dde29..08f913d602e 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt
+++ b/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt
@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
-torch==2.1.0+cpu
+#LN HACKEDtorch==2.1.0+cpu
+torch==2.1.0
 transformers
 numpy
 sentencepiece

From beb027b0243a61b9878fc2041a1fa3038649f061 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Fri, 29 Dec 2023 06:12:26 -0600
Subject: [PATCH 11/16] added missing rope_scale param

Signed-off-by: Luke Nezda
---
 .../llm/runtime/graph/scripts/convert_mistral.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
index aeb029e5ab7..76f6e4a6ea6 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
+++ b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
@@ -192,6 +192,7 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: Path) -> 'Params':
         ffn_hidden_size=ffn_hidden_size,
         rms_norm_eps=rms_norm_eps,
         rope_theta=rope_theta,
+        rope_scale=rope_scale,
     )
 
 # LLaMA v2 70B params.json
@@ -1064,8 +1065,8 @@ def write_file_header(self, params: Params, file_type: NEFileType) -> None:
         self.fout.write(
             struct.pack("i", 1)
-        ) 
-        # TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json 
+        )
+        # TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
         # but bos_token_id = 1 in llama.cpp
         self.fout.write(struct.pack("i", 2))

From aaee4543c6ca16e46d845cc2de77d0272d6afd50 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Fri, 29 Dec 2023 06:29:54 -0600
Subject: [PATCH 12/16] attempt to deal with missing Linux-only
 os.sched_getaffinity for macOS too

Signed-off-by: Luke Nezda
---
 intel_extension_for_transformers/llm/runtime/graph/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/__init__.py b/intel_extension_for_transformers/llm/runtime/graph/__init__.py
index 85ba32b911e..1d8d34bb494 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/__init__.py
+++ b/intel_extension_for_transformers/llm/runtime/graph/__init__.py
@@ -134,7 +134,7 @@ def init_from_bin(self, model_type, model_path, **generate_kwargs):
         import platform
         sys_platform = platform.platform().lower()
         if threads is None:
-            if "windows" in sys_platform:
+            if "windows" in sys_platform or "macos" in sys_platform:
                 cpu_count = os.cpu_count()
                 generate_kwargs["threads"] = int(cpu_count)
             else:

From bfec292a498d053eba997707db3f640b06c0a742 Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Fri, 29 Dec 2023 09:20:47 -0600
Subject: [PATCH 13/16] I think f_norm_eps should be rms_norm_eps; `guessed`
 still missing ffn_hidden_size, rope_scale, rope_theta - tried to fix
 write_vocab_only but Params here missing lots too

Signed-off-by: Luke Nezda
---
 .../llm/runtime/graph/scripts/convert_llama.py   | 17 ++++++++---------
 .../runtime/graph/scripts/convert_mistral.py     |  7 +++----
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py
index 93bcd8cde76..3cf517b397d 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py
+++ b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_llama.py
@@ -165,7 +165,7 @@ def guessed(model: 'LazyModel') -> 'Params':
         n_mult=256,
         n_head=n_embd // 128,
         n_head_kv=n_embd // 128,
-        f_norm_eps=1e-5,
+        rms_norm_eps=1e-5,
         n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model),
     )
 
@@ -203,7 +203,7 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: Path) -> 'Params':
     )
 
 # LLaMA v2 70B params.json
-# {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, 
+# {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8,
 # "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
 @staticmethod
 def loadOriginalParamsJson(model: 'LazyModel', config_path: Path) -> 'Params':
@@ -230,8 +230,8 @@ def loadOriginalParamsJson(model: 'LazyModel', config_path: Path) -> 'Params':
         n_head=n_head,
         n_head_kv=n_head_kv,
         ffn_hidden_size=ffn_hidden_size,
-        bos_token_id = bos_token_id,
-        eos_token_id = eos_token_id,
+        bos_token_id=bos_token_id,
+        eos_token_id=eos_token_id,
     )
 
 @staticmethod
@@ -278,7 +278,7 @@ def __init__(self, fname_tokenizer: Path, params_vocab_size: int, fname_added_to
     def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
         tokenizer = self.sentencepiece_tokenizer
         for i in range(self.params_vocab_size):
-            text: bytes 
+            text: bytes
             if i < tokenizer.vocab_size():
                 if tokenizer.is_unknown(i):
                     text = " \u2047 ".encode("utf-8")
@@ -1086,7 +1086,7 @@ def write_file_header(self, params: Params, file_type: NEFileType) -> None:
         self.fout.write(struct.pack("f", params.rope_theta))
         self.fout.write(struct.pack("f", params.rope_scale))
 
-        # TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json 
+        # TODO, bos_token_id = 0 in https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/config.json
         # but bos_token_id = 1 in llama.cpp
         self.fout.write(struct.pack("i", params.bos_token_id))
         self.fout.write(struct.pack("i", params.eos_token_id))
@@ -1108,10 +1108,9 @@ def write_vocab(self, vocab: Vocab) -> None:
 
     @staticmethod
     def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
+        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0)
         of = OutputFile(fname_out)
-        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0, file_type=NEFileType.AllF32)
-        of = OutputFile(fname_out)
-        of.write_file_header(params)
+        of.write_file_header(params, file_type=NEFileType.AllF32)
         of.write_vocab(vocab)
         of.fout.close()

diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
index 76f6e4a6ea6..8bdefe4b714 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
+++ b/intel_extension_for_transformers/llm/runtime/graph/scripts/convert_mistral.py
@@ -164,7 +164,7 @@ def guessed(model: 'LazyModel') -> 'Params':
         n_mult=256,
         n_head=n_embd // 128,
         n_head_kv=n_embd // 128,
-        f_norm_eps=1e-5,
+        rms_norm_eps=1e-5,
         n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model),
     )
 
@@ -1088,10 +1088,9 @@ def write_vocab(self, vocab: Vocab) -> None:
 
     @staticmethod
     def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
+        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0)
         of = OutputFile(fname_out)
-        params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, n_head=1, n_layer=0, file_type=NEFileType.AllF32)
-        of = OutputFile(fname_out)
-        of.write_file_header(params)
+        of.write_file_header(params, file_type=NEFileType.AllF32)
         of.write_vocab(vocab)
         of.fout.close()

From 4ff62db83c836b2fd3f9e809afcc98d9baa14a7c Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Fri, 29 Dec 2023 11:47:30 -0600
Subject: [PATCH 14/16] Update
 intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt

Co-authored-by: Yi DING
Signed-off-by: Luke Nezda
---
 .../llm/runtime/graph/scripts/requirements/common.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt b/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt
index 08f913d602e..a8f8b58c79f 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt
+++ b/intel_extension_for_transformers/llm/runtime/graph/scripts/requirements/common.txt
@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
-#LN HACKEDtorch==2.1.0+cpu
-torch==2.1.0
+torch==2.1.0+cpu ; sys_platform != 'darwin'
+torch==2.1.0 ; sys_platform == 'darwin'
 transformers
 numpy
 sentencepiece

From cd05487cd3810bdad4c09b60f19fd08a6301d20a Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Fri, 29 Dec 2023 11:52:36 -0600
Subject: [PATCH 15/16] on Mac (darwin) strip "+cpu" suffix from torch==2.1.0
 requirements

Co-authored-by: Yi DING
Signed-off-by: Luke Nezda
---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 77b22f3523d..72d4784db27 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ py-cpuinfo
 setuptools>=65
 setuptools_scm[toml]>=6.2
 --extra-index-url https://download.pytorch.org/whl/cpu
-#LN HACKEDtorch==2.1.0+cpu
-torch==2.1.0
+torch==2.1.0+cpu ; sys_platform != 'darwin'
+torch==2.1.0 ; sys_platform == 'darwin'
 accelerate
 optimum-intel

From 05b66dbef53d3611b175ed34a2fc04e2176548db Mon Sep 17 00:00:00 2001
From: Luke Nezda
Date: Fri, 29 Dec 2023 12:22:11 -0600
Subject: [PATCH 16/16] =?UTF-8?q?gcc-13=20said=20`reinterpret=5Fcast(nullptr)`=20was=20ambiguous=20=C2=AF\=5F(=E3=83=84)=5F/?=
 =?UTF-8?q?=C2=AF=20-=20adjust=20per=20code=20review=20@DDEle?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Luke Nezda
---
 .../llm/library/jblas/jblas/kernel_jit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h b/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h
index 70faa236c7c..46eb4153be9 100644
--- a/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h
+++ b/intel_extension_for_transformers/llm/library/jblas/jblas/kernel_jit.h
@@ -1228,7 +1228,7 @@ class PaddingTransInterleaveCvt : protected xbyak::JitAvx512f {
       jmp(ptr[reg_tmp + reg_tmp2 * sizeof(void*)], T_NEAR);  // switch(rows-iterrow) ...
       align(sizeof(intptr_t));
       L(l_tail_tbl);
-      db(nullptr, sizeof(intptr_t));  // case 0 should never occur
+      db(reinterpret_cast(nullptr), sizeof(intptr_t));  // case 0 should never occur
       for (int i = 1; i < trans_cell; ++i) putL(l_tail_case[i]);
 
       for (int m_tail = 1; m_tail < trans_cell; ++m_tail) {  // case (m_tail):