diff --git a/README.md b/README.md
index f4206e8d45d..c4dc18a306b 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,52 @@
+# llama.cpp with sentencepiece
+
+This fork enhances llama.cpp by integrating the SentencePiece library as a tokenizer, enabling more flexible and language-agnostic tokenization for LLM inference. By using SentencePiece, this project supports advanced tokenization strategies, improves compatibility with a wider range of models, and simplifies workflows for users who require custom or multilingual tokenization. The scope of the fork includes adding a new tokenizer option, supporting SentencePiece model blobs, and implementing chat templates for specific models such as "Teuken 7B".
+
+It introduces a new tokenizer named `sentencepiece` for the key `tokenizer.ggml.model`. In this case, the key `tokenizer.ggml.sentencepiece_model` must contain the binary blob with the serialized SentencePiece model. Tokenization is then performed by the SentencePiece library instead of the built-in algorithms in `llama.cpp`.
+
+Additionally, this fork implements the chat template used by the LLM "Teuken 7B".
+
+## Using llama.cpp with sentencepiece
+
+### Setup for Windows
+
+In the Releases section, you will find a Windows installer containing `llama.cpp` with SentencePiece support and "Teuken 7B".
+After installation, a browser window opens with the `llama.cpp` chat UI and "Teuken 7B" as the LLM.
+
+### Installing from source
+
+First, install the SentencePiece static library and headers for your OS and compiler (see the SentencePiece documentation).
+
+Clone the llama.cpp with sentencepiece repository and switch to the `teuken` branch:
+
+```sh
+git clone https://github.com/awenzel67/llama.cpp.git
+cd llama.cpp
+git switch teuken
+```
+
+Configure the build (see the SentencePiece documentation for details):
+```sh
+cmake -B buildFullCuda -DCURL_INCLUDE_DIR=C:/Del/vcpkg/installed/x64-windows/include -DCURL_LIBRARY=C:/Del/vcpkg/installed/x64-windows/lib/libcurl.lib -DSPIE_INCLUDE_DIR=C:\NHKI\llama\sentencepiece\src -DSPIE_LIBRARY=C:\NHKI\llama\sentencepiece\build\src\Release\sentencepiece.lib -DGGML_CUDA=ON
+```
+
+The CMake command contains a variable that specifies the include directory of the SentencePiece library:
+```sh
+-DSPIE_INCLUDE_DIR=C:\NHKI\llama\sentencepiece\src
+```
+
+It also contains a variable that specifies the path to the static SentencePiece library:
+```sh
+-DSPIE_LIBRARY=C:\NHKI\llama\sentencepiece\build\src\Release\sentencepiece.lib
+```
+After building (e.g. `cmake --build buildFullCuda --config Release`), you can use the common llama.cpp tools such as `llama-cli` or `llama-server`.
+
+You can use all models supported by llama.cpp. In addition, the following Teuken 7B GGUFs can be used:
+
+- Teuken-7.5B-BF16-CM.gguf
+- Teuken-7.5B-Q4_K_M.gguf
+
+
 # llama.cpp
 
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
diff --git a/include/llama.h b/include/llama.h
index a0a660bff88..00e1bda644b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -76,6 +76,7 @@ extern "C" {
         LLAMA_VOCAB_TYPE_UGM    = 4, // T5 tokenizer based on Unigram
         LLAMA_VOCAB_TYPE_RWKV   = 5, // RWKV tokenizer based on greedy tokenization
         LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
+        LLAMA_VOCAB_TYPE_SPIE   = 7, // TEUKEN tokenizer based on SentencePiece
     };
 
     enum llama_rope_type {
diff --git a/innosetup.iss b/innosetup.iss
new file mode 100644
index 00000000000..0d080b1c241
--- /dev/null
+++ b/innosetup.iss
@@ -0,0 +1,77 @@
+; Script generated by the Inno Setup Script Wizard.
+; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
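+; This installer bundles the CUDA build of llama-server with its runtime DLLs,
+; the Teuken-7.5B-Q4_K_M.gguf model and the runteuken.bat launcher (see [Files]).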
+; Non-commercial use only + +#define MyAppName "llama.cpp-teuken" +#define MyAppVersion "1.5" +#define MyAppPublisher "awenzel67" +#define MyAppURL "https://github.com/awenzel67/llama.cpp" +#define MyAppExeName "runteuken.bat" +#define MyAppAssocName MyAppName + " File" +#define MyAppAssocExt ".myp" +#define MyAppAssocKey StringChange(MyAppAssocName, " ", "") + MyAppAssocExt + +[Setup] +; NOTE: The value of AppId uniquely identifies this application. Do not use the same AppId value in installers for other applications. +; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) +AppId={{56F7611F-2A10-49B9-B3A8-B627CA731E2B} +AppName={#MyAppName} +AppVersion={#MyAppVersion} +;AppVerName={#MyAppName} {#MyAppVersion} +AppPublisher={#MyAppPublisher} +AppPublisherURL={#MyAppURL} +AppSupportURL={#MyAppURL} +AppUpdatesURL={#MyAppURL} +DefaultDirName={autopf}\{#MyAppName} +UninstallDisplayIcon={app}\{#MyAppExeName} +; "ArchitecturesAllowed=x64compatible" specifies that Setup cannot run +; on anything but x64 and Windows 11 on Arm. +ArchitecturesAllowed=x64compatible +; "ArchitecturesInstallIn64BitMode=x64compatible" requests that the +; install be done in "64-bit mode" on x64 or Windows 11 on Arm, +; meaning it should use the native 64-bit Program Files directory and +; the 64-bit view of the registry. +ArchitecturesInstallIn64BitMode=x64compatible +ChangesAssociations=yes +DisableProgramGroupPage=yes +; Uncomment the following line to run in non administrative install mode (install for current user only). +;PrivilegesRequired=lowest +OutputDir=C:\Del\tk +OutputBaseFilename=llama.cpp-teuken +;SolidCompression=yes +WizardStyle=modern dynamic +DiskSpanning=yes +[Languages] +Name: "english"; MessagesFile: "compiler:Default.isl" +Name: "german"; MessagesFile: "compiler:Languages\German.isl" + +[Tasks] +Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked + +[Files] +Source: "C:\NHKI\mymodell\kilocal\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\llama-server.exe"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml-base.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml-cpu.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml-cuda.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\libcurl.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\llama.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\mtmd.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\zlib1.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\mymodell\kilocal\Teuken-7.5B-Q4_K_M.gguf"; DestDir: "{app}"; Flags: ignoreversion +; NOTE: Don't use "Flags: ignoreversion" on any shared system files + +[Registry] +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocExt}\OpenWithProgids"; ValueType: string; ValueName: "{#MyAppAssocKey}"; ValueData: ""; Flags: uninsdeletevalue +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}"; ValueType: string; ValueName: ""; ValueData: "{#MyAppAssocName}"; Flags: 
uninsdeletekey +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}\DefaultIcon"; ValueType: string; ValueName: ""; ValueData: "{app}\{#MyAppExeName},0" +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}\shell\open\command"; ValueType: string; ValueName: ""; ValueData: """{app}\{#MyAppExeName}"" ""%1""" + +[Icons] +Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" +Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon + +;[Run] +;Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: postinstall skipifsilent + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 18cfc76564d..1f9d28fd232 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,16 +32,21 @@ add_library(llama llama-quant.cpp llama-sampling.cpp llama-vocab.cpp + llama-vocab-sentencepiece.cpp unicode-data.cpp unicode.cpp unicode.h ) target_include_directories(llama PRIVATE .) -target_include_directories(llama PUBLIC ../include) -target_compile_features (llama PRIVATE cxx_std_17) # don't bump +target_include_directories(llama PUBLIC ../include ${SPIE_INCLUDE_DIR} ${SPIE_INCLUDE_DIR}/..) +#target_link_directories(${TEST_TARGET} PRIVATE "C:/NHKI/llama/sentencepiece/build/src/Debug") -target_link_libraries(llama PUBLIC ggml) +target_compile_features (llama PRIVATE cxx_std_17) # don't bump +message(SPIE_LIBRARY="${SPIE_LIBRARY}") +message(SPIE_LIBRARY="${SPIE_INCLUDE_DIR}") +#target_link_libraries(llama PUBLIC ggml "C:/NHKI/llama/sentencepiece/build/src/Release/sentencepiece.lib") +target_link_libraries(llama PUBLIC ggml ${SPIE_LIBRARY}) if (BUILD_SHARED_LIBS) set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index 8ca769c5fd2..0abea7a22c2 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -251,6 +251,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" }, { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" }, + { LLM_KV_TOKENIZER_SENTENCEPIECE_MODEL, "tokenizer.ggml.sentencepiece_model" }, { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" }, diff --git a/src/llama-arch.h b/src/llama-arch.h index dea725c1a75..dd659bd8293 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -240,6 +240,7 @@ enum llm_kv { LLM_KV_TOKENIZER_ADD_PREFIX, LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, + LLM_KV_TOKENIZER_SENTENCEPIECE_MODEL, LLM_KV_TOKENIZER_HF_JSON, LLM_KV_TOKENIZER_RWKV, LLM_KV_TOKENIZER_CHAT_TEMPLATE, diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp index 0285006d73c..e8d0499be77 100644 --- a/src/llama-chat.cpp +++ b/src/llama-chat.cpp @@ -63,8 +63,6 @@ static const std::map LLM_CHAT_TEMPLATES = { { "megrez", LLM_CHAT_TEMPLATE_MEGREZ }, { "yandex", LLM_CHAT_TEMPLATE_YANDEX }, { "bailing", LLM_CHAT_TEMPLATE_BAILING }, - { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK }, - { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 }, { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM }, { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE }, @@ -73,6 +71,35 @@ static const std::map LLM_CHAT_TEMPLATES = { { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 }, { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS }, { "grok-2", 
LLM_CHAT_TEMPLATE_GROK_2 }, + { "teuken", LLM_CHAT_TEMPLATE_TEUKEN }, +}; + + +static const std::map LLM_TEUKEN_SYSTEM = { + {"BG", "Чат между човек и асистент с изкуствен интелект. Асистентът дава полезни и учтиви отговори на въпросите на човека."}, + {"CS", "Chat mezi člověkem a asistentem s umělou inteligencí. Asistent poskytuje vstřícné a zdvořilé odpovědi na otázky člověka."}, + {"DA", "En chat mellem et menneske og en assistent med kunstig intelligens, som giver hjælpsomme og høflige svar på menneskets spørgsmål."}, + {"DE", "Ein Gespräch zwischen einem Menschen und einem Assistenten mit künstlicher Intelligenz. Der Assistent gibt hilfreiche und höfliche Antworten auf die Fragen des Menschen."}, + {"EL", "Μια συνομιλία μεταξύ ενός ανθρώπου και ενός βοηθού τεχνητής νοημοσύνης. Ο βοηθός δίνει χρήσιμες και ευγενικές απαντήσεις στις ερωτήσεις του ανθρώπου."}, + {"EN", "A chat between a human and an artificial intelligence assistant.The assistant gives helpful and polite answers to the human's questions."}, + {"ES", "Una conversación entre un humano y un asistente de inteligencia artificial. El asistente da respuestas útiles y amables a las preguntas del humano."}, + {"ET", "Inimese ja tehisintellekti assistendi vaheline vestlus. Assistent annab inimese küsimustele abivalmis ja viisakaid vastuseid."}, + {"FI", "Ihmisen ja tekoälyavustajan välinen keskustelu. Avustaja antaa avuliaita ja kohteliaita vastauksia ihmisen kysymyksiin."}, + {"FR", "Conversation entre un humain et un assistant doté d'une intelligence artificielle. L'assistant donne des réponses utiles et polies aux questions de l'homme."}, + {"GA", "Comhrá idir duine agus cúntóir hintleachta saorga. Tugann an cúntóir freagraí cabhracha dea-bhéasacha ar cheisteanna an duine."}, + {"HR", "Razgovor između čovjeka i pomoćnika umjetne inteligencije. Pomoćnik daje korisne i ljubazne odgovore na ljudska pitanja."}, + {"HU", "Egy ember és egy mesterséges intelligencia asszisztens közötti beszélgetés. Az asszisztens segítőkész és udvarias válaszokat ad az ember kérdéseire."}, + {"IT", "Una chat tra un umano e un assistente di intelligenza artificiale. L'assistente fornisce risposte utili ed educate alle domande dell'uomo."}, + {"LT", "Žmogaus ir dirbtinio intelekto asistento pokalbis. Asistentas naudingai ir mandagiai atsako į žmogaus klausimus."}, + {"LV", "Cilvēka un mākslīgā intelekta asistenta tērzēšana. Asistents sniedz noderīgas un pieklājīgas atbildes uz cilvēka jautājumiem."}, + {"MT", "Chat bejn bniedem u assistent ta' intelliġenza artifiċjali. L-assistent jagħti tweġibiet ta' għajnuna u edukat għall-mistoqsijiet tal-bniedem."}, + {"NL", "Een chat tussen een mens en een assistent met kunstmatige intelligentie. De assistent geeft behulpzame en beleefde antwoorden op de vragen van de mens."}, + {"PL", "Czat między człowiekiem a asystentem sztucznej inteligencji. Asystent udziela pomocnych i uprzejmych odpowiedzi na pytania człowieka."}, + {"PT", "Uma conversa entre um ser humano e um assistente de inteligência artificial. O assistente dá respostas úteis e educadas às perguntas do utilizador."}, + {"RO", "O conversație între un om și un asistent cu inteligență artificială. Asistentul oferă răspunsuri utile și politicoase la întrebările omului."}, + {"SK", "Rozhovor medzi človekom a asistentom s umelou inteligenciou. Asistent poskytuje užitočné a zdvorilé odpovede na otázky človeka."}, + {"SL", "Pogovor med človekom in pomočnikom z umetno inteligenco. 
Pomočnik človeku prijazno in vljudno odgovarja na njegova vprašanja."}, + {"SV", "En chatt mellan en människa och en assistent med artificiell intelligens. Assistenten ger hjälpsamma och artiga svar på människans frågor."} }; llm_chat_template llm_chat_template_from_str(const std::string & name) { @@ -156,6 +183,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { return LLM_CHAT_TEMPLATE_VICUNA_ORCA; } return LLM_CHAT_TEMPLATE_VICUNA; + } else if (tmpl_contains("User: ") && tmpl_contains("Assistant: ") && tmpl_contains("System: ")) { + return LLM_CHAT_TEMPLATE_TEUKEN; } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) { // deepseek-ai/deepseek-coder-33b-instruct return LLM_CHAT_TEMPLATE_DEEPSEEK; @@ -193,10 +222,6 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { return LLM_CHAT_TEMPLATE_YANDEX; } else if (tmpl_contains("ASSISTANT") && tmpl_contains("'HUMAN'")) { return LLM_CHAT_TEMPLATE_BAILING; - } else if (tmpl_contains("ASSISTANT") && tmpl_contains("\"HUMAN\"") && tmpl_contains("")) { - return LLM_CHAT_TEMPLATE_BAILING_THINK; - } else if (tmpl_contains("ASSISTANT") && tmpl_contains("HUMAN") && tmpl_contains("<|role_end|>")) { - return LLM_CHAT_TEMPLATE_BAILING2; } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) { return LLM_CHAT_TEMPLATE_LLAMA4; } else if (tmpl_contains("<|endofuserprompt|>")) { @@ -430,6 +455,39 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << "ASSISTANT:"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_TEUKEN) { + // eachadea/vicuna-13b-1.1 (and Orca variant) + bool isSysOut=false; + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + const std::string lang=trim( message->content); + if(LLM_TEUKEN_SYSTEM.find(lang)==LLM_TEUKEN_SYSTEM.end()) + { + std::string teuken_system=(*(LLM_TEUKEN_SYSTEM.find("EN"))).second; + ss << "System: " << teuken_system << "\n"; + } + else + { + std::string teuken_system=(*(LLM_TEUKEN_SYSTEM.find(lang))).second; + ss << "System: " << teuken_system << "\n"; + } + isSysOut=true; + } else if (role == "user") { + if (!isSysOut) + { + std::string teuken_system=(*(LLM_TEUKEN_SYSTEM.find("EN"))).second; + ss << "System: " << teuken_system << "\n"; + isSysOut=true; + } + ss << "User: " << message->content << "\n"; + } else if (role == "assistant") { + ss << "Assistant: " << message->content << "\n"; + } + } + if (add_ass) { + ss << "Assistant: "; + } } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) { // deepseek-ai/deepseek-coder-33b-instruct for (auto message : chat) { @@ -650,8 +708,8 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << " Ассистент:[SEP]"; } - } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { - // Bailing (Ling/Ring) template + } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) { + // Bailing (Ling) template for (auto message : chat) { std::string role(message->role); @@ -664,33 +722,6 @@ int32_t llm_chat_apply_template( ss << "" << role << "" << message->content; } - if (add_ass) { - ss << "ASSISTANT"; - - if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { - ss << ""; - } - } - } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) { - // Bailing2 (Ling 2.0) template - bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; - - if (!has_system) { - ss << "SYSTEMdetailed thinking off<|role_end|>"; - } - - for (auto message : chat) { - std::string role(message->role); - - if (role == "user") { - role = "HUMAN"; - } else { - 
std::transform(role.begin(), role.end(), role.begin(), ::toupper); - } - - ss << "" << role << "" << message->content << "<|role_end|>"; - } - if (add_ass) { ss << "ASSISTANT"; } diff --git a/src/llama-chat.h b/src/llama-chat.h index da1b7c47997..44baee6337d 100644 --- a/src/llama-chat.h +++ b/src/llama-chat.h @@ -42,8 +42,6 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_MEGREZ, LLM_CHAT_TEMPLATE_YANDEX, LLM_CHAT_TEMPLATE_BAILING, - LLM_CHAT_TEMPLATE_BAILING_THINK, - LLM_CHAT_TEMPLATE_BAILING2, LLM_CHAT_TEMPLATE_LLAMA4, LLM_CHAT_TEMPLATE_SMOLVLM, LLM_CHAT_TEMPLATE_DOTS1, @@ -53,6 +51,7 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_KIMI_K2, LLM_CHAT_TEMPLATE_SEED_OSS, LLM_CHAT_TEMPLATE_GROK_2, + LLM_CHAT_TEMPLATE_TEUKEN, LLM_CHAT_TEMPLATE_UNKNOWN, }; diff --git a/src/llama-vocab-sentencepiece.cpp b/src/llama-vocab-sentencepiece.cpp new file mode 100644 index 00000000000..a5f2f399538 --- /dev/null +++ b/src/llama-vocab-sentencepiece.cpp @@ -0,0 +1,38 @@ +#include "sentencepiece_processor.h" +//#include "filesystem.h" +#include "llama-vocab-sentencepiece.h" +#include +#include +#include +#include + + + +std::unique_ptr processor; + +int sp_init(const std::string& sp_binary) + { + if (processor.get()==NULL) + { + processor.reset(new sentencepiece::SentencePieceProcessor); + const auto status = processor->LoadFromSerializedProto(sp_binary); + if (!status.ok()) { + //std::cerr << status.ToString() << std::endl; + // error + throw std::invalid_argument("sentencepiece not initialized"); + return 0; + } + } + return 1; + } + + int sp_encode(const std::string& str,std::vector& token_ids) + { + if (processor.get()==NULL) + { + throw std::invalid_argument("sentencepiece not initialized"); + } + + processor->Encode(str, &token_ids); + return 0; + } diff --git a/src/llama-vocab-sentencepiece.h b/src/llama-vocab-sentencepiece.h new file mode 100644 index 00000000000..04e87dd94b6 --- /dev/null +++ b/src/llama-vocab-sentencepiece.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +#include + + +int sp_init(const std::string& model); +int sp_encode(const std::string& str,std::vector& token_ids); diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 639fecbd317..c317e4fb826 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -4,7 +4,7 @@ #include "gguf.h" #include "llama-impl.h" #include "llama-model-loader.h" - +#include "llama-vocab-sentencepiece.h" #include "unicode.h" #include @@ -1103,6 +1103,44 @@ struct llm_tokenizer_ugm_session { const llm_tokenizer_ugm & tokenizer; }; + + +// +// Spie tokenizer +// + +struct llm_tokenizer_spie : llm_tokenizer { + llm_tokenizer_spie(const llama_vocab & vocab) { + //sp_init(); + } +}; + +struct llm_tokenizer_spie_session { + llm_tokenizer_spie_session(const llama_vocab & vocab, const llm_tokenizer_spie & tokenizer) : vocab(vocab), tokenizer(tokenizer) {} + + /* This implementation is based on SentencePiece optimized Viterbi algorithm for + * unigram language models. The general idea is to: + * - move along the input sequence in steps of one UTF code point, + * - at each step find all possible tokenizations of the prefix by + * traversing the tokens trie, + * - for each tokenization store the best one so far (by higher score) + * - use the position in sequence after given token as an index to store + * results + * - if there was no valid tokenization of the current UTF code point + * then use unknown token with additional score penalty + * After processing the whole sequence we backtrack from the end to get + * the best tokenization. 
+ */ + void tokenize(const std::string & text, std::vector & output) { + sp_encode(text,output); + } + + const llama_vocab & vocab; + const llm_tokenizer_spie & tokenizer; +}; + + + // // RWKV tokenizer // @@ -1592,6 +1630,8 @@ struct llama_vocab::impl { std::vector precompiled_charsmap; + std::string sentencepiece_model; + impl(const llama_vocab & vocab) : vocab(vocab) { } @@ -1708,6 +1748,40 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_sep_id = LLAMA_TOKEN_NULL; special_pad_id = LLAMA_TOKEN_NULL; special_mask_id = LLAMA_TOKEN_NULL; + } else if (tokenizer_model == "sentencepiece") { + type = LLAMA_VOCAB_TYPE_SPIE; + // default special tokens + special_bos_id = 1; + special_eos_id = 2; + special_unk_id = 0; + special_sep_id = LLAMA_TOKEN_NULL; + special_pad_id = LLAMA_TOKEN_NULL; + special_mask_id = LLAMA_TOKEN_NULL; + + const int sentencepiece_model_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SENTENCEPIECE_MODEL).c_str()); + if (sentencepiece_model_keyidx != -1) { + const gguf_type pc_type = gguf_get_arr_type(ctx, sentencepiece_model_keyidx); + GGML_ASSERT(pc_type == GGUF_TYPE_INT8 || pc_type == GGUF_TYPE_UINT8); + + const size_t n_sentencepiece_model = gguf_get_arr_n(ctx, sentencepiece_model_keyidx); + const char * pc = (const char *) gguf_get_arr_data(ctx, sentencepiece_model_keyidx); + sentencepiece_model.assign(pc, pc + n_sentencepiece_model); + sp_init(sentencepiece_model); + + + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + // correct endiannes of data in precompiled_charsmap binary blob + uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0]; + *xcda_blob_size = __builtin_bswap32(*xcda_blob_size); + assert(*xcda_blob_size + sizeof(uint32_t) < n_precompiled_charsmap); + size_t xcda_array_size = *xcda_blob_size / sizeof(uint32_t); + uint32_t * xcda_array = (uint32_t *) &precompiled_charsmap[sizeof(uint32_t)]; + for (size_t i = 0; i < xcda_array_size; ++i) { + xcda_array[i] = __builtin_bswap32(xcda_array[i]); + } +#endif + } } else if (tokenizer_model == "bert") { type = LLAMA_VOCAB_TYPE_WPM; @@ -1773,6 +1847,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx); precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap); + #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // correct endiannes of data in precompiled_charsmap binary blob uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0]; @@ -2012,7 +2087,12 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT; add_bos = false; add_eos = true; - } else if (type == LLAMA_VOCAB_TYPE_RWKV) { + } else if (type == LLAMA_VOCAB_TYPE_SPIE) { + pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT; + add_bos = false; + add_eos = false; + } + else if (type == LLAMA_VOCAB_TYPE_RWKV) { pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT; add_space_prefix = false; clean_spaces = false; @@ -2523,6 +2603,7 @@ std::string llama_vocab::impl::type_name() const{ case LLAMA_VOCAB_TYPE_UGM: return "UGM"; case LLAMA_VOCAB_TYPE_RWKV: return "RWKV"; case LLAMA_VOCAB_TYPE_PLAMO2: return "PLaMo2"; + case LLAMA_VOCAB_TYPE_SPIE: return "SPIE"; default: return "unknown"; } } @@ -2567,6 +2648,7 @@ uint8_t 
llama_vocab::impl::token_to_byte(llama_token id) const { const auto & token_data = id_to_token.at(id); switch (get_type()) { case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_SPIE: case LLAMA_VOCAB_TYPE_UGM: { auto buf = token_data.text.substr(3, 2); return strtol(buf.c_str(), NULL, 16); @@ -2603,6 +2685,9 @@ void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) { case LLAMA_VOCAB_TYPE_UGM: tokenizer = std::make_unique(vocab, precompiled_charsmap); break; + case LLAMA_VOCAB_TYPE_SPIE: + tokenizer = std::make_unique(vocab); + break; case LLAMA_VOCAB_TYPE_RWKV: tokenizer = std::make_unique(vocab); break; @@ -2929,6 +3014,39 @@ std::vector llama_vocab::impl::tokenize( "Are you sure this is what you want?\n", __FUNCTION__); } + if (add_special && add_eos) { + GGML_ASSERT(special_eos_id != LLAMA_TOKEN_NULL); + output.push_back(special_eos_id); + } + } break; + case LLAMA_VOCAB_TYPE_SPIE: + { + if (add_special && add_bos) { + GGML_ASSERT(special_bos_id != LLAMA_TOKEN_NULL); + output.push_back(special_bos_id); + } + + llm_tokenizer_spie_session session(vocab, *static_cast(tokenizer.get())); + + for (const auto & fragment : fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { + std::string text = fragment.raw_text.substr(fragment.offset, fragment.length); +#ifdef PRETOKENIZERDEBUG + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str()); +#endif + session.tokenize(text, output); + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) + output.push_back(fragment.token); + } + } + + if (add_special && add_bos && output.size() >= 2 && output[1] == special_bos_id) { + LLAMA_LOG_WARN( + "%s: Added a BOS token to the prompt as specified by the model but the prompt " + "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. " + "Are you sure this is what you want?\n", __FUNCTION__); + } + if (add_special && add_eos) { GGML_ASSERT(special_eos_id != LLAMA_TOKEN_NULL); output.push_back(special_eos_id); @@ -3016,6 +3134,7 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t switch (get_type()) { case LLAMA_VOCAB_TYPE_WPM: case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_SPIE: case LLAMA_VOCAB_TYPE_UGM: { // NOTE: we accept all unsupported token types, // suppressing them like CONTROL tokens. @@ -3304,6 +3423,7 @@ llama_token llama_vocab::byte_to_token(uint8_t ch) const { static const char * hex = "0123456789ABCDEF"; switch (get_type()) { case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_SPIE: case LLAMA_VOCAB_TYPE_UGM: { const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; auto token = pimpl->token_to_id.find(buf); diff --git a/src/unicode.cpp b/src/unicode.cpp index 65f36651715..908dde1da76 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -4,6 +4,7 @@ #include "unicode.h" #include "unicode-data.h" +#include "llama-vocab-sentencepiece.h" #include #include diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d9cc5e933f4..d8177a527ab 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -108,6 +108,48 @@ function(llama_build_and_test source) set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL}) endfunction() +# Builds and runs a test source file. 
+# Optional args: +# - NAME: name of the executable & test target (defaults to the source file name without extension) +# - LABEL: label for the test (defaults to main) +# - ARGS: arguments to pass to the test executable +# - WORKING_DIRECTORY +function(llama_build_and_test_teuken source) + include(CMakeParseArguments) + set(options) + set(oneValueArgs NAME LABEL WORKING_DIRECTORY) + set(multiValueArgs ARGS) + cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if (NOT DEFINED LLAMA_TEST_LABEL) + set(LLAMA_TEST_LABEL "main") + endif() + if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY) + set(LLAMA_TEST_WORKING_DIRECTORY .) + endif() + if (DEFINED LLAMA_TEST_NAME) + set(TEST_TARGET ${LLAMA_TEST_NAME}) + else() + get_filename_component(TEST_TARGET ${source} NAME_WE) + endif() + + add_executable(${TEST_TARGET} ${source} get-model.cpp) + install(TARGETS ${TEST_TARGET} RUNTIME) + + target_include_directories(${TEST_TARGET} PUBLIC "C:/NHKI/llama/llama.cpp/common" "C:/NHKI/llama/sentencepiece/src" "C:/NHKI/llama/sentencepiece") + target_link_directories(${TEST_TARGET} PUBLIC "C:/NHKI/llama/sentencepiece/build/src/Debug") + + target_link_libraries(${TEST_TARGET} PRIVATE common sentencepiece ) + + add_test( + NAME ${TEST_TARGET} + WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY} + COMMAND $ + ${LLAMA_TEST_ARGS}) + + set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL}) +endfunction() + # build test-tokenizer-0 target once and add many tests llama_build(test-tokenizer-0.cpp) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index a5382ae3a35..a422043a583 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -97,7 +97,7 @@ int main(void) { }, { /* .name= */ "OrionStarAI/Orion-14B-Chat", - /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}", + /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}", /* .expected_output= */ "Human: You are a helpful assistant\n\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", /* .bos_token= */ "", @@ -297,7 +297,16 @@ int main(void) { /* .expected_output_jinja= */ "system\nYou are a helpful assistantuser\nHelloassistant\nHi thereuser\nWho are youassistant\nI am an assistantuser\nAnother questionassistant\n", /* .bos_token= */ "", /* .eos_token= */ "", - } + }, + { + /* .name= */ "teuken", + // No template included in tokenizer_config.json, so this template likely needs to be manually set. 
+ /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'System: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'User: ' + message['content'] + '\n'-}}{%- else -%}{{-'Assistant: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'Assistant:'-}}{%- endif -%}", + /* .expected_output= */ "System: You are a helpful assistant\nUser: Hello\nAssistant: Hi there\nUser: Who are you\nAssistant: I am an assistant \nUser: Another question\nAssistant:", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, }; std::vector formatted_chat(1024); int32_t res; @@ -321,6 +330,9 @@ int main(void) { for (const auto & test_case : test_cases) { printf("\n\n=== %s ===\n\n", test_case.name.c_str()); formatted_chat.resize(1024); + + printf("%s\n",test_case.template_str.c_str()); + res = llama_chat_apply_template( test_case.template_str.c_str(), conversation.data(), @@ -331,13 +343,15 @@ int main(void) { ); formatted_chat.resize(res); std::string output(formatted_chat.data(), formatted_chat.size()); - if (output != test_case.expected_output) { + if (output != test_case.expected_output) + { printf("Expected:\n%s\n", test_case.expected_output.c_str()); printf("-------------------------\n"); printf("Actual:\n%s\n", output.c_str()); fflush(stdout); assert(output == test_case.expected_output); } + return 0; } std::vector messages; diff --git a/tests/test-model-load-cancel.cpp b/tests/test-model-load-cancel.cpp index 9095826fa98..2d6cb29bb9f 100644 --- a/tests/test-model-load-cancel.cpp +++ b/tests/test-model-load-cancel.cpp @@ -1,27 +1,277 @@ +//#include "llama.h" +//#include "get-model.h" +// +//#include +// +//int main(int argc, char *argv[] ) { +// auto * model_path = "C:\\NHKI\\mymodell\\kilocal\\Teuken-7.5B-F16d.gguf"; +// auto * file = fopen(model_path, "r"); +// if (file == nullptr) { +// fprintf(stderr, "no model at '%s' found\n", model_path); +// return EXIT_FAILURE; +// } +// +// fprintf(stderr, "using '%s'\n", model_path); +// fclose(file); +// +// llama_backend_init(); +// auto params = llama_model_params{}; +// //params.use_mmap = false; +// params.vocab_only=true; +// params.main_gpu=-1; +// params.n_gpu_layers=0; +// params.progress_callback = [](float progress, void * ctx){ +// (void) ctx; +// return progress > 0.50; +// }; +// auto * model = llama_model_load_from_file(model_path, params); +// +// model. +// +// llama_backend_free(); +// return model == nullptr ? 
EXIT_SUCCESS : EXIT_FAILURE; +//} +// + #include "llama.h" -#include "get-model.h" +#include "common.h" +#include "console.h" +#include "../src/llama-vocab-sentencepiece.h" +#include "../src/unicode.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -int main(int argc, char *argv[] ) { - auto * model_path = get_model_or_exit(argc, argv); - auto * file = fopen(model_path, "r"); - if (file == nullptr) { - fprintf(stderr, "no model at '%s' found\n", model_path); - return EXIT_FAILURE; - } +int main(int argc, char ** argv) { + + const std::string fname = "C:\\NHKI\\mymodell\\kilocal\\Teuken-7.5B-BF16-CM.gguf"; + + fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str()); - fprintf(stderr, "using '%s'\n", model_path); - fclose(file); + llama_model * model; + llama_context * ctx; llama_backend_init(); - auto params = llama_model_params{}; - params.use_mmap = false; - params.progress_callback = [](float progress, void * ctx){ - (void) ctx; - return progress > 0.50; - }; - auto * model = llama_model_load_from_file(model_path, params); + + // load the vocab + { + auto mparams = llama_model_default_params(); + + mparams.vocab_only = true; + + model = llama_model_load_from_file(fname.c_str(), mparams); + + if (model == NULL) { + fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str()); + return 1; + } + + auto cparams = llama_context_default_params(); + + ctx = llama_init_from_model(model, cparams); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str()); + llama_model_free(model); + return 1; + } + } + + const llama_vocab * vocab = llama_model_get_vocab(model); + +#ifdef _WIN32 + // We need this for unicode console support + console::init(false, false); + atexit([]() { console::cleanup(); }); +#endif + + const int n_vocab = llama_vocab_n_tokens(vocab); + + std::string str="…"; + str="n ⋅Ball"; + // str="Hallo wer bist Du?"; + + std::vector tokens = common_tokenize(ctx, str, false, true); + + std::vector tokensbos = common_tokenize(ctx, str,true, true); + + { + std::vector conversation { + {"user", "Hello, how are you?"}, + {"assistant", "I'm doing great. How can I help you today?"}, + {"user", "I'd like to show off how chat templating works!"}, + }; + + const char* cl=llama_model_chat_template(model,NULL); + + std::string formatted_chat; + + formatted_chat.resize(1024); + bool add_generation_prompt=false; + int res = llama_chat_apply_template( + cl, + conversation.data(), + conversation.size(), + add_generation_prompt, + formatted_chat.data(), + formatted_chat.size() + ); + formatted_chat.resize(res); + std::string output(formatted_chat.data(), formatted_chat.size()); + + std::string check= "System: A chat between a human and an artificial intelligence assistant.The assistant gives helpful and polite answers to the human's questions.\nUser: Hello, how are you?\nAssistant: I'm doing great. How can I help you today?\nUser: I'd like to show off how chat templating works!\n"; + if (check != output) + { + size_t z=check.size(); + size_t y=output.size(); + for (size_t i = 0; i < z && i < y; i++) + { + if (check[i] != output[i]) + { + char a=check[i]; + char b=output[i]; + } + } + } + } + + { + + std::vector conversation { + {"system", "DE"}, + {"user", "Wie geht es dir?"}, + {"assistant", "Mir geht es gut. 
Kann ich Dir helfen?"}, + {"user", "Ich möchte gerne wissen wie chat templates funktionieren!"}, + }; + + const char* cl=llama_model_chat_template(model,NULL); + + std::string formatted_chat; + + formatted_chat.resize(1024); + bool add_generation_prompt=false; + int res = llama_chat_apply_template( + cl, + conversation.data(), + conversation.size(), + add_generation_prompt, + formatted_chat.data(), + formatted_chat.size() + ); + formatted_chat.resize(res); + std::string output(formatted_chat.data(), formatted_chat.size()); + + std::string check= "System: Ein Gespräch zwischen einem Menschen und einem Assistenten mit künstlicher Intelligenz. Der Assistent gibt hilfreiche und höfliche Antworten auf die Fragen des Menschen.\nUser: Wie geht es dir?\nAssistant: Mir geht es gut. Kann ich Dir helfen?\nUser: Ich möchte gerne wissen wie chat templates funktionieren!\n"; + if (check != output) + { + size_t z=check.size(); + size_t y=output.size(); + for (size_t i = 0; i < z && i < y; i++) + { + if (check[i] != output[i]) + { + char a=check[i]; + char b=output[i]; + } + } + + } + } + + + + { + + std::vector conversation { + {"system", "DE"}, + {"user", "Wie geht es dir?"}, + {"assistant", "Mir geht es gut. Kann ich Dir helfen?"}, + {"user", "Ich möchte gerne wissen wie chat templates funktionieren!"}, + }; + + const char* cl=llama_model_chat_template(model,NULL); + + std::string formatted_chat; + + formatted_chat.resize(1024); + bool add_generation_prompt=true; + int res = llama_chat_apply_template( + cl, + conversation.data(), + conversation.size(), + add_generation_prompt, + formatted_chat.data(), + formatted_chat.size() + ); + formatted_chat.resize(res); + std::string output(formatted_chat.data(), formatted_chat.size()); + + std::string check="System: Ein Gespräch zwischen einem Menschen und einem Assistenten mit künstlicher Intelligenz. Der Assistent gibt hilfreiche und höfliche Antworten auf die Fragen des Menschen.\nUser: Wie geht es dir?\nAssistant: Mir geht es gut. Kann ich Dir helfen?\nUser: Ich möchte gerne wissen wie chat templates funktionieren!\nAssistant: "; + + if (check != output) + { + size_t z=check.size(); + size_t y=output.size(); + for (size_t i = 0; i < z && i < y; i++) + { + if (check[i] != output[i]) + { + char a=check[i]; + char b=output[i]; + } + } + + } + } + + // std::vector token_ids; + // sp_encode(str,token_ids); + + //{ + // sentencepiece::SentencePieceProcessor processor; + // const auto status = processor.Load("C:/NHKI/mymodell/kilocal/teuken/tokenizer.model"); + // if (!status.ok()) { + // //std::cerr << status.ToString() << std::endl; + // // error + // } + // std::vector token_ids; + // processor.Encode(str, &token_ids); + // for (const int id : token_ids) { + // // std::cout << token << std::endl; + // } + //} + //{ + // std::string filename="C:/NHKI/mymodell/kilocal/teuken/tokenizer.model"; + // auto input = sentencepiece::filesystem::NewReadableFile(filename, true); + // std::string serialized; + // if (!input->ReadAll(&serialized)) { + // // hh + // } + // sentencepiece::SentencePieceProcessor processor; + // const auto status = processor.LoadFromSerializedProto(serialized); + // std::vector token_ids; + // processor.Encode(str, &token_ids); + // for (const int id : token_ids) { + // // std::cout << token << std::endl; + // } + //} + + + //auto model_proto = std::make_unique(); + + //model_proto->ParseFromArray(serialized.data(), serialized.size()) + + llama_model_free(model); + llama_free(ctx); + llama_backend_free(); - return model == nullptr ? 
EXIT_SUCCESS : EXIT_FAILURE; + + return 0; }
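
For reference, here is a minimal sketch (not part of the patch above) of how the two GGUF keys read by the new `sentencepiece` branch in `llama-vocab.cpp` could be written with the gguf C API that ships with ggml. The file name `embed-sp-model.cpp` and the command-line handling are hypothetical; a real converter would also have to emit the vocabulary, scores and token types. The loader asserts that `tokenizer.ggml.sentencepiece_model` is stored as an INT8/UINT8 array, so the serialized model is written as raw bytes.

```cpp
// embed-sp-model.cpp - hypothetical helper, not part of this patch.
// Writes the two keys that llama-vocab.cpp reads for LLAMA_VOCAB_TYPE_SPIE:
//   tokenizer.ggml.model               = "sentencepiece"
//   tokenizer.ggml.sentencepiece_model = raw bytes of the serialized SentencePiece model
#include "gguf.h"

#include <cstdio>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s tokenizer.model out.gguf\n", argv[0]);
        return 1;
    }

    // read the serialized SentencePiece model (tokenizer.model) as raw bytes
    std::ifstream in(argv[1], std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());

    struct gguf_context * ctx = gguf_init_empty();

    gguf_set_val_str (ctx, "tokenizer.ggml.model", "sentencepiece");
    gguf_set_arr_data(ctx, "tokenizer.ggml.sentencepiece_model",
                      GGUF_TYPE_UINT8, blob.data(), blob.size());

    // only_meta = true: write just the KV metadata, no tensor data
    gguf_write_to_file(ctx, argv[2], /*only_meta =*/ true);
    gguf_free(ctx);
    return 0;
}
```

The resulting metadata-only file is not a usable model on its own; the sketch only illustrates which keys the `sentencepiece` tokenizer path expects to find in a GGUF.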