diff --git a/README.md b/README.md
index f4206e8d45d..c4dc18a306b 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,52 @@
+# llama.cpp with sentencepiece
+
+This fork enhances llama.cpp by integrating the SentencePiece library as a tokenizer, enabling more flexible and language-agnostic tokenization for LLM inference. By using SentencePiece, this project supports advanced tokenization strategies, improves compatibility with a wider range of models, and simplifies workflows for users who require custom or multilingual tokenization. The scope of the fork includes adding a new tokenizer option, supporting SentencePiece model blobs, and implementing chat templates for specific models such as "Teuken 7B".
+
+It introduces a new tokenizer named `sentencepiece` for the key `tokenizer.ggml.model`. In this case, the key `tokenizer.ggml.sentencepiece_model` must contain the binary blob with the serialized SentencePiece model. Tokenization is then performed by the SentencePiece library instead of the built-in algorithms in `llama.cpp`.
+
+Additionally, this fork implements the chat template used by the LLM "Teuken 7B".
+
+## Using llama.cpp with sentencepiece
+
+### Setup for Windows
+
+In the Releases section, you will find a Windows installer containing `llama.cpp` with SentencePiece support and "Teuken 7B".
+After installation, a browser window opens with the `llama.cpp` chat UI and "Teuken 7B" as the LLM.
+
+### Installing from source
+
+First, install the SentencePiece static library and headers for your OS and compiler (see the SentencePiece documentation).
+
+Clone the llama.cpp with sentencepiece repository and switch to the `teuken` branch:
+
+```sh
+git clone https://github.com/awenzel67/llama.cpp.git
+cd llama.cpp
+git switch teuken
+```
+
+Configure the build (see the SentencePiece documentation for details):
+```sh
+cmake -B buildFullCuda -DCURL_INCLUDE_DIR=C:/Del/vcpkg/installed/x64-windows/include -DCURL_LIBRARY=C:/Del/vcpkg/installed/x64-windows/lib/libcurl.lib -DSPIE_INCLUDE_DIR=C:\NHKI\llama\sentencepiece\src -DSPIE_LIBRARY=C:\NHKI\llama\sentencepiece\build\src\Release\sentencepiece.lib -DGGML_CUDA=ON
+```
+
+The CMake command contains a variable that specifies the include directory of the SentencePiece library:
+```sh
+-DSPIE_INCLUDE_DIR=C:\NHKI\llama\sentencepiece\src
+```
+
+It also contains a variable that specifies the path to the static SentencePiece library:
+```sh
+-DSPIE_LIBRARY=C:\NHKI\llama\sentencepiece\build\src\Release\sentencepiece.lib
+```
+After building (e.g. `cmake --build buildFullCuda --config Release`), you can use the common llama.cpp tools such as `llama-cli` or `llama-server`.
+
+You can use all models supported by llama.cpp. In addition, the following Teuken 7B GGUFs can be used:
+
+- Teuken-7.5B-BF16-CM.gguf
+- Teuken-7.5B-Q4_K_M.gguf
+
+
 # llama.cpp
 
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
diff --git a/include/llama.h b/include/llama.h
index a0a660bff88..00e1bda644b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -76,6 +76,7 @@ extern "C" {
         LLAMA_VOCAB_TYPE_UGM    = 4, // T5 tokenizer based on Unigram
         LLAMA_VOCAB_TYPE_RWKV   = 5, // RWKV tokenizer based on greedy tokenization
         LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
+        LLAMA_VOCAB_TYPE_SPIE   = 7, // TEUKEN tokenizer based on SentencePiece
     };
 
     enum llama_rope_type {
diff --git a/innosetup.iss b/innosetup.iss
new file mode 100644
index 00000000000..0d080b1c241
--- /dev/null
+++ b/innosetup.iss
@@ -0,0 +1,77 @@
+; Script generated by the Inno Setup Script Wizard.
+; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
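+; This installer bundles the CUDA build of llama-server with its runtime DLLs,
+; the Teuken-7.5B-Q4_K_M.gguf model and the runteuken.bat launcher (see [Files]).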
+; Non-commercial use only + +#define MyAppName "llama.cpp-teuken" +#define MyAppVersion "1.5" +#define MyAppPublisher "awenzel67" +#define MyAppURL "https://github.com/awenzel67/llama.cpp" +#define MyAppExeName "runteuken.bat" +#define MyAppAssocName MyAppName + " File" +#define MyAppAssocExt ".myp" +#define MyAppAssocKey StringChange(MyAppAssocName, " ", "") + MyAppAssocExt + +[Setup] +; NOTE: The value of AppId uniquely identifies this application. Do not use the same AppId value in installers for other applications. +; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) +AppId={{56F7611F-2A10-49B9-B3A8-B627CA731E2B} +AppName={#MyAppName} +AppVersion={#MyAppVersion} +;AppVerName={#MyAppName} {#MyAppVersion} +AppPublisher={#MyAppPublisher} +AppPublisherURL={#MyAppURL} +AppSupportURL={#MyAppURL} +AppUpdatesURL={#MyAppURL} +DefaultDirName={autopf}\{#MyAppName} +UninstallDisplayIcon={app}\{#MyAppExeName} +; "ArchitecturesAllowed=x64compatible" specifies that Setup cannot run +; on anything but x64 and Windows 11 on Arm. +ArchitecturesAllowed=x64compatible +; "ArchitecturesInstallIn64BitMode=x64compatible" requests that the +; install be done in "64-bit mode" on x64 or Windows 11 on Arm, +; meaning it should use the native 64-bit Program Files directory and +; the 64-bit view of the registry. +ArchitecturesInstallIn64BitMode=x64compatible +ChangesAssociations=yes +DisableProgramGroupPage=yes +; Uncomment the following line to run in non administrative install mode (install for current user only). +;PrivilegesRequired=lowest +OutputDir=C:\Del\tk +OutputBaseFilename=llama.cpp-teuken +;SolidCompression=yes +WizardStyle=modern dynamic +DiskSpanning=yes +[Languages] +Name: "english"; MessagesFile: "compiler:Default.isl" +Name: "german"; MessagesFile: "compiler:Languages\German.isl" + +[Tasks] +Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked + +[Files] +Source: "C:\NHKI\mymodell\kilocal\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\llama-server.exe"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml-base.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml-cpu.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\ggml-cuda.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\libcurl.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\llama.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\mtmd.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\llama\llama.cpp\buildFullCuda\bin\Release\zlib1.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "C:\NHKI\mymodell\kilocal\Teuken-7.5B-Q4_K_M.gguf"; DestDir: "{app}"; Flags: ignoreversion +; NOTE: Don't use "Flags: ignoreversion" on any shared system files + +[Registry] +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocExt}\OpenWithProgids"; ValueType: string; ValueName: "{#MyAppAssocKey}"; ValueData: ""; Flags: uninsdeletevalue +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}"; ValueType: string; ValueName: ""; ValueData: "{#MyAppAssocName}"; Flags: 
uninsdeletekey +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}\DefaultIcon"; ValueType: string; ValueName: ""; ValueData: "{app}\{#MyAppExeName},0" +Root: HKA; Subkey: "Software\Classes\{#MyAppAssocKey}\shell\open\command"; ValueType: string; ValueName: ""; ValueData: """{app}\{#MyAppExeName}"" ""%1""" + +[Icons] +Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" +Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon + +;[Run] +;Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: postinstall skipifsilent + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 18cfc76564d..1f9d28fd232 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,16 +32,21 @@ add_library(llama llama-quant.cpp llama-sampling.cpp llama-vocab.cpp + llama-vocab-sentencepiece.cpp unicode-data.cpp unicode.cpp unicode.h ) target_include_directories(llama PRIVATE .) -target_include_directories(llama PUBLIC ../include) -target_compile_features (llama PRIVATE cxx_std_17) # don't bump +target_include_directories(llama PUBLIC ../include ${SPIE_INCLUDE_DIR} ${SPIE_INCLUDE_DIR}/..) +#target_link_directories(${TEST_TARGET} PRIVATE "C:/NHKI/llama/sentencepiece/build/src/Debug") -target_link_libraries(llama PUBLIC ggml) +target_compile_features (llama PRIVATE cxx_std_17) # don't bump +message(SPIE_LIBRARY="${SPIE_LIBRARY}") +message(SPIE_LIBRARY="${SPIE_INCLUDE_DIR}") +#target_link_libraries(llama PUBLIC ggml "C:/NHKI/llama/sentencepiece/build/src/Release/sentencepiece.lib") +target_link_libraries(llama PUBLIC ggml ${SPIE_LIBRARY}) if (BUILD_SHARED_LIBS) set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index 8ca769c5fd2..0abea7a22c2 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -251,6 +251,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" }, { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" }, + { LLM_KV_TOKENIZER_SENTENCEPIECE_MODEL, "tokenizer.ggml.sentencepiece_model" }, { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" }, diff --git a/src/llama-arch.h b/src/llama-arch.h index dea725c1a75..dd659bd8293 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -240,6 +240,7 @@ enum llm_kv { LLM_KV_TOKENIZER_ADD_PREFIX, LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, + LLM_KV_TOKENIZER_SENTENCEPIECE_MODEL, LLM_KV_TOKENIZER_HF_JSON, LLM_KV_TOKENIZER_RWKV, LLM_KV_TOKENIZER_CHAT_TEMPLATE, diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp index 0285006d73c..e8d0499be77 100644 --- a/src/llama-chat.cpp +++ b/src/llama-chat.cpp @@ -63,8 +63,6 @@ static const std::map LLM_CHAT_TEMPLATES = { { "megrez", LLM_CHAT_TEMPLATE_MEGREZ }, { "yandex", LLM_CHAT_TEMPLATE_YANDEX }, { "bailing", LLM_CHAT_TEMPLATE_BAILING }, - { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK }, - { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 }, { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM }, { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE }, @@ -73,6 +71,35 @@ static const std::map LLM_CHAT_TEMPLATES = { { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 }, { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS }, { "grok-2", 
LLM_CHAT_TEMPLATE_GROK_2 }, + { "teuken", LLM_CHAT_TEMPLATE_TEUKEN }, +}; + + +static const std::map LLM_TEUKEN_SYSTEM = { + {"BG", "Чат между човек и асистент с изкуствен интелект. Асистентът дава полезни и учтиви отговори на въпросите на човека."}, + {"CS", "Chat mezi člověkem a asistentem s umělou inteligencí. Asistent poskytuje vstřícné a zdvořilé odpovědi na otázky člověka."}, + {"DA", "En chat mellem et menneske og en assistent med kunstig intelligens, som giver hjælpsomme og høflige svar på menneskets spørgsmål."}, + {"DE", "Ein Gespräch zwischen einem Menschen und einem Assistenten mit künstlicher Intelligenz. Der Assistent gibt hilfreiche und höfliche Antworten auf die Fragen des Menschen."}, + {"EL", "Μια συνομιλία μεταξύ ενός ανθρώπου και ενός βοηθού τεχνητής νοημοσύνης. Ο βοηθός δίνει χρήσιμες και ευγενικές απαντήσεις στις ερωτήσεις του ανθρώπου."}, + {"EN", "A chat between a human and an artificial intelligence assistant.The assistant gives helpful and polite answers to the human's questions."}, + {"ES", "Una conversación entre un humano y un asistente de inteligencia artificial. El asistente da respuestas útiles y amables a las preguntas del humano."}, + {"ET", "Inimese ja tehisintellekti assistendi vaheline vestlus. Assistent annab inimese küsimustele abivalmis ja viisakaid vastuseid."}, + {"FI", "Ihmisen ja tekoälyavustajan välinen keskustelu. Avustaja antaa avuliaita ja kohteliaita vastauksia ihmisen kysymyksiin."}, + {"FR", "Conversation entre un humain et un assistant doté d'une intelligence artificielle. L'assistant donne des réponses utiles et polies aux questions de l'homme."}, + {"GA", "Comhrá idir duine agus cúntóir hintleachta saorga. Tugann an cúntóir freagraí cabhracha dea-bhéasacha ar cheisteanna an duine."}, + {"HR", "Razgovor između čovjeka i pomoćnika umjetne inteligencije. Pomoćnik daje korisne i ljubazne odgovore na ljudska pitanja."}, + {"HU", "Egy ember és egy mesterséges intelligencia asszisztens közötti beszélgetés. Az asszisztens segítőkész és udvarias válaszokat ad az ember kérdéseire."}, + {"IT", "Una chat tra un umano e un assistente di intelligenza artificiale. L'assistente fornisce risposte utili ed educate alle domande dell'uomo."}, + {"LT", "Žmogaus ir dirbtinio intelekto asistento pokalbis. Asistentas naudingai ir mandagiai atsako į žmogaus klausimus."}, + {"LV", "Cilvēka un mākslīgā intelekta asistenta tērzēšana. Asistents sniedz noderīgas un pieklājīgas atbildes uz cilvēka jautājumiem."}, + {"MT", "Chat bejn bniedem u assistent ta' intelliġenza artifiċjali. L-assistent jagħti tweġibiet ta' għajnuna u edukat għall-mistoqsijiet tal-bniedem."}, + {"NL", "Een chat tussen een mens en een assistent met kunstmatige intelligentie. De assistent geeft behulpzame en beleefde antwoorden op de vragen van de mens."}, + {"PL", "Czat między człowiekiem a asystentem sztucznej inteligencji. Asystent udziela pomocnych i uprzejmych odpowiedzi na pytania człowieka."}, + {"PT", "Uma conversa entre um ser humano e um assistente de inteligência artificial. O assistente dá respostas úteis e educadas às perguntas do utilizador."}, + {"RO", "O conversație între un om și un asistent cu inteligență artificială. Asistentul oferă răspunsuri utile și politicoase la întrebările omului."}, + {"SK", "Rozhovor medzi človekom a asistentom s umelou inteligenciou. Asistent poskytuje užitočné a zdvorilé odpovede na otázky človeka."}, + {"SL", "Pogovor med človekom in pomočnikom z umetno inteligenco. 
Pomočnik človeku prijazno in vljudno odgovarja na njegova vprašanja."}, + {"SV", "En chatt mellan en människa och en assistent med artificiell intelligens. Assistenten ger hjälpsamma och artiga svar på människans frågor."} }; llm_chat_template llm_chat_template_from_str(const std::string & name) { @@ -156,6 +183,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { return LLM_CHAT_TEMPLATE_VICUNA_ORCA; } return LLM_CHAT_TEMPLATE_VICUNA; + } else if (tmpl_contains("User: ") && tmpl_contains("Assistant: ") && tmpl_contains("System: ")) { + return LLM_CHAT_TEMPLATE_TEUKEN; } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) { // deepseek-ai/deepseek-coder-33b-instruct return LLM_CHAT_TEMPLATE_DEEPSEEK; @@ -193,10 +222,6 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { return LLM_CHAT_TEMPLATE_YANDEX; } else if (tmpl_contains("ASSISTANT") && tmpl_contains("'HUMAN'")) { return LLM_CHAT_TEMPLATE_BAILING; - } else if (tmpl_contains("ASSISTANT") && tmpl_contains("\"HUMAN\"") && tmpl_contains("")) { - return LLM_CHAT_TEMPLATE_BAILING_THINK; - } else if (tmpl_contains("ASSISTANT") && tmpl_contains("HUMAN") && tmpl_contains("<|role_end|>")) { - return LLM_CHAT_TEMPLATE_BAILING2; } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) { return LLM_CHAT_TEMPLATE_LLAMA4; } else if (tmpl_contains("<|endofuserprompt|>")) { @@ -430,6 +455,39 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << "ASSISTANT:"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_TEUKEN) { + // eachadea/vicuna-13b-1.1 (and Orca variant) + bool isSysOut=false; + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + const std::string lang=trim( message->content); + if(LLM_TEUKEN_SYSTEM.find(lang)==LLM_TEUKEN_SYSTEM.end()) + { + std::string teuken_system=(*(LLM_TEUKEN_SYSTEM.find("EN"))).second; + ss << "System: " << teuken_system << "\n"; + } + else + { + std::string teuken_system=(*(LLM_TEUKEN_SYSTEM.find(lang))).second; + ss << "System: " << teuken_system << "\n"; + } + isSysOut=true; + } else if (role == "user") { + if (!isSysOut) + { + std::string teuken_system=(*(LLM_TEUKEN_SYSTEM.find("EN"))).second; + ss << "System: " << teuken_system << "\n"; + isSysOut=true; + } + ss << "User: " << message->content << "\n"; + } else if (role == "assistant") { + ss << "Assistant: " << message->content << "\n"; + } + } + if (add_ass) { + ss << "Assistant: "; + } } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) { // deepseek-ai/deepseek-coder-33b-instruct for (auto message : chat) { @@ -650,8 +708,8 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << " Ассистент:[SEP]"; } - } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { - // Bailing (Ling/Ring) template + } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) { + // Bailing (Ling) template for (auto message : chat) { std::string role(message->role); @@ -664,33 +722,6 @@ int32_t llm_chat_apply_template( ss << "" << role << "" << message->content; } - if (add_ass) { - ss << "ASSISTANT"; - - if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { - ss << ""; - } - } - } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) { - // Bailing2 (Ling 2.0) template - bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; - - if (!has_system) { - ss << "SYSTEMdetailed thinking off<|role_end|>"; - } - - for (auto message : chat) { - std::string role(message->role); - - if (role == "user") { - role = "HUMAN"; - } else { - 
std::transform(role.begin(), role.end(), role.begin(), ::toupper); - } - - ss << "" << role << "" << message->content << "<|role_end|>"; - } - if (add_ass) { ss << "ASSISTANT"; } diff --git a/src/llama-chat.h b/src/llama-chat.h index da1b7c47997..44baee6337d 100644 --- a/src/llama-chat.h +++ b/src/llama-chat.h @@ -42,8 +42,6 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_MEGREZ, LLM_CHAT_TEMPLATE_YANDEX, LLM_CHAT_TEMPLATE_BAILING, - LLM_CHAT_TEMPLATE_BAILING_THINK, - LLM_CHAT_TEMPLATE_BAILING2, LLM_CHAT_TEMPLATE_LLAMA4, LLM_CHAT_TEMPLATE_SMOLVLM, LLM_CHAT_TEMPLATE_DOTS1, @@ -53,6 +51,7 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_KIMI_K2, LLM_CHAT_TEMPLATE_SEED_OSS, LLM_CHAT_TEMPLATE_GROK_2, + LLM_CHAT_TEMPLATE_TEUKEN, LLM_CHAT_TEMPLATE_UNKNOWN, }; diff --git a/src/llama-vocab-sentencepiece.cpp b/src/llama-vocab-sentencepiece.cpp new file mode 100644 index 00000000000..a5f2f399538 --- /dev/null +++ b/src/llama-vocab-sentencepiece.cpp @@ -0,0 +1,38 @@ +#include "sentencepiece_processor.h" +//#include "filesystem.h" +#include "llama-vocab-sentencepiece.h" +#include +#include +#include +#include + + + +std::unique_ptr processor; + +int sp_init(const std::string& sp_binary) + { + if (processor.get()==NULL) + { + processor.reset(new sentencepiece::SentencePieceProcessor); + const auto status = processor->LoadFromSerializedProto(sp_binary); + if (!status.ok()) { + //std::cerr << status.ToString() << std::endl; + // error + throw std::invalid_argument("sentencepiece not initialized"); + return 0; + } + } + return 1; + } + + int sp_encode(const std::string& str,std::vector& token_ids) + { + if (processor.get()==NULL) + { + throw std::invalid_argument("sentencepiece not initialized"); + } + + processor->Encode(str, &token_ids); + return 0; + } diff --git a/src/llama-vocab-sentencepiece.h b/src/llama-vocab-sentencepiece.h new file mode 100644 index 00000000000..04e87dd94b6 --- /dev/null +++ b/src/llama-vocab-sentencepiece.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +#include + + +int sp_init(const std::string& model); +int sp_encode(const std::string& str,std::vector& token_ids); diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 639fecbd317..c317e4fb826 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -4,7 +4,7 @@ #include "gguf.h" #include "llama-impl.h" #include "llama-model-loader.h" - +#include "llama-vocab-sentencepiece.h" #include "unicode.h" #include @@ -1103,6 +1103,44 @@ struct llm_tokenizer_ugm_session { const llm_tokenizer_ugm & tokenizer; }; + + +// +// Spie tokenizer +// + +struct llm_tokenizer_spie : llm_tokenizer { + llm_tokenizer_spie(const llama_vocab & vocab) { + //sp_init(); + } +}; + +struct llm_tokenizer_spie_session { + llm_tokenizer_spie_session(const llama_vocab & vocab, const llm_tokenizer_spie & tokenizer) : vocab(vocab), tokenizer(tokenizer) {} + + /* This implementation is based on SentencePiece optimized Viterbi algorithm for + * unigram language models. The general idea is to: + * - move along the input sequence in steps of one UTF code point, + * - at each step find all possible tokenizations of the prefix by + * traversing the tokens trie, + * - for each tokenization store the best one so far (by higher score) + * - use the position in sequence after given token as an index to store + * results + * - if there was no valid tokenization of the current UTF code point + * then use unknown token with additional score penalty + * After processing the whole sequence we backtrack from the end to get + * the best tokenization. 
+ */ + void tokenize(const std::string & text, std::vector & output) { + sp_encode(text,output); + } + + const llama_vocab & vocab; + const llm_tokenizer_spie & tokenizer; +}; + + + // // RWKV tokenizer // @@ -1592,6 +1630,8 @@ struct llama_vocab::impl { std::vector precompiled_charsmap; + std::string sentencepiece_model; + impl(const llama_vocab & vocab) : vocab(vocab) { } @@ -1708,6 +1748,40 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_sep_id = LLAMA_TOKEN_NULL; special_pad_id = LLAMA_TOKEN_NULL; special_mask_id = LLAMA_TOKEN_NULL; + } else if (tokenizer_model == "sentencepiece") { + type = LLAMA_VOCAB_TYPE_SPIE; + // default special tokens + special_bos_id = 1; + special_eos_id = 2; + special_unk_id = 0; + special_sep_id = LLAMA_TOKEN_NULL; + special_pad_id = LLAMA_TOKEN_NULL; + special_mask_id = LLAMA_TOKEN_NULL; + + const int sentencepiece_model_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SENTENCEPIECE_MODEL).c_str()); + if (sentencepiece_model_keyidx != -1) { + const gguf_type pc_type = gguf_get_arr_type(ctx, sentencepiece_model_keyidx); + GGML_ASSERT(pc_type == GGUF_TYPE_INT8 || pc_type == GGUF_TYPE_UINT8); + + const size_t n_sentencepiece_model = gguf_get_arr_n(ctx, sentencepiece_model_keyidx); + const char * pc = (const char *) gguf_get_arr_data(ctx, sentencepiece_model_keyidx); + sentencepiece_model.assign(pc, pc + n_sentencepiece_model); + sp_init(sentencepiece_model); + + + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + // correct endiannes of data in precompiled_charsmap binary blob + uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0]; + *xcda_blob_size = __builtin_bswap32(*xcda_blob_size); + assert(*xcda_blob_size + sizeof(uint32_t) < n_precompiled_charsmap); + size_t xcda_array_size = *xcda_blob_size / sizeof(uint32_t); + uint32_t * xcda_array = (uint32_t *) &precompiled_charsmap[sizeof(uint32_t)]; + for (size_t i = 0; i < xcda_array_size; ++i) { + xcda_array[i] = __builtin_bswap32(xcda_array[i]); + } +#endif + } } else if (tokenizer_model == "bert") { type = LLAMA_VOCAB_TYPE_WPM; @@ -1773,6 +1847,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx); precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap); + #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // correct endiannes of data in precompiled_charsmap binary blob uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0]; @@ -2012,7 +2087,12 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT; add_bos = false; add_eos = true; - } else if (type == LLAMA_VOCAB_TYPE_RWKV) { + } else if (type == LLAMA_VOCAB_TYPE_SPIE) { + pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT; + add_bos = false; + add_eos = false; + } + else if (type == LLAMA_VOCAB_TYPE_RWKV) { pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT; add_space_prefix = false; clean_spaces = false; @@ -2523,6 +2603,7 @@ std::string llama_vocab::impl::type_name() const{ case LLAMA_VOCAB_TYPE_UGM: return "UGM"; case LLAMA_VOCAB_TYPE_RWKV: return "RWKV"; case LLAMA_VOCAB_TYPE_PLAMO2: return "PLaMo2"; + case LLAMA_VOCAB_TYPE_SPIE: return "SPIE"; default: return "unknown"; } } @@ -2567,6 +2648,7 @@ uint8_t 
llama_vocab::impl::token_to_byte(llama_token id) const { const auto & token_data = id_to_token.at(id); switch (get_type()) { case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_SPIE: case LLAMA_VOCAB_TYPE_UGM: { auto buf = token_data.text.substr(3, 2); return strtol(buf.c_str(), NULL, 16); @@ -2603,6 +2685,9 @@ void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) { case LLAMA_VOCAB_TYPE_UGM: tokenizer = std::make_unique(vocab, precompiled_charsmap); break; + case LLAMA_VOCAB_TYPE_SPIE: + tokenizer = std::make_unique(vocab); + break; case LLAMA_VOCAB_TYPE_RWKV: tokenizer = std::make_unique(vocab); break; @@ -2929,6 +3014,39 @@ std::vector llama_vocab::impl::tokenize( "Are you sure this is what you want?\n", __FUNCTION__); } + if (add_special && add_eos) { + GGML_ASSERT(special_eos_id != LLAMA_TOKEN_NULL); + output.push_back(special_eos_id); + } + } break; + case LLAMA_VOCAB_TYPE_SPIE: + { + if (add_special && add_bos) { + GGML_ASSERT(special_bos_id != LLAMA_TOKEN_NULL); + output.push_back(special_bos_id); + } + + llm_tokenizer_spie_session session(vocab, *static_cast(tokenizer.get())); + + for (const auto & fragment : fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { + std::string text = fragment.raw_text.substr(fragment.offset, fragment.length); +#ifdef PRETOKENIZERDEBUG + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str()); +#endif + session.tokenize(text, output); + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) + output.push_back(fragment.token); + } + } + + if (add_special && add_bos && output.size() >= 2 && output[1] == special_bos_id) { + LLAMA_LOG_WARN( + "%s: Added a BOS token to the prompt as specified by the model but the prompt " + "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. " + "Are you sure this is what you want?\n", __FUNCTION__); + } + if (add_special && add_eos) { GGML_ASSERT(special_eos_id != LLAMA_TOKEN_NULL); output.push_back(special_eos_id); @@ -3016,6 +3134,7 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t switch (get_type()) { case LLAMA_VOCAB_TYPE_WPM: case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_SPIE: case LLAMA_VOCAB_TYPE_UGM: { // NOTE: we accept all unsupported token types, // suppressing them like CONTROL tokens. @@ -3304,6 +3423,7 @@ llama_token llama_vocab::byte_to_token(uint8_t ch) const { static const char * hex = "0123456789ABCDEF"; switch (get_type()) { case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_SPIE: case LLAMA_VOCAB_TYPE_UGM: { const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; auto token = pimpl->token_to_id.find(buf); diff --git a/src/unicode.cpp b/src/unicode.cpp index 65f36651715..908dde1da76 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -4,6 +4,7 @@ #include "unicode.h" #include "unicode-data.h" +#include "llama-vocab-sentencepiece.h" #include #include diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d9cc5e933f4..d8177a527ab 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -108,6 +108,48 @@ function(llama_build_and_test source) set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL}) endfunction() +# Builds and runs a test source file. 
+# Optional args: +# - NAME: name of the executable & test target (defaults to the source file name without extension) +# - LABEL: label for the test (defaults to main) +# - ARGS: arguments to pass to the test executable +# - WORKING_DIRECTORY +function(llama_build_and_test_teuken source) + include(CMakeParseArguments) + set(options) + set(oneValueArgs NAME LABEL WORKING_DIRECTORY) + set(multiValueArgs ARGS) + cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if (NOT DEFINED LLAMA_TEST_LABEL) + set(LLAMA_TEST_LABEL "main") + endif() + if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY) + set(LLAMA_TEST_WORKING_DIRECTORY .) + endif() + if (DEFINED LLAMA_TEST_NAME) + set(TEST_TARGET ${LLAMA_TEST_NAME}) + else() + get_filename_component(TEST_TARGET ${source} NAME_WE) + endif() + + add_executable(${TEST_TARGET} ${source} get-model.cpp) + install(TARGETS ${TEST_TARGET} RUNTIME) + + target_include_directories(${TEST_TARGET} PUBLIC "C:/NHKI/llama/llama.cpp/common" "C:/NHKI/llama/sentencepiece/src" "C:/NHKI/llama/sentencepiece") + target_link_directories(${TEST_TARGET} PUBLIC "C:/NHKI/llama/sentencepiece/build/src/Debug") + + target_link_libraries(${TEST_TARGET} PRIVATE common sentencepiece ) + + add_test( + NAME ${TEST_TARGET} + WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY} + COMMAND $ + ${LLAMA_TEST_ARGS}) + + set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL}) +endfunction() + # build test-tokenizer-0 target once and add many tests llama_build(test-tokenizer-0.cpp) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index a5382ae3a35..a422043a583 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -97,7 +97,7 @@ int main(void) { }, { /* .name= */ "OrionStarAI/Orion-14B-Chat", - /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}", + /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}", /* .expected_output= */ "Human: You are a helpful assistant\n\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", /* .bos_token= */ "", @@ -297,7 +297,16 @@ int main(void) { /* .expected_output_jinja= */ "system\nYou are a helpful assistantuser\nHelloassistant\nHi thereuser\nWho are youassistant\nI am an assistantuser\nAnother questionassistant\n", /* .bos_token= */ "", /* .eos_token= */ "", - } + }, + { + /* .name= */ "teuken", + // No template included in tokenizer_config.json, so this template likely needs to be manually set. 
+ /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'System: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'User: ' + message['content'] + '\n'-}}{%- else -%}{{-'Assistant: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'Assistant:'-}}{%- endif -%}", + /* .expected_output= */ "System: You are a helpful assistant\nUser: Hello\nAssistant: Hi there\nUser: Who are you\nAssistant: I am an assistant \nUser: Another question\nAssistant:", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, }; std::vector formatted_chat(1024); int32_t res; @@ -321,6 +330,9 @@ int main(void) { for (const auto & test_case : test_cases) { printf("\n\n=== %s ===\n\n", test_case.name.c_str()); formatted_chat.resize(1024); + + printf("%s\n",test_case.template_str.c_str()); + res = llama_chat_apply_template( test_case.template_str.c_str(), conversation.data(), @@ -331,13 +343,15 @@ int main(void) { ); formatted_chat.resize(res); std::string output(formatted_chat.data(), formatted_chat.size()); - if (output != test_case.expected_output) { + if (output != test_case.expected_output) + { printf("Expected:\n%s\n", test_case.expected_output.c_str()); printf("-------------------------\n"); printf("Actual:\n%s\n", output.c_str()); fflush(stdout); assert(output == test_case.expected_output); } + return 0; } std::vector messages; diff --git a/tests/test-model-load-cancel.cpp b/tests/test-model-load-cancel.cpp index 9095826fa98..2d6cb29bb9f 100644 --- a/tests/test-model-load-cancel.cpp +++ b/tests/test-model-load-cancel.cpp @@ -1,27 +1,277 @@ +//#include "llama.h" +//#include "get-model.h" +// +//#include +// +//int main(int argc, char *argv[] ) { +// auto * model_path = "C:\\NHKI\\mymodell\\kilocal\\Teuken-7.5B-F16d.gguf"; +// auto * file = fopen(model_path, "r"); +// if (file == nullptr) { +// fprintf(stderr, "no model at '%s' found\n", model_path); +// return EXIT_FAILURE; +// } +// +// fprintf(stderr, "using '%s'\n", model_path); +// fclose(file); +// +// llama_backend_init(); +// auto params = llama_model_params{}; +// //params.use_mmap = false; +// params.vocab_only=true; +// params.main_gpu=-1; +// params.n_gpu_layers=0; +// params.progress_callback = [](float progress, void * ctx){ +// (void) ctx; +// return progress > 0.50; +// }; +// auto * model = llama_model_load_from_file(model_path, params); +// +// model. +// +// llama_backend_free(); +// return model == nullptr ? 
EXIT_SUCCESS : EXIT_FAILURE; +//} +// + #include "llama.h" -#include "get-model.h" +#include "common.h" +#include "console.h" +#include "../src/llama-vocab-sentencepiece.h" +#include "../src/unicode.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -int main(int argc, char *argv[] ) { - auto * model_path = get_model_or_exit(argc, argv); - auto * file = fopen(model_path, "r"); - if (file == nullptr) { - fprintf(stderr, "no model at '%s' found\n", model_path); - return EXIT_FAILURE; - } +int main(int argc, char ** argv) { + + const std::string fname = "C:\\NHKI\\mymodell\\kilocal\\Teuken-7.5B-BF16-CM.gguf"; + + fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str()); - fprintf(stderr, "using '%s'\n", model_path); - fclose(file); + llama_model * model; + llama_context * ctx; llama_backend_init(); - auto params = llama_model_params{}; - params.use_mmap = false; - params.progress_callback = [](float progress, void * ctx){ - (void) ctx; - return progress > 0.50; - }; - auto * model = llama_model_load_from_file(model_path, params); + + // load the vocab + { + auto mparams = llama_model_default_params(); + + mparams.vocab_only = true; + + model = llama_model_load_from_file(fname.c_str(), mparams); + + if (model == NULL) { + fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str()); + return 1; + } + + auto cparams = llama_context_default_params(); + + ctx = llama_init_from_model(model, cparams); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str()); + llama_model_free(model); + return 1; + } + } + + const llama_vocab * vocab = llama_model_get_vocab(model); + +#ifdef _WIN32 + // We need this for unicode console support + console::init(false, false); + atexit([]() { console::cleanup(); }); +#endif + + const int n_vocab = llama_vocab_n_tokens(vocab); + + std::string str="…"; + str="n ⋅Ball"; + // str="Hallo wer bist Du?"; + + std::vector tokens = common_tokenize(ctx, str, false, true); + + std::vector tokensbos = common_tokenize(ctx, str,true, true); + + { + std::vector conversation { + {"user", "Hello, how are you?"}, + {"assistant", "I'm doing great. How can I help you today?"}, + {"user", "I'd like to show off how chat templating works!"}, + }; + + const char* cl=llama_model_chat_template(model,NULL); + + std::string formatted_chat; + + formatted_chat.resize(1024); + bool add_generation_prompt=false; + int res = llama_chat_apply_template( + cl, + conversation.data(), + conversation.size(), + add_generation_prompt, + formatted_chat.data(), + formatted_chat.size() + ); + formatted_chat.resize(res); + std::string output(formatted_chat.data(), formatted_chat.size()); + + std::string check= "System: A chat between a human and an artificial intelligence assistant.The assistant gives helpful and polite answers to the human's questions.\nUser: Hello, how are you?\nAssistant: I'm doing great. How can I help you today?\nUser: I'd like to show off how chat templating works!\n"; + if (check != output) + { + size_t z=check.size(); + size_t y=output.size(); + for (size_t i = 0; i < z && i < y; i++) + { + if (check[i] != output[i]) + { + char a=check[i]; + char b=output[i]; + } + } + } + } + + { + + std::vector conversation { + {"system", "DE"}, + {"user", "Wie geht es dir?"}, + {"assistant", "Mir geht es gut. 
Kann ich Dir helfen?"}, + {"user", "Ich möchte gerne wissen wie chat templates funktionieren!"}, + }; + + const char* cl=llama_model_chat_template(model,NULL); + + std::string formatted_chat; + + formatted_chat.resize(1024); + bool add_generation_prompt=false; + int res = llama_chat_apply_template( + cl, + conversation.data(), + conversation.size(), + add_generation_prompt, + formatted_chat.data(), + formatted_chat.size() + ); + formatted_chat.resize(res); + std::string output(formatted_chat.data(), formatted_chat.size()); + + std::string check= "System: Ein Gespräch zwischen einem Menschen und einem Assistenten mit künstlicher Intelligenz. Der Assistent gibt hilfreiche und höfliche Antworten auf die Fragen des Menschen.\nUser: Wie geht es dir?\nAssistant: Mir geht es gut. Kann ich Dir helfen?\nUser: Ich möchte gerne wissen wie chat templates funktionieren!\n"; + if (check != output) + { + size_t z=check.size(); + size_t y=output.size(); + for (size_t i = 0; i < z && i < y; i++) + { + if (check[i] != output[i]) + { + char a=check[i]; + char b=output[i]; + } + } + + } + } + + + + { + + std::vector conversation { + {"system", "DE"}, + {"user", "Wie geht es dir?"}, + {"assistant", "Mir geht es gut. Kann ich Dir helfen?"}, + {"user", "Ich möchte gerne wissen wie chat templates funktionieren!"}, + }; + + const char* cl=llama_model_chat_template(model,NULL); + + std::string formatted_chat; + + formatted_chat.resize(1024); + bool add_generation_prompt=true; + int res = llama_chat_apply_template( + cl, + conversation.data(), + conversation.size(), + add_generation_prompt, + formatted_chat.data(), + formatted_chat.size() + ); + formatted_chat.resize(res); + std::string output(formatted_chat.data(), formatted_chat.size()); + + std::string check="System: Ein Gespräch zwischen einem Menschen und einem Assistenten mit künstlicher Intelligenz. Der Assistent gibt hilfreiche und höfliche Antworten auf die Fragen des Menschen.\nUser: Wie geht es dir?\nAssistant: Mir geht es gut. Kann ich Dir helfen?\nUser: Ich möchte gerne wissen wie chat templates funktionieren!\nAssistant: "; + + if (check != output) + { + size_t z=check.size(); + size_t y=output.size(); + for (size_t i = 0; i < z && i < y; i++) + { + if (check[i] != output[i]) + { + char a=check[i]; + char b=output[i]; + } + } + + } + } + + // std::vector token_ids; + // sp_encode(str,token_ids); + + //{ + // sentencepiece::SentencePieceProcessor processor; + // const auto status = processor.Load("C:/NHKI/mymodell/kilocal/teuken/tokenizer.model"); + // if (!status.ok()) { + // //std::cerr << status.ToString() << std::endl; + // // error + // } + // std::vector token_ids; + // processor.Encode(str, &token_ids); + // for (const int id : token_ids) { + // // std::cout << token << std::endl; + // } + //} + //{ + // std::string filename="C:/NHKI/mymodell/kilocal/teuken/tokenizer.model"; + // auto input = sentencepiece::filesystem::NewReadableFile(filename, true); + // std::string serialized; + // if (!input->ReadAll(&serialized)) { + // // hh + // } + // sentencepiece::SentencePieceProcessor processor; + // const auto status = processor.LoadFromSerializedProto(serialized); + // std::vector token_ids; + // processor.Encode(str, &token_ids); + // for (const int id : token_ids) { + // // std::cout << token << std::endl; + // } + //} + + + //auto model_proto = std::make_unique(); + + //model_proto->ParseFromArray(serialized.data(), serialized.size()) + + llama_model_free(model); + llama_free(ctx); + llama_backend_free(); - return model == nullptr ? 
EXIT_SUCCESS : EXIT_FAILURE; + + return 0; }
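
For reference, here is a minimal sketch (not part of the patch above) of how the two GGUF keys read by the new `sentencepiece` branch in `llama-vocab.cpp` could be written with the gguf C API that ships with ggml. The file name `embed-sp-model.cpp` and the command-line handling are hypothetical; a real converter would also have to emit the vocabulary, scores and token types. The loader asserts that `tokenizer.ggml.sentencepiece_model` is stored as an INT8/UINT8 array, so the serialized model is written as raw bytes.

```cpp
// embed-sp-model.cpp - hypothetical helper, not part of this patch.
// Writes the two keys that llama-vocab.cpp reads for LLAMA_VOCAB_TYPE_SPIE:
//   tokenizer.ggml.model               = "sentencepiece"
//   tokenizer.ggml.sentencepiece_model = raw bytes of the serialized SentencePiece model
#include "gguf.h"

#include <cstdio>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s tokenizer.model out.gguf\n", argv[0]);
        return 1;
    }

    // read the serialized SentencePiece model (tokenizer.model) as raw bytes
    std::ifstream in(argv[1], std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());

    struct gguf_context * ctx = gguf_init_empty();

    gguf_set_val_str (ctx, "tokenizer.ggml.model", "sentencepiece");
    gguf_set_arr_data(ctx, "tokenizer.ggml.sentencepiece_model",
                      GGUF_TYPE_UINT8, blob.data(), blob.size());

    // only_meta = true: write just the KV metadata, no tensor data
    gguf_write_to_file(ctx, argv[2], /*only_meta =*/ true);
    gguf_free(ctx);
    return 0;
}
```

The resulting metadata-only file is not a usable model on its own; the sketch only illustrates which keys the `sentencepiece` tokenizer path expects to find in a GGUF.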