diff --git a/external/cpp-mcp b/external/cpp-mcp new file mode 160000 index 0000000..251f145 --- /dev/null +++ b/external/cpp-mcp @@ -0,0 +1 @@ +Subproject commit 251f145e0a2b8d402f6fc882494841de862aae47 diff --git a/external/genta-personal/bin/InferenceEngineLib.dll b/external/genta-personal/bin/InferenceEngineLib.dll index af45d73..2d8e386 100644 Binary files a/external/genta-personal/bin/InferenceEngineLib.dll and b/external/genta-personal/bin/InferenceEngineLib.dll differ diff --git a/external/genta-personal/bin/InferenceEngineLibVulkan.dll b/external/genta-personal/bin/InferenceEngineLibVulkan.dll index 62f57a8..8420ff0 100644 Binary files a/external/genta-personal/bin/InferenceEngineLibVulkan.dll and b/external/genta-personal/bin/InferenceEngineLibVulkan.dll differ diff --git a/external/genta-personal/include/job.h b/external/genta-personal/include/job.h index 03d9b85..9a9fd69 100644 --- a/external/genta-personal/include/job.h +++ b/external/genta-personal/include/job.h @@ -24,6 +24,7 @@ struct Job { bool hasError = false; std::string errorMessage; float tps = 0; + float tts = 0; std::atomic cancelRequested{ false }; CompletionParameters params; diff --git a/external/genta-personal/include/types.h b/external/genta-personal/include/types.h index 06a2498..8c04afd 100644 --- a/external/genta-personal/include/types.h +++ b/external/genta-personal/include/types.h @@ -69,7 +69,7 @@ struct LoadingParameters bool warmup = false; int n_parallel = 1; int n_gpu_layers = 100; - int n_batch = 256; + int n_batch = 4096; }; #endif // TYPES_H \ No newline at end of file diff --git a/external/genta-personal/lib/InferenceEngineLib.lib b/external/genta-personal/lib/InferenceEngineLib.lib index 62cc1c4..7acf848 100644 Binary files a/external/genta-personal/lib/InferenceEngineLib.lib and b/external/genta-personal/lib/InferenceEngineLib.lib differ diff --git a/external/genta-personal/lib/InferenceEngineLibVulkan.lib b/external/genta-personal/lib/InferenceEngineLibVulkan.lib index 
d90855e..98fd6af 100644 Binary files a/external/genta-personal/lib/InferenceEngineLibVulkan.lib and b/external/genta-personal/lib/InferenceEngineLibVulkan.lib differ diff --git a/models/qwen-3-0.6b.json b/models/qwen-3-0.6b.json new file mode 100644 index 0000000..8061d86 --- /dev/null +++ b/models/qwen-3-0.6b.json @@ -0,0 +1,37 @@ +{ + "attention_heads": 16.0, + "author": "Alibaba", + "hidden_layers": 28.0, + "hidden_size": 1024.0, + "kv_heads": 16.0, + "name": "Qwen 3 0.6B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-0.6b/resolve/main/Qwen3-0.6B-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 0.6B/4-bit/Qwen3-0.6B-UD-Q4_K_XL.gguf", + "size": 0.37828725576400757, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-0.6b/resolve/main/Qwen3-0.6B-UD-Q8_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 0.6B/8-bit/Qwen3-0.6B-UD-Q8_K_XL.gguf", + "size": 0.7863044142723083, + "type": "8-bit" + }, + "fp16": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-0.6b/resolve/main/Qwen3-0.6B-BF16.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 0.6B/fp16/Qwen3-0.6B-BF16.gguf", + "size": 1.115894079208374, + "type": "fp16" + } + } +} \ No newline at end of file diff --git a/models/qwen-3-1.7b.json b/models/qwen-3-1.7b.json new file mode 100644 index 0000000..70dc6df --- /dev/null +++ b/models/qwen-3-1.7b.json @@ -0,0 +1,37 @@ +{ + "attention_heads": 16.0, + "author": "Alibaba", + "hidden_layers": 28.0, + "hidden_size": 2048.0, + "kv_heads": 16.0, + "name": "Qwen 3 1.7B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-1.7b/resolve/main/Qwen3-1.7B-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 
1.7B/4-bit/Qwen3-1.7B-UD-Q4_K_XL.gguf", + "size": 1.0581648349761963, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-1.7b/resolve/main/Qwen3-1.7B-UD-Q8_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 1.7B/8-bit/Qwen3-1.7B-UD-Q8_K_XL.gguf", + "size": 2.172386407852173, + "type": "8-bit" + }, + "fp16": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-1.7b/resolve/main/Qwen3-1.7B-BF16.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 1.7B/fp16/Qwen3-1.7B-BF16.gguf", + "size": 3.2105941772460938, + "type": "fp16" + } + } +} \ No newline at end of file diff --git a/models/qwen-3-14b.json b/models/qwen-3-14b.json new file mode 100644 index 0000000..c8c96ae --- /dev/null +++ b/models/qwen-3-14b.json @@ -0,0 +1,37 @@ +{ + "attention_heads": 40.0, + "author": "Alibaba", + "hidden_layers": 40.0, + "hidden_size": 5120.0, + "kv_heads": 40.0, + "name": "Qwen 3 14B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-14b/resolve/main/Qwen3-14B-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 14B/4-bit/Qwen3-14B-UD-Q4_K_XL.gguf", + "size": 8.530745506286621, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-14b/resolve/main/Qwen3-14B-UD-Q8_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 14B/8-bit/Qwen3-14B-UD-Q8_K_XL.gguf", + "size": 17.46654510498047, + "type": "8-bit" + }, + "fp16": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-14b/resolve/main/Qwen3-14B-BF16.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 14B/fp16/Qwen3-14B-BF16.gguf", + "size": 27.51445770263672, + "type": "fp16" + } + } +} \ No newline at end of file diff --git 
a/models/qwen-3-30b-a3b.json b/models/qwen-3-30b-a3b.json new file mode 100644 index 0000000..3a8fd8d --- /dev/null +++ b/models/qwen-3-30b-a3b.json @@ -0,0 +1,28 @@ +{ + "attention_heads": 32.0, + "author": "Alibaba", + "hidden_layers": 48.0, + "hidden_size": 2048.0, + "kv_heads": 32.0, + "name": "Qwen 3 30B A3B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-30b/resolve/main/Qwen3-30B-A3B-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 30B A3B/4-bit/Qwen3-30B-A3B-UD-Q4_K_XL.gguf", + "size": 16.49899673461914, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-30b/resolve/main/Qwen3-30B-A3B-Q8_0.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 30B A3B/8-bit/Qwen3-30B-A3B-Q8_0.gguf", + "size": 30.253019332885742, + "type": "8-bit" + } + } +} \ No newline at end of file diff --git a/models/qwen-3-32b.json b/models/qwen-3-32b.json new file mode 100644 index 0000000..347bc9a --- /dev/null +++ b/models/qwen-3-32b.json @@ -0,0 +1,28 @@ +{ + "attention_heads": 64.0, + "author": "Alibaba", + "hidden_layers": 64.0, + "hidden_size": 5120.0, + "kv_heads": 64.0, + "name": "Qwen 3 32B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-32b/resolve/main/Qwen3-32B-128K-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 32B/4-bit/Qwen3-32B-128K-UD-Q4_K_XL.gguf", + "size": 18.64667320251465, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-32b/resolve/main/Qwen3-32B-128K-UD-Q8_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 32B/8-bit/Qwen3-32B-128K-UD-Q8_K_XL.gguf", + "size": 36.769561767578125, + "type": "8-bit" + } + } +} \ No newline at end of file diff --git 
a/models/qwen-3-4b.json b/models/qwen-3-4b.json new file mode 100644 index 0000000..21c7256 --- /dev/null +++ b/models/qwen-3-4b.json @@ -0,0 +1,37 @@ +{ + "attention_heads": 32.0, + "author": "Alibaba", + "hidden_layers": 36.0, + "hidden_size": 2560.0, + "kv_heads": 32.0, + "name": "Qwen 3 4B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-4b/resolve/main/Qwen3-4B-128K-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 4B/4-bit/Qwen3-4B-128K-UD-Q4_K_XL.gguf", + "size": 2.371464729309082, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-4b/resolve/main/Qwen3-4B-128K-UD-Q8_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 4B/8-bit/Qwen3-4B-128K-UD-Q8_K_XL.gguf", + "size": 4.709418773651123, + "type": "8-bit" + }, + "fp16": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-4b/resolve/main/Qwen3-4B-128K-BF16.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 4B/fp16/Qwen3-4B-128K-BF16.gguf", + "size": 7.498343467712402, + "type": "fp16" + } + } +} \ No newline at end of file diff --git a/models/qwen-3-8b.json b/models/qwen-3-8b.json new file mode 100644 index 0000000..2580b89 --- /dev/null +++ b/models/qwen-3-8b.json @@ -0,0 +1,37 @@ +{ + "attention_heads": 32.0, + "author": "Alibaba", + "hidden_layers": 36.0, + "hidden_size": 4096.0, + "kv_heads": 32.0, + "name": "Qwen 3 8B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-8b/resolve/main/Qwen3-8B-128K-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 8B/4-bit/Qwen3-8B-128K-UD-Q4_K_XL.gguf", + "size": 4.807184219360352, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": 
"https://huggingface.co/kolosal/qwen3-8b/resolve/main/Qwen3-8B-128K-UD-Q8_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 8B/8-bit/Qwen3-8B-128K-UD-Q8_K_XL.gguf", + "size": 10.080671310424805, + "type": "8-bit" + }, + "fp16": { + "downloadLink": "https://huggingface.co/kolosal/qwen3-8b/resolve/main/Qwen3-8B-128K-BF16.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/Qwen 3 8B/fp16/Qwen3-8B-128K-BF16.gguf", + "size": 15.262556076049805, + "type": "fp16" + } + } +} \ No newline at end of file diff --git a/models/qwq-32b.json b/models/qwq-32b.json new file mode 100644 index 0000000..23a4552 --- /dev/null +++ b/models/qwq-32b.json @@ -0,0 +1,28 @@ +{ + "attention_heads": 40.0, + "author": "Alibaba", + "hidden_layers": 64.0, + "hidden_size": 5120.0, + "kv_heads": 40.0, + "name": "QwQ 32B", + "variants": { + "4-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwq-32b/resolve/main/QwQ-32B-UD-Q4_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/QwQ 32B/4-bit/QwQ-32B-UD-Q4_K_XL.gguf", + "size": 18.680028915405273, + "type": "4-bit" + }, + "8-bit": { + "downloadLink": "https://huggingface.co/kolosal/qwq-32b/resolve/main/QwQ-32B-UD-Q8_K_XL.gguf", + "downloadProgress": 0.0, + "isDownloaded": false, + "lastSelected": 0, + "path": "models/QwQ 32B/8-bit/QwQ-32B-UD-Q8_K_XL.gguf", + "size": 36.07327651977539, + "type": "8-bit" + } + } +} \ No newline at end of file