diff --git a/external/genta-personal/bin/InferenceEngineLib.dll b/external/genta-personal/bin/InferenceEngineLib.dll index 56890bd..52f1299 100644 Binary files a/external/genta-personal/bin/InferenceEngineLib.dll and b/external/genta-personal/bin/InferenceEngineLib.dll differ diff --git a/external/genta-personal/bin/InferenceEngineLibVulkan.dll b/external/genta-personal/bin/InferenceEngineLibVulkan.dll index 07e8ca2..a0ad996 100644 Binary files a/external/genta-personal/bin/InferenceEngineLibVulkan.dll and b/external/genta-personal/bin/InferenceEngineLibVulkan.dll differ diff --git a/external/genta-personal/include/job.h b/external/genta-personal/include/job.h index 2aeaace..03d9b85 100644 --- a/external/genta-personal/include/job.h +++ b/external/genta-personal/include/job.h @@ -27,6 +27,8 @@ struct Job { std::atomic cancelRequested{ false }; CompletionParameters params; + int seqId; + bool isDecodingPrompt = true; int n_past; diff --git a/external/genta-personal/include/types.h b/external/genta-personal/include/types.h index 49796ec..8654870 100644 --- a/external/genta-personal/include/types.h +++ b/external/genta-personal/include/types.h @@ -17,6 +17,7 @@ struct CompletionParameters float topP = 0.5f; bool streaming = false; std::string kvCacheFilePath = ""; + int seqId = -1; bool isValid() const; }; @@ -43,6 +44,7 @@ struct ChatCompletionParameters float topP = 0.5f; bool streaming = false; std::string kvCacheFilePath = ""; + int seqId = -1; bool isValid() const; }; diff --git a/external/genta-personal/lib/InferenceEngineLib.lib b/external/genta-personal/lib/InferenceEngineLib.lib index 86c9cd4..70a0b7c 100644 Binary files a/external/genta-personal/lib/InferenceEngineLib.lib and b/external/genta-personal/lib/InferenceEngineLib.lib differ diff --git a/external/genta-personal/lib/InferenceEngineLibVulkan.lib b/external/genta-personal/lib/InferenceEngineLibVulkan.lib index d477528..30ad435 100644 Binary files a/external/genta-personal/lib/InferenceEngineLibVulkan.lib and b/external/genta-personal/lib/InferenceEngineLibVulkan.lib differ diff --git a/include/chat/chat_manager.hpp b/include/chat/chat_manager.hpp index c937f08..a16b5d3 100644 --- a/include/chat/chat_manager.hpp +++ b/include/chat/chat_manager.hpp @@ -94,6 +94,7 @@ namespace Chat return std::async(std::launch::async, [this, newName]() { if (!validateChatName(newName)) { + std::cerr << "[ChatManager] [ERROR] " << newName << " is not valid" << std::endl; return false; } @@ -101,17 +102,20 @@ namespace Chat if (!m_currentChatName) { + std::cerr << "[ChatManager] No current chat selected.\n"; return false; } if (m_chatNameToIndex.find(newName) != m_chatNameToIndex.end()) { + std::cerr << "[ChatManager] Chat with name " << newName << " already exists.\n"; return false; } size_t currentIdx = m_currentChatIndex; if (currentIdx >= m_chats.size()) { + std::cerr << "[ChatManager] Invalid chat index: " << currentIdx << std::endl; return false; } diff --git a/include/model/model_manager.hpp b/include/model/model_manager.hpp index aea172f..73154ef 100644 --- a/include/model/model_manager.hpp +++ b/include/model/model_manager.hpp @@ -321,6 +321,7 @@ namespace Model ); if (kvCachePathOpt.has_value()) { completionParams.kvCacheFilePath = kvCachePathOpt.value().string(); + completionParams.seqId = currentChat.id; } return completionParams; @@ -363,6 +364,7 @@ namespace Model ); if (kvCachePathOpt.has_value()) { completionParams.kvCacheFilePath = kvCachePathOpt.value().string(); + completionParams.seqId = currentChat.id; } return completionParams; @@ -438,7 +440,7 @@ namespace Model return result; } - CompletionResult chatCompleteSync(const ChatCompletionParameters& params) + CompletionResult chatCompleteSync(const ChatCompletionParameters& params, const bool saveChat = true) { { std::shared_lock lock(m_mutex); @@ -475,8 +477,6 @@ namespace Model m_jobIds.push_back(jobId); } - auto& chatManager = Chat::ChatManager::getInstance(); - // Wait for the job to complete m_inferenceEngine->waitForJob(jobId); @@ -496,22 +496,26 @@ namespace Model } // Save the chat history - auto chatName = chatManager.getChatNameByJobId(jobId); - if (!chatManager.saveChat(chatName)) + if (saveChat) { - std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl; - } + auto& chatManager = Chat::ChatManager::getInstance(); + auto chatName = chatManager.getChatNameByJobId(jobId); + if (!chatManager.saveChat(chatName)) + { + std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl; + } - // Reset jobid tracking on chat manager - if (!chatManager.removeJobId(jobId)) - { - std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + // Reset jobid tracking on chat manager + if (!chatManager.removeJobId(jobId)) + { + std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + } } return result; } - int startCompletionJob(const CompletionParameters& params, std::function streamingCallback) + int startCompletionJob(const CompletionParameters& params, std::function streamingCallback, const bool saveChat = true) { { std::shared_lock lock(m_mutex); @@ -539,7 +543,7 @@ namespace Model m_jobIds.push_back(jobId); } - std::thread([this, jobId, streamingCallback]() { + std::thread([this, jobId, streamingCallback, saveChat]() { // Poll while job is running or until the engine says it's done while (true) { @@ -569,9 +573,12 @@ namespace Model // Reset jobid tracking on chat manager { - if (!Chat::ChatManager::getInstance().removeJobId(jobId)) + if (saveChat) { - std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + if (!Chat::ChatManager::getInstance().removeJobId(jobId)) + { + std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + } } } }).detach(); @@ -579,7 +586,7 @@ namespace Model return jobId; } - int startChatCompletionJob(const ChatCompletionParameters& params, std::function streamingCallback) + int startChatCompletionJob(const ChatCompletionParameters& params, std::function streamingCallback, const bool saveChat = true) { { std::shared_lock lock(m_mutex); @@ -607,10 +614,7 @@ namespace Model m_jobIds.push_back(jobId); } - std::thread([this, jobId, streamingCallback]() { - // Poll while job is running or until the engine says it's done - auto& chatManager = Chat::ChatManager::getInstance(); - + std::thread([this, jobId, streamingCallback, saveChat]() { while (true) { if (this->m_inferenceEngine->hasJobError(jobId)) break; @@ -637,20 +641,25 @@ namespace Model m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end()); } - // Save the chat history + if (saveChat) { - auto chatName = chatManager.getChatNameByJobId(jobId); - if (!chatManager.saveChat(chatName)) + auto& chatManager = Chat::ChatManager::getInstance(); + + // Save the chat history { - std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl; + auto chatName = chatManager.getChatNameByJobId(jobId); + if (!chatManager.saveChat(chatName)) + { + std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl; + } } - } - // Reset jobid tracking on chat manager - { - if (!chatManager.removeJobId(jobId)) + // Reset jobid tracking on chat manager { - std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + if (!chatManager.removeJobId(jobId)) + { + std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + } } } }).detach(); @@ -753,7 +762,7 @@ namespace Model params.streaming = false; // Invoke the synchronous chat completion method. - CompletionResult result = chatCompleteSync(params); + CompletionResult result = chatCompleteSync(params, false); // Map the engine’s result to our ChatCompletionResponse. ChatCompletionResponse response = convertToChatResponse(request, result); diff --git a/include/ui/chat/chat_window.hpp b/include/ui/chat/chat_window.hpp index 48ea786..53d66b3 100644 --- a/include/ui/chat/chat_window.hpp +++ b/include/ui/chat/chat_window.hpp @@ -279,6 +279,73 @@ class ChatWindow { } } + void generateChatTitle(const std::string& firstUserMessage) { + auto& modelManager = Model::ModelManager::getInstance(); + auto& chatManager = Chat::ChatManager::getInstance(); + + // Create parameters for title generation + ChatCompletionParameters titleParams; + + // Add a system prompt instructing the model to generate a short, descriptive title + const std::string titlePrompt = firstUserMessage + + "\n-----\n" + "Ignore all previous instructions. The preceding text is a conversation thread that needs a concise but descriptive 3 to 5 word title in natural English so that readers will be able to easily find it again. Do not add any quotation marks, formatting, or any symbol to the title. Respond only with the title text."; + + // Add the title prompt as a user message + titleParams.messages.push_back({ "user", titlePrompt }); + + // Configure title generation parameters + titleParams.maxNewTokens = 20; // Short title only needs few tokens + titleParams.temperature = 0.7; // Slightly creative but not too random + titleParams.streaming = false; // No need for streaming for a quick title + + // Use a separate thread to avoid blocking UI + std::thread([titleParams]() { + auto& modelManager = Model::ModelManager::getInstance(); + auto& chatManager = Chat::ChatManager::getInstance(); + + // Generate the title (synchronous call) + CompletionResult titleResult = modelManager.chatCompleteSync(titleParams, false); + + if (!titleResult.text.empty()) { + // Clean up the generated title + std::string newTitle = titleResult.text; + + // Trim whitespace and quotes + // Remove symbols and trim whitespace, and if the title contain text "Title:", remove it + auto trim = [](std::string& s) { + // Remove "Title:" if present + const std::string titlePrefix = "Title:"; + size_t pos = s.find(titlePrefix); + if (pos != std::string::npos) { + s.erase(pos, titlePrefix.length()); + } + + // Remove symbols except '+' and '-' + s.erase(std::remove_if(s.begin(), s.end(), [](char c) { + return std::ispunct(static_cast(c)) && c != '+' && c != '-'; + }), s.end()); + + // Trim whitespace + s.erase(0, s.find_first_not_of(" \t\n\r")); + if (!s.empty()) { + s.erase(s.find_last_not_of(" \t\n\r") + 1); + } + }; + + trim(newTitle); + + // Apply the new title if it's valid + if (!newTitle.empty()) { + if (!chatManager.renameCurrentChat(newTitle).get()) + { + std::cerr << "[ChatSection] Failed to rename chat to: " << newTitle << "\n"; + } + } + } + }).detach(); + } + // Render the row of buttons that allow the user to switch models or clear chat. void renderChatFeatureButtons(float baseX, float baseY) { Model::ModelManager& modelManager = Model::ModelManager::getInstance(); @@ -321,6 +388,9 @@ class ChatWindow { auto& currentChat = currentChatOpt.value(); + // Check if this is the first message in the chat + bool isFirstMessage = currentChat.messages.empty(); + // Append the user message. Chat::Message userMessage; userMessage.id = static_cast(currentChat.messages.size()) + 1; @@ -339,6 +409,11 @@ class ChatWindow { } modelManager.setModelGenerationInProgress(true); + + // If this is the first message, generate a title for the chat + if (isFirstMessage) { + generateChatTitle(message); + } } InputFieldConfig createInputFieldConfig( diff --git a/models/phi-4-14b.json b/models/phi-4-14b.json new file mode 100644 index 0000000..6f830f5 --- /dev/null +++ b/models/phi-4-14b.json @@ -0,0 +1,28 @@ +{ + "name": "Phi 4 14B", + "author": "Microsoft", + "fullPrecision": { + "type": "Full Precision", + "path": "models/phi-4-14b/fp16/phi-4-F16.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-F16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "quantized8Bit": { + "type": "8-bit Quantized", + "path": "models/phi-4-14b/int8/phi-4-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "quantized4Bit": { + "type": "4-bit Quantized", + "path": "models/phi-4-14b/int4/phi-4-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } +} \ No newline at end of file diff --git a/models/phi-4-mini-3.8b.json b/models/phi-4-mini-3.8b.json new file mode 100644 index 0000000..16097c6 --- /dev/null +++ b/models/phi-4-mini-3.8b.json @@ -0,0 +1,28 @@ +{ + "name": "Phi 4 Mini 3.8B", + "author": "Microsoft", + "fullPrecision": { + "type": "Full Precision", + "path": "models/phi-4-mini-3.8b/fp16/Phi-4-mini-instruct.BF16.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.BF16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "quantized8Bit": { + "type": "8-bit Quantized", + "path": "models/phi-4-mini-3.8b/int8/Phi-4-mini-instruct.Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "quantized4Bit": { + "type": "4-bit Quantized", + "path": "models/phi-4-mini-3.8b/int4/Phi-4-mini-instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } +} \ No newline at end of file diff --git a/server-test/python/openai_test.py b/server-test/python/openai_test.py index 879c4eb..290fae3 100644 --- a/server-test/python/openai_test.py +++ b/server-test/python/openai_test.py @@ -16,17 +16,21 @@ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Why anything to the power of zero is 1?"} ], - stream=True + stream=False ) -# Process the streaming response -print("Streaming response:") -full_response = "" -for chunk in stream: - if chunk.choices[0].delta.content is not None: - content = chunk.choices[0].delta.content - full_response += content - print(content, end="", flush=True) +# Process the full_response +print("Full response:") +print(stream.choices[0].message.content) -print("\n\nFull response:", full_response) +# Process streaming response +# print("Streaming response:") +# full_response = "" +# for chunk in stream: +# if chunk.choices[0].delta.content is not None: +# content = chunk.choices[0].delta.content +# full_response += content +# print(content, end="", flush=True) + +# print("\n\nFull response:", full_response)