diff --git a/external/genta-personal/bin/InferenceEngineLib.dll b/external/genta-personal/bin/InferenceEngineLib.dll
index 56890bd..52f1299 100644
Binary files a/external/genta-personal/bin/InferenceEngineLib.dll and b/external/genta-personal/bin/InferenceEngineLib.dll differ
diff --git a/external/genta-personal/bin/InferenceEngineLibVulkan.dll b/external/genta-personal/bin/InferenceEngineLibVulkan.dll
index 07e8ca2..a0ad996 100644
Binary files a/external/genta-personal/bin/InferenceEngineLibVulkan.dll and b/external/genta-personal/bin/InferenceEngineLibVulkan.dll differ
diff --git a/external/genta-personal/include/job.h b/external/genta-personal/include/job.h
index 2aeaace..03d9b85 100644
--- a/external/genta-personal/include/job.h
+++ b/external/genta-personal/include/job.h
@@ -27,6 +27,8 @@ struct Job {
     std::atomic<bool> cancelRequested{ false };
     CompletionParameters params;
 
+    int seqId;
+
     bool isDecodingPrompt = true;
 
     int n_past;
diff --git a/external/genta-personal/include/types.h b/external/genta-personal/include/types.h
index 49796ec..8654870 100644
--- a/external/genta-personal/include/types.h
+++ b/external/genta-personal/include/types.h
@@ -17,6 +17,7 @@ struct CompletionParameters
 	float topP = 0.5f;
 	bool streaming = false;
 	std::string kvCacheFilePath = "";
+	int seqId = -1;
 
 	bool isValid() const;
 };
@@ -43,6 +44,7 @@ struct ChatCompletionParameters
 	float topP = 0.5f;
 	bool streaming = false;
 	std::string kvCacheFilePath = "";
+	int seqId = -1;
 
 	bool isValid() const;
 };
diff --git a/external/genta-personal/lib/InferenceEngineLib.lib b/external/genta-personal/lib/InferenceEngineLib.lib
index 86c9cd4..70a0b7c 100644
Binary files a/external/genta-personal/lib/InferenceEngineLib.lib and b/external/genta-personal/lib/InferenceEngineLib.lib differ
diff --git a/external/genta-personal/lib/InferenceEngineLibVulkan.lib b/external/genta-personal/lib/InferenceEngineLibVulkan.lib
index d477528..30ad435 100644
Binary files a/external/genta-personal/lib/InferenceEngineLibVulkan.lib and b/external/genta-personal/lib/InferenceEngineLibVulkan.lib differ
diff --git a/include/chat/chat_manager.hpp b/include/chat/chat_manager.hpp
index c937f08..a16b5d3 100644
--- a/include/chat/chat_manager.hpp
+++ b/include/chat/chat_manager.hpp
@@ -94,6 +94,7 @@ namespace Chat
             return std::async(std::launch::async, [this, newName]() {
                 if (!validateChatName(newName)) 
                 {
+					std::cerr << "[ChatManager] [ERROR] " << newName << " is not valid" << std::endl;
                     return false;
                 }
 
@@ -101,17 +102,20 @@ namespace Chat
 
                 if (!m_currentChatName) 
                 {
+					std::cerr << "[ChatManager] No current chat selected.\n";
                     return false;
                 }
 
                 if (m_chatNameToIndex.find(newName) != m_chatNameToIndex.end()) 
                 {
+					std::cerr << "[ChatManager] Chat with name " << newName << " already exists.\n";
                     return false;
                 }
 
                 size_t currentIdx = m_currentChatIndex;
                 if (currentIdx >= m_chats.size()) 
                 {
+					std::cerr << "[ChatManager] Invalid chat index: " << currentIdx << std::endl;
                     return false;
                 }
 
diff --git a/include/model/model_manager.hpp b/include/model/model_manager.hpp
index aea172f..73154ef 100644
--- a/include/model/model_manager.hpp
+++ b/include/model/model_manager.hpp
@@ -321,6 +321,7 @@ namespace Model
             );
             if (kvCachePathOpt.has_value()) {
                 completionParams.kvCacheFilePath = kvCachePathOpt.value().string();
+                completionParams.seqId = currentChat.id;
             }
 
             return completionParams;
@@ -363,6 +364,7 @@ namespace Model
             );
             if (kvCachePathOpt.has_value()) {
                 completionParams.kvCacheFilePath = kvCachePathOpt.value().string();
+                completionParams.seqId = currentChat.id;
             }
 
             return completionParams;
@@ -438,7 +440,7 @@ namespace Model
             return result;
         }
 
-        CompletionResult chatCompleteSync(const ChatCompletionParameters& params)
+        CompletionResult chatCompleteSync(const ChatCompletionParameters& params, const bool saveChat = true)
         {
             {
                 std::shared_lock<std::shared_mutex> lock(m_mutex);
@@ -475,8 +477,6 @@ namespace Model
                 m_jobIds.push_back(jobId);
             }
 
-            auto& chatManager = Chat::ChatManager::getInstance();
-
             // Wait for the job to complete
             m_inferenceEngine->waitForJob(jobId);
 
@@ -496,22 +496,26 @@ namespace Model
             }
 
             // Save the chat history
-            auto chatName = chatManager.getChatNameByJobId(jobId);
-            if (!chatManager.saveChat(chatName))
+            if (saveChat) 
             {
-                std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl;
-            }
+                auto& chatManager = Chat::ChatManager::getInstance();
+                auto chatName = chatManager.getChatNameByJobId(jobId);
+                if (!chatManager.saveChat(chatName))
+                {
+                    std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl;
+                }
 
-            // Reset jobid tracking on chat manager
-            if (!chatManager.removeJobId(jobId))
-            {
-                std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n";
+                // Reset jobid tracking on chat manager
+                if (!chatManager.removeJobId(jobId))
+                {
+                    std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n";
+                }
             }
 
             return result;
         }
 
-        int startCompletionJob(const CompletionParameters& params, std::function<void(const std::string&, const float, const int, const bool)> streamingCallback)
+        int startCompletionJob(const CompletionParameters& params, std::function<void(const std::string&, const float, const int, const bool)> streamingCallback, const bool saveChat = true)
         {
             {
                 std::shared_lock<std::shared_mutex> lock(m_mutex);
@@ -539,7 +543,7 @@ namespace Model
                 m_jobIds.push_back(jobId);
             }
 
-            std::thread([this, jobId, streamingCallback]() {
+            std::thread([this, jobId, streamingCallback, saveChat]() {
                 // Poll while job is running or until the engine says it's done
                 while (true)
                 {
@@ -569,9 +573,12 @@ namespace Model
 
                 // Reset jobid tracking on chat manager
                 {
-                    if (!Chat::ChatManager::getInstance().removeJobId(jobId))
+                    if (saveChat)
                     {
-                        std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n";
+                        if (!Chat::ChatManager::getInstance().removeJobId(jobId))
+                        {
+                            std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n";
+                        }
                     }
                 }
                 }).detach();
@@ -579,7 +586,7 @@ namespace Model
             return jobId;
         }
 
-        int startChatCompletionJob(const ChatCompletionParameters& params, std::function<void(const std::string&, const float, const int, const bool)> streamingCallback)
+        int startChatCompletionJob(const ChatCompletionParameters& params, std::function<void(const std::string&, const float, const int, const bool)> streamingCallback, const bool saveChat = true)
         {
             {
                 std::shared_lock<std::shared_mutex> lock(m_mutex);
@@ -607,10 +614,7 @@ namespace Model
                 m_jobIds.push_back(jobId);
             }
 
-            std::thread([this, jobId, streamingCallback]() {
-                // Poll while job is running or until the engine says it's done
-                auto& chatManager = Chat::ChatManager::getInstance();
-
+            std::thread([this, jobId, streamingCallback, saveChat]() {
                 while (true)
                 {
                     if (this->m_inferenceEngine->hasJobError(jobId)) break;
@@ -637,20 +641,25 @@ namespace Model
                     m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end());
                 }
 
-                // Save the chat history
+                if (saveChat)
                 {
-                    auto chatName = chatManager.getChatNameByJobId(jobId);
-                    if (!chatManager.saveChat(chatName))
+                    auto& chatManager = Chat::ChatManager::getInstance();
+
+                    // Save the chat history
                     {
-                        std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl;
+                        auto chatName = chatManager.getChatNameByJobId(jobId);
+                        if (!chatManager.saveChat(chatName))
+                        {
+                            std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl;
+                        }
                     }
-                }
 
-                // Reset jobid tracking on chat manager
-                {
-                    if (!chatManager.removeJobId(jobId))
+                    // Reset jobid tracking on chat manager
                     {
-                        std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n";
+                        if (!chatManager.removeJobId(jobId))
+                        {
+                            std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n";
+                        }
                     }
                 }
                 }).detach();
@@ -753,7 +762,7 @@ namespace Model
             params.streaming = false;
 
             // Invoke the synchronous chat completion method.
-            CompletionResult result = chatCompleteSync(params);
+            CompletionResult result = chatCompleteSync(params, false);
 
             // Map the engine’s result to our ChatCompletionResponse.
             ChatCompletionResponse response = convertToChatResponse(request, result);
diff --git a/include/ui/chat/chat_window.hpp b/include/ui/chat/chat_window.hpp
index 48ea786..53d66b3 100644
--- a/include/ui/chat/chat_window.hpp
+++ b/include/ui/chat/chat_window.hpp
@@ -279,6 +279,73 @@ class ChatWindow {
         }
     }
 
+    /// Asks the model (via ModelManager::chatCompleteSync) for a short title based
+    /// on the chat's first user message and renames the current chat to it. The
+    /// blocking completion runs on a detached worker thread, not on the caller's.
+    /// @param firstUserMessage Text of the first user message in the chat.
+    void generateChatTitle(const std::string& firstUserMessage) {
+        ChatCompletionParameters titleParams;
+
+        // The instruction is appended to the message and sent as one "user" turn.
+        const std::string titlePrompt = firstUserMessage +
+            "\n-----\n"
+            "Ignore all previous instructions. The preceding text is a conversation thread that needs a concise but descriptive 3 to 5 word title in natural English so that readers will be able to easily find it again. Do not add any quotation marks, formatting, or any symbol to the title. Respond only with the title text.";
+        titleParams.messages.push_back({ "user", titlePrompt });
+
+        // Sampling settings for the title request.
+        titleParams.maxNewTokens = 20;     // a 3-5 word title needs few tokens
+        titleParams.temperature = 0.7f;    // float literal: no double-to-float narrowing
+        titleParams.streaming = false;     // the title is consumed in one piece
+
+        // Strips a "Title:" label and punctuation (except '+' and '-'), then trims whitespace.
+        auto sanitizeTitle = [](std::string& s) {
+            const std::string titlePrefix = "Title:";
+            const size_t pos = s.find(titlePrefix);
+            if (pos != std::string::npos) {
+                s.erase(pos, titlePrefix.length());
+            }
+
+            s.erase(std::remove_if(s.begin(), s.end(), [](char c) {
+                return std::ispunct(static_cast<unsigned char>(c)) && c != '+' && c != '-';
+                }), s.end());
+
+            // erase(0, npos) empties an all-whitespace string, hence the guard below.
+            s.erase(0, s.find_first_not_of(" \t\n\r"));
+            if (!s.empty()) {
+                s.erase(s.find_last_not_of(" \t\n\r") + 1);
+            }
+            };
+
+        // The closure holds copies of everything it uses, so it may outlive this call.
+        std::thread([titleParams, sanitizeTitle]() {
+            // An exception escaping a thread's entry function calls std::terminate,
+            // so every failure is caught and logged here instead.
+            try {
+                // Second argument is saveChat=false: title jobs are not saved as chat history.
+                CompletionResult titleResult =
+                    Model::ModelManager::getInstance().chatCompleteSync(titleParams, false);
+
+                std::string newTitle = titleResult.text;
+                sanitizeTitle(newTitle);
+                if (newTitle.empty()) {
+                    return;
+                }
+
+                // NOTE(review): renames whichever chat is current when generation
+                // finishes; presumably wrong if the user switched chats meanwhile - confirm.
+                if (!Chat::ChatManager::getInstance().renameCurrentChat(newTitle).get()) {
+                    std::cerr << "[ChatSection] Failed to rename chat to: " << newTitle << "\n";
+                }
+            }
+            catch (const std::exception& e) {
+                std::cerr << "[ChatSection] Title generation failed: " << e.what() << "\n";
+            }
+            catch (...) {
+                std::cerr << "[ChatSection] Title generation failed: unknown error\n";
+            }
+            }).detach();
+    }
+
     // Render the row of buttons that allow the user to switch models or clear chat.
     void renderChatFeatureButtons(float baseX, float baseY) {
 		Model::ModelManager& modelManager = Model::ModelManager::getInstance();
@@ -321,6 +388,9 @@ class ChatWindow {
 
         auto& currentChat = currentChatOpt.value();
 
+        // Check if this is the first message in the chat
+        bool isFirstMessage = currentChat.messages.empty();
+
         // Append the user message.
         Chat::Message userMessage;
         userMessage.id = static_cast<int>(currentChat.messages.size()) + 1;
@@ -339,6 +409,11 @@ class ChatWindow {
         }
 
         modelManager.setModelGenerationInProgress(true);
+
+        // If this is the first message, generate a title for the chat
+        if (isFirstMessage) {
+            generateChatTitle(message);
+        }
     }
 
     InputFieldConfig createInputFieldConfig(
diff --git a/models/phi-4-14b.json b/models/phi-4-14b.json
new file mode 100644
index 0000000..6f830f5
--- /dev/null
+++ b/models/phi-4-14b.json
@@ -0,0 +1,28 @@
+{
+  "name": "Phi 4 14B",
+  "author": "Microsoft",
+  "fullPrecision": {
+    "type": "Full Precision",
+    "path": "models/phi-4-14b/fp16/phi-4-F16.gguf",
+    "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-F16.gguf",
+    "isDownloaded": false,
+    "downloadProgress": 0.0,
+    "lastSelected": 0
+  },
+  "quantized8Bit": {
+    "type": "8-bit Quantized",
+    "path": "models/phi-4-14b/int8/phi-4-Q8_0.gguf",
+    "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q8_0.gguf",
+    "isDownloaded": false,
+    "downloadProgress": 0.0,
+    "lastSelected": 0
+  },
+  "quantized4Bit": {
+    "type": "4-bit Quantized",
+    "path": "models/phi-4-14b/int4/phi-4-Q4_K_M.gguf",
+    "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q4_K_M.gguf",
+    "isDownloaded": false,
+    "downloadProgress": 0.0,
+    "lastSelected": 0
+  }
+}
\ No newline at end of file
diff --git a/models/phi-4-mini-3.8b.json b/models/phi-4-mini-3.8b.json
new file mode 100644
index 0000000..16097c6
--- /dev/null
+++ b/models/phi-4-mini-3.8b.json
@@ -0,0 +1,28 @@
+{
+  "name": "Phi 4 Mini 3.8B",
+  "author": "Microsoft",
+  "fullPrecision": {
+    "type": "Full Precision",
+    "path": "models/phi-4-mini-3.8b/fp16/Phi-4-mini-instruct.BF16.gguf",
+    "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.BF16.gguf",
+    "isDownloaded": false,
+    "downloadProgress": 0.0,
+    "lastSelected": 0
+  },
+  "quantized8Bit": {
+    "type": "8-bit Quantized",
+    "path": "models/phi-4-mini-3.8b/int8/Phi-4-mini-instruct.Q8_0.gguf",
+    "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.Q8_0.gguf",
+    "isDownloaded": false,
+    "downloadProgress": 0.0,
+    "lastSelected": 0
+  },
+  "quantized4Bit": {
+    "type": "4-bit Quantized",
+    "path": "models/phi-4-mini-3.8b/int4/Phi-4-mini-instruct-Q4_K_M.gguf",
+    "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct-Q4_K_M.gguf",
+    "isDownloaded": false,
+    "downloadProgress": 0.0,
+    "lastSelected": 0
+  }
+}
\ No newline at end of file
diff --git a/server-test/python/openai_test.py b/server-test/python/openai_test.py
index 879c4eb..290fae3 100644
--- a/server-test/python/openai_test.py
+++ b/server-test/python/openai_test.py
@@ -16,17 +16,21 @@
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "Why anything to the power of zero is 1?"}
     ],
-    stream=True
+    stream=False
 )
 
-# Process the streaming response
-print("Streaming response:")
-full_response = ""
-for chunk in stream:
-    if chunk.choices[0].delta.content is not None:
-        content = chunk.choices[0].delta.content
-        full_response += content
-        print(content, end="", flush=True)
+# Print the complete (non-streaming) response
+print("Full response:")
+print(stream.choices[0].message.content)
 
-print("\n\nFull response:", full_response)
+# Streaming variant, disabled; it needs stream=True in the request above:
+# print("Streaming response:")
+# full_response = ""
+# for chunk in stream:
+#     if chunk.choices[0].delta.content is not None:
+#         content = chunk.choices[0].delta.content
+#         full_response += content
+#         print(content, end="", flush=True)
+
+# print("\n\nFull response:", full_response)