KolosalAI · rifkybujana · Mar 15, 2025 · Jan 15, 2025 · Jan 22, 2025 · Jan 22, 2025
diff --git a/external/genta-personal/bin/InferenceEngineLib.dll b/external/genta-personal/bin/InferenceEngineLib.dll
diff --git a/external/genta-personal/bin/InferenceEngineLibVulkan.dll b/external/genta-personal/bin/InferenceEngineLibVulkan.dll
diff --git a/external/genta-personal/include/types.h b/external/genta-personal/include/types.h
@@ -69,6 +69,7 @@ struct LoadingParameters
 	bool warmup = false;
 	int n_parallel = 1;
 	int n_gpu_layers = 100;
+	int n_batch = 4096;
 };
 
 #endif // TYPES_H
diff --git a/external/genta-personal/lib/InferenceEngineLib.lib b/external/genta-personal/lib/InferenceEngineLib.lib
diff --git a/external/genta-personal/lib/InferenceEngineLibVulkan.lib b/external/genta-personal/lib/InferenceEngineLibVulkan.lib
diff --git a/include/chat/chat_manager.hpp b/include/chat/chat_manager.hpp
@@ -92,53 +92,57 @@ namespace Chat
         std::future<bool> renameCurrentChat(const std::string& newName)
         {
             return std::async(std::launch::async, [this, newName]() {
-                if (!validateChatName(newName)) 
+                if (!validateChatName(newName))
                 {
-					std::cerr << "[ChatManager] [ERROR] " << newName << " is not valid" << std::endl;
+                    std::cerr << "[ChatManager] [ERROR] " << newName << " is not valid" << std::endl;
                     return false;
                 }
 
                 std::unique_lock<std::shared_mutex> lock(m_mutex);
 
-                if (!m_currentChatName) 
+                if (!m_currentChatName)
                 {
-					std::cerr << "[ChatManager] No current chat selected.\n";
+                    std::cerr << "[ChatManager] No current chat selected.\n";
                     return false;
                 }
 
-                if (m_chatNameToIndex.find(newName) != m_chatNameToIndex.end()) 
+                // Generate a unique name if the requested name already exists
+                std::string uniqueName = newName;
+                int counter = 1;
+
+                while (m_chatNameToIndex.find(uniqueName) != m_chatNameToIndex.end())
                 {
-					std::cerr << "[ChatManager] Chat with name " << newName << " already exists.\n";
-                    return false;
+                    uniqueName = newName + " (" + std::to_string(counter) + ")";
+                    counter++;
                 }
 
                 size_t currentIdx = m_currentChatIndex;
-                if (currentIdx >= m_chats.size()) 
+                if (currentIdx >= m_chats.size())
                 {
-					std::cerr << "[ChatManager] Invalid chat index: " << currentIdx << std::endl;
+                    std::cerr << "[ChatManager] Invalid chat index: " << currentIdx << std::endl;
                     return false;
                 }
 
                 std::string oldName = m_chats[currentIdx].name;
-                m_chats[currentIdx].name = newName;
+                m_chats[currentIdx].name = uniqueName;
                 m_chats[currentIdx].lastModified = static_cast<int>(std::time(nullptr));
-                
+
                 // Update indices
                 m_chatNameToIndex.erase(oldName);
-                m_chatNameToIndex[newName] = currentIdx;
-                m_currentChatName = newName;
+                m_chatNameToIndex[uniqueName] = currentIdx;
+                m_currentChatName = uniqueName;
 
                 // Save changes
                 auto chat = m_chats[currentIdx];
                 auto saveResult = m_persistence->saveChat(chat).get();
-                if (saveResult) 
+                if (saveResult)
                 {
                     m_persistence->deleteChat(oldName).get();
-                    m_persistence->renameKvChat(oldName, newName).get();
+                    m_persistence->renameKvChat(oldName, uniqueName).get();
                 }
 
                 return saveResult;
-            });
+                });
         }
 
 		std::future<bool> clearCurrentChat()
@@ -649,9 +653,7 @@ namespace Chat
         // Validation helpers
         static bool validateChatName(const std::string& name) 
         {
-            if (name.empty() || name.length() > 256) return false;
-            const std::string invalidChars = R"(<>:"/\|?*)";
-            return name.find_first_of(invalidChars) == std::string::npos;
+            // A chat name is valid when it is non-empty and its length() is at most 256.
+            // No restriction is placed on which characters it contains.
+            const auto length = name.length();
+            return length != 0 && length <= 256;
         }
 
         void updateChatTimestamp(size_t chatIndex, int newTimestamp)

diff --git a/include/chat/chat_persistence.hpp b/include/chat/chat_persistence.hpp
@@ -183,12 +183,22 @@ namespace Chat
 
         std::filesystem::path getChatPath(const std::string& chatName) const override
         {
+            // Returns the absolute path <m_basePath>/<sanitized chat name>.chat.
+            //
+            // Every character that is not allowed in a file name (any of <>:"/\|?*)
+            // is replaced with '_'. All other bytes, including spaces and non-ASCII
+            // (e.g. UTF-8) bytes, are kept unchanged.
+            // NOTE(review): names that differ only in replaced characters map to the
+            // same file -- confirm callers can tolerate that.
+            static const std::string invalidChars = R"(<>:"/\|?*)";
+            std::string chatNameFiltered = chatName;
+            for (auto pos = chatNameFiltered.find_first_of(invalidChars);
+                 pos != std::string::npos;
+                 pos = chatNameFiltered.find_first_of(invalidChars, pos + 1))
+            {
+                chatNameFiltered[pos] = '_';
+            }
+
             return std::filesystem::absolute(
-                std::filesystem::path(m_basePath) / (chatName + ".chat"));
+                std::filesystem::path(m_basePath) / (chatNameFiltered + ".chat"));
         }
 
         std::filesystem::path getKvChatPath(const std::string& chatName) const override
 		{
+			// Returns the absolute path <m_basePath>/<sanitized chat name>.bin.
+			//
+			// Every character that is not allowed in a file name (any of <>:"/\|?*)
+			// is replaced with '_'. All other bytes, including spaces and non-ASCII
+			// (e.g. UTF-8) bytes, are kept unchanged.
+			// NOTE(review): names that differ only in replaced characters map to the
+			// same file -- confirm callers can tolerate that.
+			static const std::string invalidChars = R"(<>:"/\|?*)";
+			std::string chatNameFiltered = chatName;
+			for (auto pos = chatNameFiltered.find_first_of(invalidChars);
+				 pos != std::string::npos;
+				 pos = chatNameFiltered.find_first_of(invalidChars, pos + 1))
+			{
+				chatNameFiltered[pos] = '_';
+			}
+
 			return std::filesystem::absolute(
-				std::filesystem::path(m_basePath) / (chatName + ".bin"));
+				std::filesystem::path(m_basePath) / (chatNameFiltered + ".bin"));
 		}

diff --git a/include/common.hpp b/include/common.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <imgui.h>
 #include <chrono>
 #include <string>
 #include <sstream>

diff --git a/include/config.hpp b/include/config.hpp
@@ -66,7 +66,8 @@ namespace Config
 
     namespace InputField
     {
-        constexpr size_t TEXT_SIZE = 81920;
+        // Maximum input text size: 64 KiB (64 * 1024)
+        constexpr size_t TEXT_SIZE = 64 * 1024;
 
         constexpr float CHILD_ROUNDING = 10.0F;
         constexpr float FRAME_ROUNDING = 12.0F;

diff --git a/include/model/model.hpp b/include/model/model.hpp
@@ -3,12 +3,14 @@
 #include <string>
 #include <json.hpp>
 #include <filesystem>
+#include <map>
+#include <atomic>
 
 using json = nlohmann::json;
 
 namespace Model
 {
-    // In model.hpp or the appropriate header:
+    // One variant of a model; its fields are (de)serialized by to_json/from_json below
     struct ModelVariant {
         std::string type;
         std::string path;
@@ -48,18 +50,18 @@ namespace Model
         }
     };
 
-    inline void to_json(nlohmann::json &j, const ModelVariant &v)
+    // Serializes a ModelVariant into a JSON object with the keys "type", "path",
+    // "downloadLink", "isDownloaded", "downloadProgress" and "lastSelected".
+    inline void to_json(nlohmann::json& j, const ModelVariant& v)
     {
-        j = nlohmann::json{
-            {"type", v.type},
-            {"path", v.path},
-            {"downloadLink", v.downloadLink},
-            {"isDownloaded", v.isDownloaded},
-            {"downloadProgress", v.downloadProgress},
-            {"lastSelected", v.lastSelected}};
+        // Start from an empty object and fill it in key by key.
+        j = nlohmann::json::object();
+        j["type"] = v.type;
+        j["path"] = v.path;
+        j["downloadLink"] = v.downloadLink;
+        j["isDownloaded"] = v.isDownloaded;
+        j["downloadProgress"] = v.downloadProgress;
+        j["lastSelected"] = v.lastSelected;
     }
 
-    inline void from_json(const nlohmann::json &j, ModelVariant &v)
+    inline void from_json(const nlohmann::json& j, ModelVariant& v)
     {
         j.at("type").get_to(v.type);
         j.at("path").get_to(v.path);
@@ -69,42 +71,54 @@ namespace Model
         j.at("lastSelected").get_to(v.lastSelected);
     }
 
+    // A model entry: its name, its author, and any number of variants stored in a
+    // map keyed by a variant-type string.
     struct ModelData
     {
         std::string name;
         std::string author;
-        ModelVariant fullPrecision;
-		ModelVariant quantized8Bit;
-        ModelVariant quantized4Bit;
-
-        ModelData(const std::string &name = "",
-			      const std::string& author = "",
-                  const ModelVariant &fullPrecision = ModelVariant(),
-                  const ModelVariant &quantized8Bit = ModelVariant(),
-                  const ModelVariant &quantized4Bit = ModelVariant())
-            : name(name)
-			, author(author)
-            , fullPrecision(fullPrecision)
-			, quantized8Bit(quantized8Bit)
-            , quantized4Bit(quantized4Bit) {}
+        std::map<std::string, ModelVariant> variants;
+
+        // Builds a model with the given name and author; the variant map starts empty.
+        ModelData(const std::string& name = "", const std::string& author = "")
+            : name(name)
+            , author(author)
+        {
+        }
+
+        // Stores `variant` under `variantType`, overwriting any variant already
+        // stored under that key.
+        void addVariant(const std::string& variantType, const ModelVariant& variant)
+        {
+            variants[variantType] = variant;
+        }
+
+        // Returns true when a variant is stored under `variantType`.
+        bool hasVariant(const std::string& variantType) const
+        {
+            return variants.count(variantType) != 0;
+        }
+
+        // Read-only lookup of the variant stored under `variantType`.
+        // Returns nullptr when there is no such variant.
+        const ModelVariant* getVariant(const std::string& variantType) const
+        {
+            const auto found = variants.find(variantType);
+            if (found == variants.end())
+            {
+                return nullptr;
+            }
+            return &found->second;
+        }
+
+        // Mutable lookup of the variant stored under `variantType`.
+        // Returns nullptr when there is no such variant.
+        ModelVariant* getVariant(const std::string& variantType)
+        {
+            const auto found = variants.find(variantType);
+            if (found == variants.end())
+            {
+                return nullptr;
+            }
+            return &found->second;
+        }
     };
 
-    inline void to_json(nlohmann::json &j, const ModelData &m)
+    // Serializes a ModelData into a JSON object with the keys "name", "author"
+    // and "variants" (the variant map).
+    inline void to_json(nlohmann::json& j, const ModelData& m)
     {
-        j = nlohmann::json{
-            {"name", m.name},
-			{"author", m.author},
-            {"fullPrecision", m.fullPrecision},
-			{"quantized8Bit", m.quantized8Bit},
-            {"quantized4Bit", m.quantized4Bit}};
+        // Start from an empty object and fill it in key by key.
+        j = nlohmann::json::object();
+        j["name"] = m.name;
+        j["author"] = m.author;
+        j["variants"] = m.variants;
     }
 
-    inline void from_json(const nlohmann::json &j, ModelData &m)
+    // Fills a ModelData from a JSON object. The keys "name", "author" and
+    // "variants" are all looked up with at(), so each of them must be present.
+    // NOTE(review): JSON written with the previous layout ("fullPrecision",
+    // "quantized8Bit", "quantized4Bit") has no "variants" key -- confirm no such
+    // files still need to be read.
+    inline void from_json(const nlohmann::json& j, ModelData& m)
     {
-        j.at("name").get_to(m.name);
-		j.at("author").get_to(m.author);
-        j.at("fullPrecision").get_to(m.fullPrecision);
-		j.at("quantized8Bit").get_to(m.quantized8Bit);
-        j.at("quantized4Bit").get_to(m.quantized4Bit);
+        m.name = j.at("name").get<std::string>();
+        m.author = j.at("author").get<std::string>();
+        m.variants = j.at("variants").get<std::map<std::string, ModelVariant>>();
     }
 } // namespace Model
diff --git a/include/model/model_loader_config_manager.hpp b/include/model/model_loader_config_manager.hpp
@@ -78,6 +78,7 @@ namespace Model
         bool getContinuousBatching() const { return config_.cont_batching; }
         bool getWarmup() const { return config_.warmup; }
         int getParallelCount() const { return config_.n_parallel; }
+        int getBatchSize() const { return config_.n_batch; }
         int getGpuLayers() const { return config_.n_gpu_layers; }
 
         // Setters
@@ -88,6 +89,7 @@ namespace Model
         void setContinuousBatching(bool enable) { config_.cont_batching = enable; }
         void setWarmup(bool enable) { config_.warmup = enable; }
         void setParallelCount(int count) { config_.n_parallel = count; }
+        void setBatchSize(int size) { config_.n_batch = size; }
         void setGpuLayers(int layers) { config_.n_gpu_layers = layers; }
 
     private: