Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
d91fd7c
fixed issue: crashed on deleting chat
rifkybujana Jan 15, 2025
e5975a2
- Added persistence KV Cache method\
rifkybujana Jan 22, 2025
4728dbf
added qwen 2.5 code 0.5b - 14b, and added qwen 2.5 14b models
rifkybujana Jan 22, 2025
6a78b30
Merge branch 'main' into dev
rifkybujana Jan 22, 2025
74a586f
merge with main
rifkybujana Jan 22, 2025
8daed41
fix model won't generate if we switch the model on a non empty chat
rifkybujana Jan 22, 2025
ba9446c
added deepseek r1
rifkybujana Jan 23, 2025
535bd81
removed deepseek unsupported
rifkybujana Jan 23, 2025
812cd45
Revert "added deepseek r1"
rifkybujana Jan 23, 2025
ccfc310
Revert "removed deepseek unsupported"
rifkybujana Jan 23, 2025
69268ad
added deepseek r1 support
rifkybujana Jan 23, 2025
517bd1f
added base markdown rendering
rifkybujana Jan 26, 2025
4135ddf
added ImGuiColorTextEdit for handling code rendering on markdown
rifkybujana Jan 26, 2025
847c726
added markdown renderer
rifkybujana Jan 28, 2025
e9e827a
added modified imgui_md
rifkybujana Jan 28, 2025
66c18f2
added cancel button and fix the model card duplication issue
rifkybujana Feb 1, 2025
e8bde60
fix last selected model issue
rifkybujana Feb 1, 2025
60b686e
added tps
rifkybujana Feb 1, 2025
c1ba124
don't show empty thought
rifkybujana Feb 1, 2025
899a8c2
add automation to detect number of threads to use
rifkybujana Feb 1, 2025
67c2a3a
add fallback to failed model loading
rifkybujana Feb 1, 2025
c813e6b
Merge branch 'main' into dev
rifkybujana Feb 1, 2025
42fb04a
fix model loaded synchronization on start and code rendering block glitch
rifkybujana Feb 1, 2025
d73ab5e
remove text debugging
rifkybujana Feb 1, 2025
dbfb365
refactored history sidebar
rifkybujana Feb 5, 2025
65f45b2
refactored preset sidebar
rifkybujana Feb 5, 2025
5d53b90
fix preset selection
rifkybujana Feb 7, 2025
0ea6802
re-refactor chat history sidebar
rifkybujana Feb 7, 2025
c00e99f
refactor model manager
rifkybujana Feb 7, 2025
83c12cd
rename constant namespace
rifkybujana Feb 7, 2025
4a25059
refactor chat history render
rifkybujana Feb 7, 2025
c00d90a
refactor chat_section
rifkybujana Feb 7, 2025
b58f1db
refactor main code and fixed preset selection duplication bug
rifkybujana Feb 8, 2025
90ebc27
fixed kv cache deletion and renaming
rifkybujana Feb 8, 2025
090a986
moved tab manager to ui/tab_manager.hpp
rifkybujana Feb 8, 2025
36af3a1
added stop generation button
rifkybujana Feb 10, 2025
ea59aaa
refactor input field rendering
rifkybujana Feb 10, 2025
73d2e71
added regenerate button
rifkybujana Feb 10, 2025
141aaf6
added regenerate functionality
rifkybujana Feb 10, 2025
88f0b90
stop all jobs on exit
rifkybujana Feb 10, 2025
8a20091
track job ids within model manager
rifkybujana Feb 10, 2025
d564491
restyle loading bar
rifkybujana Feb 10, 2025
9a5bfa8
[workaround] fixed the delete model button didn't work
rifkybujana Feb 10, 2025
19d35c1
handle model loading asynchronously
rifkybujana Feb 11, 2025
d1b6d30
refactored progress bar widget
rifkybujana Feb 11, 2025
8d31c98
fixed can't find IndeterminedProgressBar error, and progress bar pos…
rifkybujana Feb 11, 2025
abeda36
add unload model functionality
rifkybujana Feb 12, 2025
c1d4967
fixed chat code block ui glitch
rifkybujana Feb 12, 2025
88f38f5
fixed bug model trying to regenerate even if no model loaded, and add…
rifkybujana Feb 12, 2025
7c9c368
added context shifting on the engine
rifkybujana Feb 13, 2025
67bcfc7
if max_new_token set to be 0, don't stop until eos (infinitely generate)
rifkybujana Feb 13, 2025
8df4d2c
update engine
rifkybujana Feb 13, 2025
626967e
update the kv cache loading for context shifting
rifkybujana Feb 13, 2025
ac70696
Merge branch 'main' into dev
rifkybujana Feb 13, 2025
c09aa85
fix delete chat doesn't have to pass model name and variant name
rifkybujana Feb 13, 2025
9b6f5f0
Merge branch 'dev' of https://github.com/genta-technology/kolosal int…
rifkybujana Feb 13, 2025
bdd81f3
fixed merge with main branch
rifkybujana Feb 13, 2025
9139b34
Merge branch 'main' into dev
rifkybujana Feb 13, 2025
5eb446d
refactor model generation callback
rifkybujana Mar 2, 2025
959fcd1
added kolosal server library
rifkybujana Mar 3, 2025
6456ce7
model server tab
rifkybujana Mar 5, 2025
c49bb59
added server logs, model selection, tab selection buttons, and loadin…
rifkybujana Mar 6, 2025
f975644
added server functionality
rifkybujana Mar 7, 2025
96ce0d4
added reload model buttons
rifkybujana Mar 7, 2025
0b6eb1c
update installer version
rifkybujana Mar 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,6 @@ build/*
out/*

# debugging files
*.pdb
*.pdb

**/**/__pycache__/*
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@
[submodule "external/imspinner"]
path = external/imspinner
url = https://github.com/dalerank/imspinner
[submodule "kolosal-server"]
path = kolosal-server
url = https://github.com/genta-technology/kolosal-server
branch = dev
19 changes: 18 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ target_include_directories(kolosal_lib PUBLIC
${EXTERNAL_DIR}/imspinner
${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/assets
${CMAKE_SOURCE_DIR}/kolosal-server/include
${CURL_INCLUDE_DIR}
)

Expand Down Expand Up @@ -165,6 +166,9 @@ else()
)
endif()

# ==== Kolosal Server Shared Library ====
add_subdirectory(${CMAKE_SOURCE_DIR}/kolosal-server)

# ==== Main Executable ====
if (DEBUG)
add_executable(KolosalDesktop
Expand All @@ -178,7 +182,11 @@ else()
)
endif()

target_link_libraries(KolosalDesktop PRIVATE kolosal_lib)
# Link both the engine (kolosal_lib) and the Kolosal server shared library.
target_link_libraries(KolosalDesktop PRIVATE
kolosal_lib
kolosal_server
)

# ==== Post-Build Commands ====
# Copy fonts
Expand Down Expand Up @@ -219,6 +227,15 @@ add_custom_command(
"${EXTERNAL_DIR}/curl/bin" "$<TARGET_FILE_DIR:KolosalDesktop>"
)

# Copy Kolosal Server DLL
add_custom_command(
TARGET KolosalDesktop POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"$<TARGET_FILE:kolosal_server>"
"$<TARGET_FILE_DIR:KolosalDesktop>"
COMMENT "Copying Kolosal Server DLL to output directory"
)

# Copy Inference Engine DLLs
add_custom_command(
TARGET KolosalDesktop POST_BUILD
Expand Down
3 changes: 1 addition & 2 deletions CMakeSettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@
"cmakeCommandArgs": "-DDEBUG=ON",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x64_x64" ],
"variables": []
"inheritEnvironments": [ "msvc_x64_x64" ]
}
]
}
Binary file modified external/genta-personal/bin/InferenceEngineLib.dll
Binary file not shown.
Binary file modified external/genta-personal/bin/InferenceEngineLibVulkan.dll
Binary file not shown.
2 changes: 1 addition & 1 deletion external/genta-personal/include/inference.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class INFERENCE_API InferenceEngine : public IInferenceEngine
public:
explicit InferenceEngine();

bool loadModel(const char* engineDir, const int mainGpuId = -1);
bool loadModel(const char* engineDir, const LoadingParameters lParams, const int mainGpuId = -1);

bool unloadModel();

Expand Down
2 changes: 1 addition & 1 deletion external/genta-personal/include/inference_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class IInferenceEngine {
public:
virtual ~IInferenceEngine() = default;

virtual bool loadModel(const char* engineDir, const int mainGpuId = -1) = 0;
virtual bool loadModel(const char* engineDir, const LoadingParameters lParams, const int mainGpuId = -1) = 0;
virtual bool unloadModel() = 0;
virtual int submitCompletionsJob(const CompletionParameters& params) = 0;
virtual int submitChatCompletionsJob(const ChatCompletionParameters& params) = 0;
Expand Down
26 changes: 26 additions & 0 deletions external/genta-personal/include/job.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
#include <memory>
#include <exception>

#include "types.h"
#include "llama.h"
#include "common.h"
#include "sampling.h"

struct Job {
int jobId;
std::mutex mtx;
Expand All @@ -20,6 +25,27 @@ struct Job {
std::string errorMessage;
float tps = 0;
std::atomic<bool> cancelRequested{ false };
CompletionParameters params;

bool isDecodingPrompt = true;

int n_past;
int n_remain;
int i_prompt;
int n_prompt;
size_t n_matching_session_tokens;

std::vector<llama_token> session_tokens;
std::vector<llama_token> embd_inp;
std::string path_session;
struct common_sampler* smpl = nullptr;
int batch_pos = 0;

~Job() {
if (smpl) {
common_sampler_free(smpl);
}
}
};

#endif // JOB_H
12 changes: 12 additions & 0 deletions external/genta-personal/include/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,16 @@ struct CompletionResult
float tps;
};

// Parameters controlling how a model is loaded into the inference engine.
// Field names mirror llama.cpp loading options -- semantics assumed from that
// convention; TODO confirm against the engine's documentation.
struct LoadingParameters
{
int n_ctx = 4096;          // context window size in tokens
int n_keep = 2048;         // tokens kept from the prompt start when the context is shifted
bool use_mlock = true;     // presumably locks model memory to avoid swapping -- verify
bool use_mmap = false;     // memory-map the model file instead of reading it into RAM
bool cont_batching = true; // enable continuous batching of requests
bool warmup = false;       // run a warmup pass after loading
int n_parallel = 1;        // number of parallel sequences/slots
int n_gpu_layers = 100;    // number of layers to offload to the GPU
};

#endif // TYPES_H
Binary file modified external/genta-personal/lib/InferenceEngineLib.lib
Binary file not shown.
Binary file modified external/genta-personal/lib/InferenceEngineLibVulkan.lib
Binary file not shown.
7 changes: 7 additions & 0 deletions include/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ namespace Config
constexpr float MAX_SIDEBAR_WIDTH = 400.0F;
} // namespace ModelSettings

namespace DeploymentSettingsSidebar
{
constexpr float SIDEBAR_WIDTH = 200.0F;
constexpr float MIN_SIDEBAR_WIDTH = 200.0F;
constexpr float MAX_SIDEBAR_WIDTH = 400.0F;
} // namespace DeploymentSettingsSidebar

namespace Color
{
constexpr ImVec4 TRANSPARENT_COL = ImVec4(0.0F, 0.0F, 0.0F, 0.0F);
Expand Down
113 changes: 113 additions & 0 deletions include/model/model_loader_config_manager.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#ifndef MODEL_LOADER_CONFIG_MANAGER_HPP
#define MODEL_LOADER_CONFIG_MANAGER_HPP

#include "model_loader_config_persistence.hpp"

#include <string>
#include <json.hpp>
#include <types.h>
#include <iostream>

namespace Model
{
    /**
     * @brief Singleton managing the LLM model loading configuration.
     *
     * Wraps a LoadingParameters value, exposing typed accessors and
     * load/save round-trips through ModelLoaderConfigPersistence.
     */
    class ModelLoaderConfigManager {
    public:
        /**
         * @brief Get singleton instance of config manager
         * @param configFilePath Path to the configuration file (optional on first call)
         * @return Reference to the singleton instance
         *
         * Only the path supplied on the very first call takes effect; passing a
         * different non-empty path on later calls is ignored with a warning.
         */
        static ModelLoaderConfigManager& getInstance(const std::string& configFilePath = "")
        {
            static ModelLoaderConfigManager instance(
                configFilePath.empty() ? "model_config.json" : configFilePath);

            const bool ignoredPath = !configFilePath.empty()
                && configFilePath != instance.configFilePath_;
            if (ignoredPath) {
                // The singleton was already constructed with another file; tell
                // the caller their path has no effect.
                std::cerr << "Warning: Config file path '" << configFilePath
                          << "' is ignored as the instance is already initialized with '"
                          << instance.configFilePath_ << "'" << std::endl;
            }

            return instance;
        }

        // Singletons are neither copyable nor assignable.
        ModelLoaderConfigManager(const ModelLoaderConfigManager&) = delete;
        ModelLoaderConfigManager& operator=(const ModelLoaderConfigManager&) = delete;

        /// @brief Access the full current configuration.
        const LoadingParameters& getConfig() const { return config_; }

        /// @brief Replace the configuration wholesale (not persisted automatically).
        void setConfig(const LoadingParameters& config) { config_ = config; }

        /// @brief Persist the current configuration to disk.
        /// @return true if successful, false otherwise
        bool saveConfig() { return persistence_.saveToFile(config_, configFilePath_); }

        /// @brief Reload the configuration from disk.
        /// @return true if successful, false otherwise
        bool loadConfig() { return persistence_.loadFromFile(configFilePath_, config_); }

        // --- Field accessors ---
        int  getContextSize() const        { return config_.n_ctx; }
        int  getKeepSize() const           { return config_.n_keep; }
        bool getUseMlock() const           { return config_.use_mlock; }
        bool getUseMmap() const            { return config_.use_mmap; }
        bool getContinuousBatching() const { return config_.cont_batching; }
        bool getWarmup() const             { return config_.warmup; }
        int  getParallelCount() const      { return config_.n_parallel; }
        int  getGpuLayers() const          { return config_.n_gpu_layers; }

        // --- Field mutators (in-memory only; call saveConfig() to persist) ---
        void setContextSize(int size)           { config_.n_ctx = size; }
        void setKeepSize(int size)              { config_.n_keep = size; }
        void setUseMlock(bool use)              { config_.use_mlock = use; }
        void setUseMmap(bool use)               { config_.use_mmap = use; }
        void setContinuousBatching(bool enable) { config_.cont_batching = enable; }
        void setWarmup(bool enable)             { config_.warmup = enable; }
        void setParallelCount(int count)        { config_.n_parallel = count; }
        void setGpuLayers(int layers)           { config_.n_gpu_layers = layers; }

    private:
        // Loads the file eagerly; on failure the in-struct defaults of
        // LoadingParameters remain in effect.
        explicit ModelLoaderConfigManager(const std::string& configFilePath)
            : configFilePath_(configFilePath)
        {
            if (!loadConfig()) {
                std::cout << "Using default configuration values" << std::endl;
            }
        }

        LoadingParameters config_;
        std::string configFilePath_;
        ModelLoaderConfigPersistence persistence_;
    };

    /// @brief Eagerly construct the singleton with the given config file path.
    inline void initializeModelLoaderConfigManager(const std::string& configFilePath = "") {
        ModelLoaderConfigManager::getInstance(configFilePath);
    }

} // namespace Model

#endif // MODEL_LOADER_CONFIG_MANAGER_HPP
95 changes: 95 additions & 0 deletions include/model/model_loader_config_persistence.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#ifndef MODEL_LOADER_CONFIG_PERSISTENCE_HPP
#define MODEL_LOADER_CONFIG_PERSISTENCE_HPP

#include <string>
#include <fstream>   // std::ifstream / std::ofstream (previously only available transitively)
#include <iostream>  // std::cerr (previously only available transitively)
#include <json.hpp>
#include <types.h>

namespace Model
{
    /**
     * @brief Reads and writes LoadingParameters as a JSON file on disk.
     */
    class ModelLoaderConfigPersistence {
    public:
        /**
         * @brief Save configuration to a JSON file
         * @param config The model loader configuration
         * @param filePath Path to save the configuration
         * @return true if successful, false otherwise
         */
        bool saveToFile(const LoadingParameters& config, const std::string& filePath) const {
            try {
                const nlohmann::json j = configToJson(config);

                std::ofstream file(filePath);
                if (!file.is_open()) {
                    std::cerr << "Error: Could not open file for writing: " << filePath << std::endl;
                    return false;
                }

                file << j.dump(4); // Pretty print with 4 spaces indentation
                file.close();

                // Report stream write failures (disk full, revoked permissions, ...)
                // instead of unconditionally returning true.
                return !file.fail();
            }
            catch (const std::exception& e) {
                std::cerr << "Error saving configuration: " << e.what() << std::endl;
                return false;
            }
        }

        /**
         * @brief Load configuration from a JSON file
         * @param filePath Path to the configuration file
         * @param config The configuration to populate; keys missing from the
         *               file leave the corresponding fields unchanged
         * @return true if successful, false otherwise
         */
        bool loadFromFile(const std::string& filePath, LoadingParameters& config) const {
            try {
                std::ifstream file(filePath);
                if (!file.is_open()) {
                    std::cerr << "Error: Could not open file for reading: " << filePath << std::endl;
                    return false;
                }

                nlohmann::json j;
                file >> j;
                file.close();

                jsonToConfig(j, config);
                return true;
            }
            catch (const std::exception& e) {
                std::cerr << "Error loading configuration: " << e.what() << std::endl;
                return false;
            }
        }

    private:
        // Serialize every tunable field; keys intentionally match the member names.
        nlohmann::json configToJson(const LoadingParameters& config) const {
            nlohmann::json j;

            j["n_ctx"] = config.n_ctx;
            j["n_keep"] = config.n_keep;
            j["use_mlock"] = config.use_mlock;
            j["use_mmap"] = config.use_mmap;
            j["cont_batching"] = config.cont_batching;
            j["warmup"] = config.warmup;
            j["n_parallel"] = config.n_parallel;
            j["n_gpu_layers"] = config.n_gpu_layers;

            return j;
        }

        // Populate only the keys present in the JSON; json.value() falls back to
        // the field's current value, so absent keys leave defaults untouched.
        // A present key with a mismatched type throws and is reported by the caller.
        void jsonToConfig(const nlohmann::json& json, LoadingParameters& config) const {
            config.n_ctx = json.value("n_ctx", config.n_ctx);
            config.n_keep = json.value("n_keep", config.n_keep);
            config.use_mlock = json.value("use_mlock", config.use_mlock);
            config.use_mmap = json.value("use_mmap", config.use_mmap);
            config.cont_batching = json.value("cont_batching", config.cont_batching);
            config.warmup = json.value("warmup", config.warmup);
            config.n_parallel = json.value("n_parallel", config.n_parallel);
            config.n_gpu_layers = json.value("n_gpu_layers", config.n_gpu_layers);
        }
    };
} // namespace Model

#endif // MODEL_LOADER_CONFIG_PERSISTENCE_HPP
Loading
Loading