diff --git a/.gitignore b/.gitignore index ef0a665..c4cc200 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,6 @@ build/* out/* # debugging files -*.pdb \ No newline at end of file +*.pdb + +**/**/__pycache__/* \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index fa7941a..ee5f9a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,7 @@ [submodule "external/imspinner"] path = external/imspinner url = https://github.com/dalerank/imspinner +[submodule "kolosal-server"] + path = kolosal-server + url = https://github.com/genta-technology/kolosal-server + branch = dev diff --git a/CMakeLists.txt b/CMakeLists.txt index 2af203c..263118e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -124,6 +124,7 @@ target_include_directories(kolosal_lib PUBLIC ${EXTERNAL_DIR}/imspinner ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/assets + ${CMAKE_SOURCE_DIR}/kolosal-server/include ${CURL_INCLUDE_DIR} ) @@ -165,6 +166,9 @@ else() ) endif() +# ==== Kolosal Server Shared Library ==== +add_subdirectory(${CMAKE_SOURCE_DIR}/kolosal-server) + # ==== Main Executable ==== if (DEBUG) add_executable(KolosalDesktop @@ -178,7 +182,11 @@ else() ) endif() -target_link_libraries(KolosalDesktop PRIVATE kolosal_lib) +# Link both the engine (kolosal_lib) and the Kolosal server shared library. 
+target_link_libraries(KolosalDesktop PRIVATE + kolosal_lib + kolosal_server +) # ==== Post-Build Commands ==== # Copy fonts @@ -219,6 +227,15 @@ add_custom_command( "${EXTERNAL_DIR}/curl/bin" "$" ) +# Copy Kolosal Server DLL +add_custom_command( + TARGET KolosalDesktop POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "$" + "$" + COMMENT "Copying Kolosal Server DLL to output directory" +) + # Copy Inference Engine DLLs add_custom_command( TARGET KolosalDesktop POST_BUILD diff --git a/CMakeSettings.json b/CMakeSettings.json index 7a17477..f5ff921 100644 --- a/CMakeSettings.json +++ b/CMakeSettings.json @@ -31,8 +31,7 @@ "cmakeCommandArgs": "-DDEBUG=ON", "buildCommandArgs": "", "ctestCommandArgs": "", - "inheritEnvironments": [ "msvc_x64_x64" ], - "variables": [] + "inheritEnvironments": [ "msvc_x64_x64" ] } ] } \ No newline at end of file diff --git a/external/genta-personal/bin/InferenceEngineLib.dll b/external/genta-personal/bin/InferenceEngineLib.dll index c16b57e..56890bd 100644 Binary files a/external/genta-personal/bin/InferenceEngineLib.dll and b/external/genta-personal/bin/InferenceEngineLib.dll differ diff --git a/external/genta-personal/bin/InferenceEngineLibVulkan.dll b/external/genta-personal/bin/InferenceEngineLibVulkan.dll index 7a5ef37..07e8ca2 100644 Binary files a/external/genta-personal/bin/InferenceEngineLibVulkan.dll and b/external/genta-personal/bin/InferenceEngineLibVulkan.dll differ diff --git a/external/genta-personal/include/inference.h b/external/genta-personal/include/inference.h index e9b999d..2220251 100644 --- a/external/genta-personal/include/inference.h +++ b/external/genta-personal/include/inference.h @@ -34,7 +34,7 @@ class INFERENCE_API InferenceEngine : public IInferenceEngine public: explicit InferenceEngine(); - bool loadModel(const char* engineDir, const int mainGpuId = -1); + bool loadModel(const char* engineDir, const LoadingParameters lParams, const int mainGpuId = -1); bool unloadModel(); diff --git 
a/external/genta-personal/include/inference_interface.h b/external/genta-personal/include/inference_interface.h index ae5ec3e..541a40d 100644 --- a/external/genta-personal/include/inference_interface.h +++ b/external/genta-personal/include/inference_interface.h @@ -17,7 +17,7 @@ class IInferenceEngine { public: virtual ~IInferenceEngine() = default; - virtual bool loadModel(const char* engineDir, const int mainGpuId = -1) = 0; + virtual bool loadModel(const char* engineDir, const LoadingParameters lParams, const int mainGpuId = -1) = 0; virtual bool unloadModel() = 0; virtual int submitCompletionsJob(const CompletionParameters& params) = 0; virtual int submitChatCompletionsJob(const ChatCompletionParameters& params) = 0; diff --git a/external/genta-personal/include/job.h b/external/genta-personal/include/job.h index b22257d..2aeaace 100644 --- a/external/genta-personal/include/job.h +++ b/external/genta-personal/include/job.h @@ -9,6 +9,11 @@ #include #include +#include "types.h" +#include "llama.h" +#include "common.h" +#include "sampling.h" + struct Job { int jobId; std::mutex mtx; @@ -20,6 +25,27 @@ struct Job { std::string errorMessage; float tps = 0; std::atomic cancelRequested{ false }; + CompletionParameters params; + + bool isDecodingPrompt = true; + + int n_past; + int n_remain; + int i_prompt; + int n_prompt; + size_t n_matching_session_tokens; + + std::vector session_tokens; + std::vector embd_inp; + std::string path_session; + struct common_sampler* smpl = nullptr; + int batch_pos = 0; + + ~Job() { + if (smpl) { + common_sampler_free(smpl); + } + } }; #endif // JOB_H \ No newline at end of file diff --git a/external/genta-personal/include/types.h b/external/genta-personal/include/types.h index decd41e..49796ec 100644 --- a/external/genta-personal/include/types.h +++ b/external/genta-personal/include/types.h @@ -57,4 +57,16 @@ struct CompletionResult float tps; }; +struct LoadingParameters +{ + int n_ctx = 4096; + int n_keep = 2048; + bool use_mlock = 
true; + bool use_mmap = false; + bool cont_batching = true; + bool warmup = false; + int n_parallel = 1; + int n_gpu_layers = 100; +}; + #endif // TYPES_H \ No newline at end of file diff --git a/external/genta-personal/lib/InferenceEngineLib.lib b/external/genta-personal/lib/InferenceEngineLib.lib index 6afa937..86c9cd4 100644 Binary files a/external/genta-personal/lib/InferenceEngineLib.lib and b/external/genta-personal/lib/InferenceEngineLib.lib differ diff --git a/external/genta-personal/lib/InferenceEngineLibVulkan.lib b/external/genta-personal/lib/InferenceEngineLibVulkan.lib index ee187ed..d477528 100644 Binary files a/external/genta-personal/lib/InferenceEngineLibVulkan.lib and b/external/genta-personal/lib/InferenceEngineLibVulkan.lib differ diff --git a/include/config.hpp b/include/config.hpp index bb882ae..27fef43 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -88,6 +88,13 @@ namespace Config constexpr float MAX_SIDEBAR_WIDTH = 400.0F; } // namespace ModelSettings + namespace DeploymentSettingsSidebar + { + constexpr float SIDEBAR_WIDTH = 200.0F; + constexpr float MIN_SIDEBAR_WIDTH = 200.0F; + constexpr float MAX_SIDEBAR_WIDTH = 400.0F; + } // namespace DeploymentSettingsSidebar + namespace Color { constexpr ImVec4 TRANSPARENT_COL = ImVec4(0.0F, 0.0F, 0.0F, 0.0F); diff --git a/include/model/model_loader_config_manager.hpp b/include/model/model_loader_config_manager.hpp new file mode 100644 index 0000000..b4c153c --- /dev/null +++ b/include/model/model_loader_config_manager.hpp @@ -0,0 +1,113 @@ +#ifndef MODEL_LOADER_CONFIG_MANAGER_HPP +#define MODEL_LOADER_CONFIG_MANAGER_HPP + +#include "model_loader_config_persistence.hpp" + +#include +#include +#include +#include + +namespace Model +{ + /** + * @brief Class for managing LLM model loading configuration + */ + class ModelLoaderConfigManager { + public: + /** + * @brief Get singleton instance of config manager + * @param configFilePath Path to the configuration file (optional on first call) + 
* @return Reference to the singleton instance + */ + static ModelLoaderConfigManager& getInstance(const std::string& configFilePath = "") + { + static ModelLoaderConfigManager instance(configFilePath.empty() ? "model_config.json" : configFilePath); + + if (!configFilePath.empty() && configFilePath != instance.configFilePath_) { + // Log a warning that the config file path is being ignored after initialization + std::cerr << "Warning: Config file path '" << configFilePath + << "' is ignored as the instance is already initialized with '" + << instance.configFilePath_ << "'" << std::endl; + } + + return instance; + } + + // Delete copy constructor and assignment operator + ModelLoaderConfigManager(const ModelLoaderConfigManager&) = delete; + ModelLoaderConfigManager& operator=(const ModelLoaderConfigManager&) = delete; + + /** + * @brief Get the current configuration + * @return Reference to the current configuration + */ + const LoadingParameters& getConfig() const { + return config_; + } + + /** + * @brief Set a complete new configuration + * @param config The new configuration + */ + void setConfig(const LoadingParameters& config) { + config_ = config; + } + + /** + * @brief Save current configuration to disk + * @return true if successful, false otherwise + */ + bool saveConfig() { + return persistence_.saveToFile(config_, configFilePath_); + } + + /** + * @brief Load configuration from disk + * @return true if successful, false otherwise + */ + bool loadConfig() { + return persistence_.loadFromFile(configFilePath_, config_); + } + + // Getters + int getContextSize() const { return config_.n_ctx; } + int getKeepSize() const { return config_.n_keep; } + bool getUseMlock() const { return config_.use_mlock; } + bool getUseMmap() const { return config_.use_mmap; } + bool getContinuousBatching() const { return config_.cont_batching; } + bool getWarmup() const { return config_.warmup; } + int getParallelCount() const { return config_.n_parallel; } + int getGpuLayers() 
const { return config_.n_gpu_layers; } + + // Setters + void setContextSize(int size) { config_.n_ctx = size; } + void setKeepSize(int size) { config_.n_keep = size; } + void setUseMlock(bool use) { config_.use_mlock = use; } + void setUseMmap(bool use) { config_.use_mmap = use; } + void setContinuousBatching(bool enable) { config_.cont_batching = enable; } + void setWarmup(bool enable) { config_.warmup = enable; } + void setParallelCount(int count) { config_.n_parallel = count; } + void setGpuLayers(int layers) { config_.n_gpu_layers = layers; } + + private: + explicit ModelLoaderConfigManager(const std::string& configFilePath) + : configFilePath_(configFilePath) { + // Try loading from file, if it fails, use default values + if (!loadConfig()) { + std::cout << "Using default configuration values" << std::endl; + } + } + + LoadingParameters config_; + std::string configFilePath_; + ModelLoaderConfigPersistence persistence_; + }; + + inline void initializeModelLoaderConfigManager(const std::string& configFilePath = "") { + ModelLoaderConfigManager::getInstance(configFilePath); + } + +} // namespace Model + +#endif // MODEL_LOADER_CONFIG_MANAGER_HPP \ No newline at end of file diff --git a/include/model/model_loader_config_persistence.hpp b/include/model/model_loader_config_persistence.hpp new file mode 100644 index 0000000..2ed61ba --- /dev/null +++ b/include/model/model_loader_config_persistence.hpp @@ -0,0 +1,95 @@ +#ifndef MODEL_LOADER_CONFIG_PERSISTENCE_HPP +#define MODEL_LOADER_CONFIG_PERSISTENCE_HPP + +#include +#include +#include + +namespace Model +{ + class ModelLoaderConfigPersistence { + public: + /** + * @brief Save configuration to a JSON file + * @param config The model loader configuration + * @param filePath Path to save the configuration + * @return true if successful, false otherwise + */ + bool saveToFile(const LoadingParameters& config, const std::string& filePath) { + try { + nlohmann::json j = configToJson(config); + + std::ofstream 
file(filePath); + if (!file.is_open()) { + std::cerr << "Error: Could not open file for writing: " << filePath << std::endl; + return false; + } + + file << j.dump(4); // Pretty print with 4 spaces indentation + file.close(); + + return true; + } + catch (const std::exception& e) { + std::cerr << "Error saving configuration: " << e.what() << std::endl; + return false; + } + } + + /** + * @brief Load configuration from a JSON file + * @param filePath Path to the configuration file + * @param config The configuration to populate + * @return true if successful, false otherwise + */ + bool loadFromFile(const std::string& filePath, LoadingParameters& config) { + try { + std::ifstream file(filePath); + if (!file.is_open()) { + std::cerr << "Error: Could not open file for reading: " << filePath << std::endl; + return false; + } + + nlohmann::json j; + file >> j; + file.close(); + + jsonToConfig(j, config); + return true; + } + catch (const std::exception& e) { + std::cerr << "Error loading configuration: " << e.what() << std::endl; + return false; + } + } + + private: + nlohmann::json configToJson(const LoadingParameters& config) { + nlohmann::json j; + + j["n_ctx"] = config.n_ctx; + j["n_keep"] = config.n_keep; + j["use_mlock"] = config.use_mlock; + j["use_mmap"] = config.use_mmap; + j["cont_batching"] = config.cont_batching; + j["warmup"] = config.warmup; + j["n_parallel"] = config.n_parallel; + j["n_gpu_layers"] = config.n_gpu_layers; + + return j; + } + + void jsonToConfig(const nlohmann::json& json, LoadingParameters& config) { + if (json.contains("n_ctx")) config.n_ctx = json["n_ctx"]; + if (json.contains("n_keep")) config.n_keep = json["n_keep"]; + if (json.contains("use_mlock")) config.use_mlock = json["use_mlock"]; + if (json.contains("use_mmap")) config.use_mmap = json["use_mmap"]; + if (json.contains("cont_batching")) config.cont_batching = json["cont_batching"]; + if (json.contains("warmup")) config.warmup = json["warmup"]; + if (json.contains("n_parallel")) 
config.n_parallel = json["n_parallel"]; + if (json.contains("n_gpu_layers")) config.n_gpu_layers = json["n_gpu_layers"]; + } + }; +} // namespace Model + +#endif // MODEL_LOADER_CONFIG_PERSISTENCE_HPP \ No newline at end of file diff --git a/include/model/model_manager.hpp b/include/model/model_manager.hpp index 15bd630..aea172f 100644 --- a/include/model/model_manager.hpp +++ b/include/model/model_manager.hpp @@ -2,7 +2,9 @@ #include "preset_manager.hpp" #include "model_persistence.hpp" +#include "model_loader_config_manager.hpp" +#include #include #include #include @@ -249,6 +251,35 @@ namespace Model // Inference Engine //-------------------------------------------------------------------------------------------- + ChatCompletionParameters buildChatCompletionParameters( + const ChatCompletionRequest& request) { + ChatCompletionParameters params; + + // Copy messages from the request + for (const auto& msg : request.messages) { + params.messages.push_back({ msg.role, msg.content }); + } + + // Map parameters from request to our format + if (request.seed.has_value()) { + params.randomSeed = request.seed.value(); + } + + if (request.max_tokens.has_value()) { + params.maxNewTokens = request.max_tokens.value(); + } + else { + // Use a reasonable default if not specified + params.maxNewTokens = 1024; + } + + params.temperature = request.temperature; + params.topP = request.top_p; + params.streaming = request.stream; + + return params; + } + ChatCompletionParameters buildChatCompletionParameters( const Chat::ChatHistory& currentChat, const std::string& userInput @@ -337,12 +368,6 @@ namespace Model return completionParams; } - void setStreamingCallback(std::function callback) - { - std::unique_lock lock(m_mutex); - m_streamingCallback = std::move(callback); - } - bool stopJob(int jobId) { std::shared_lock lock(m_mutex); @@ -355,7 +380,138 @@ namespace Model return true; } - int startCompletionJob(const CompletionParameters& params) + CompletionResult completeSync(const 
CompletionParameters& params) + { + { + std::shared_lock lock(m_mutex); + if (!m_inferenceEngine) + { + std::cerr << "[ModelManager] Inference engine is not initialized.\n"; + CompletionResult result; + result.text = ""; + result.tps = 0.0F; + return result; + } + if (!m_modelLoaded) + { + std::cerr << "[ModelManager] No model is currently loaded.\n"; + CompletionResult result; + result.text = ""; + result.tps = 0.0F; + return result; + } + } + + int jobId = m_inferenceEngine->submitCompletionsJob(params); + if (jobId < 0) { + std::cerr << "[ModelManager] Failed to submit completions job.\n"; + CompletionResult result; + result.text = ""; + result.tps = 0.0F; + return result; + } + + // Add job ID with proper synchronization + { + std::unique_lock lock(m_mutex); + m_jobIds.push_back(jobId); + } + + // Wait for the job to complete + m_inferenceEngine->waitForJob(jobId); + + // Get the final result + CompletionResult result = m_inferenceEngine->getJobResult(jobId); + + // Check for errors + if (m_inferenceEngine->hasJobError(jobId)) { + std::cerr << "[ModelManager] Error in completion job: " + << m_inferenceEngine->getJobError(jobId) << std::endl; + } + + // Clean up with proper synchronization + { + std::unique_lock lock(m_mutex); + m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end()); + } + + return result; + } + + CompletionResult chatCompleteSync(const ChatCompletionParameters& params) + { + { + std::shared_lock lock(m_mutex); + if (!m_inferenceEngine) + { + std::cerr << "[ModelManager] Inference engine is not initialized.\n"; + CompletionResult result; + result.text = ""; + result.tps = 0.0F; + return result; + } + if (!m_modelLoaded) + { + std::cerr << "[ModelManager] No model is currently loaded.\n"; + CompletionResult result; + result.text = ""; + result.tps = 0.0F; + return result; + } + } + + int jobId = m_inferenceEngine->submitChatCompletionsJob(params); + if (jobId < 0) { + std::cerr << "[ModelManager] Failed to submit 
chat completions job.\n"; + CompletionResult result; + result.text = ""; + result.tps = 0.0F; + return result; + } + + // Add job ID with proper synchronization + { + std::unique_lock lock(m_mutex); + m_jobIds.push_back(jobId); + } + + auto& chatManager = Chat::ChatManager::getInstance(); + + // Wait for the job to complete + m_inferenceEngine->waitForJob(jobId); + + // Get the final result + CompletionResult result = m_inferenceEngine->getJobResult(jobId); + + // Check for errors + if (m_inferenceEngine->hasJobError(jobId)) { + std::cerr << "[ModelManager] Error in chat completion job: " + << m_inferenceEngine->getJobError(jobId) << std::endl; + } + + // Clean up with proper synchronization + { + std::unique_lock lock(m_mutex); + m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end()); + } + + // Save the chat history + auto chatName = chatManager.getChatNameByJobId(jobId); + if (!chatManager.saveChat(chatName)) + { + std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl; + } + + // Reset jobid tracking on chat manager + if (!chatManager.removeJobId(jobId)) + { + std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + } + + return result; + } + + int startCompletionJob(const CompletionParameters& params, std::function streamingCallback) { { std::shared_lock lock(m_mutex); @@ -377,44 +533,45 @@ namespace Model return -1; } - m_jobIds.push_back(jobId); + // Add job ID with proper synchronization + { + std::unique_lock lock(m_mutex); + m_jobIds.push_back(jobId); + } - std::thread([this, jobId]() { + std::thread([this, jobId, streamingCallback]() { // Poll while job is running or until the engine says it's done - this->setModelGenerationInProgress(true); while (true) { if (this->m_inferenceEngine->hasJobError(jobId)) break; CompletionResult partial = this->m_inferenceEngine->getJobResult(jobId); + bool isFinished = this->m_inferenceEngine->isJobFinished(jobId); if (!partial.text.empty()) { - // 
Call the user�s callback - // (hold shared lock if needed to be thread-safe) - std::shared_lock lock(m_mutex); - if (m_streamingCallback) { - m_streamingCallback(partial.text, partial.tps, jobId); + // Call the user's callback (no need to lock for the callback) + if (streamingCallback) { + streamingCallback(partial.text, partial.tps, jobId, isFinished); } } - if (this->m_inferenceEngine->isJobFinished(jobId)) break; + if (isFinished) break; // Sleep briefly to avoid busy-waiting std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - this->setModelGenerationInProgress(false); - + // Remove job ID with proper synchronization { - // remove job id from m_jobIds - m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end()); + std::unique_lock lock(m_mutex); + m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end()); } - // Reset jobid tracking on chat manager to -1 + // Reset jobid tracking on chat manager { if (!Chat::ChatManager::getInstance().removeJobId(jobId)) { - std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; + std::cerr << "[ModelManager] Failed to remove job id from chat manager.\n"; } } }).detach(); @@ -422,7 +579,7 @@ namespace Model return jobId; } - int startChatCompletionJob(const ChatCompletionParameters& params) + int startChatCompletionJob(const ChatCompletionParameters& params, std::function streamingCallback) { { std::shared_lock lock(m_mutex); @@ -444,11 +601,14 @@ namespace Model return -1; } - m_jobIds.push_back(jobId); + // Add job ID with proper synchronization + { + std::unique_lock lock(m_mutex); + m_jobIds.push_back(jobId); + } - std::thread([this, jobId]() { + std::thread([this, jobId, streamingCallback]() { // Poll while job is running or until the engine says it's done - this->setModelGenerationInProgress(true); auto& chatManager = Chat::ChatManager::getInstance(); while (true) @@ -456,38 +616,37 @@ namespace Model if 
(this->m_inferenceEngine->hasJobError(jobId)) break; CompletionResult partial = this->m_inferenceEngine->getJobResult(jobId); + bool isFinished = this->m_inferenceEngine->isJobFinished(jobId); if (!partial.text.empty()) { - // Call the user�s callback - std::shared_lock lock(m_mutex); - if (m_streamingCallback) { - m_streamingCallback(partial.text, partial.tps, jobId); + // Call the user's callback (no need to lock for the callback) + if (streamingCallback) { + streamingCallback(partial.text, partial.tps, jobId, isFinished); } } - if (this->m_inferenceEngine->isJobFinished(jobId)) break; + if (isFinished) break; // Sleep briefly to avoid busy-waiting std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - this->setModelGenerationInProgress(false); - - { - // remove job id from m_jobIds - m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end()); - } + // Remove job ID with proper synchronization + { + std::unique_lock lock(m_mutex); + m_jobIds.erase(std::remove(m_jobIds.begin(), m_jobIds.end(), jobId), m_jobIds.end()); + } - // save the chat history - { + // Save the chat history + { auto chatName = chatManager.getChatNameByJobId(jobId); if (!chatManager.saveChat(chatName)) { std::cerr << "[ModelManager] Failed to save chat: " << chatName << std::endl; } - } + } - // Reset jobid tracking on chat manager to -1 + // Reset jobid tracking on chat manager { if (!chatManager.removeJobId(jobId)) { @@ -501,6 +660,7 @@ namespace Model bool isJobFinished(int jobId) { + std::shared_lock lock(m_mutex); if (!m_inferenceEngine) { std::cerr << "[ModelManager] Inference engine is not initialized.\n"; @@ -511,26 +671,29 @@ namespace Model CompletionResult getJobResult(int jobId) { + std::shared_lock lock(m_mutex); if (!m_inferenceEngine) { std::cerr << "[ModelManager] Inference engine is not initialized.\n"; return { {}, "" }; } - return m_inferenceEngine->getJobResult(jobId); + return m_inferenceEngine->getJobResult(jobId); } bool 
hasJobError(int jobId) { + std::shared_lock lock(m_mutex); if (!m_inferenceEngine) { std::cerr << "[ModelManager] Inference engine is not initialized.\n"; return true; } - return m_inferenceEngine->hasJobError(jobId); + return m_inferenceEngine->hasJobError(jobId); } std::string getJobError(int jobId) { + std::shared_lock lock(m_mutex); if (!m_inferenceEngine) { std::cerr << "[ModelManager] Inference engine is not initialized.\n"; @@ -539,6 +702,318 @@ namespace Model return m_inferenceEngine->getJobError(jobId); } + //-------------------------------------------------------------------------------------------- + // Server management + //-------------------------------------------------------------------------------------------- + + bool startServer(const std::string& port) { + // Stop any existing server + kolosal::ServerAPI::instance().shutdown(); + + // Initialize logger + Logger::instance().setLogFile("model_server.log"); + Logger::instance().setLevel(LogLevel::SERVER_INFO); + Logger::logInfo("Starting model server on port %s", port.c_str()); + + // Set inference callbacks + kolosal::ServerAPI::instance().setInferenceCallback( + [this](const ChatCompletionRequest& request) { + return this->handleNonStreamingRequest(request); + } + ); + + kolosal::ServerAPI::instance().setStreamingInferenceCallback( + [this](const ChatCompletionRequest& request, + const std::string& requestId, + int chunkIndex, + ChatCompletionChunk& outputChunk) { + return this->handleStreamingRequest(request, requestId, chunkIndex, outputChunk); + } + ); + + // Initialize and start the server + if (!kolosal::ServerAPI::instance().init(port)) { + Logger::logError("Failed to start model server"); + return false; + } + + Logger::logInfo("Model server started successfully"); + return true; + } + + void stopServer() { + Logger::logInfo("Stopping model server"); + kolosal::ServerAPI::instance().shutdown(); + } + + ChatCompletionResponse handleNonStreamingRequest(const ChatCompletionRequest& request) 
{ + // Build parameters from the incoming request. + ChatCompletionParameters params = buildChatCompletionParameters(request); + // (The parameters will include the messages and other fields.) + params.streaming = false; + + // Invoke the synchronous chat completion method. + CompletionResult result = chatCompleteSync(params); + + // Map the engine’s result to our ChatCompletionResponse. + ChatCompletionResponse response = convertToChatResponse(request, result); + return response; + } + + bool ModelManager::handleStreamingRequest( + const ChatCompletionRequest& request, + const std::string& requestId, + int chunkIndex, + ChatCompletionChunk& outputChunk) { + // Look up (or create) the StreamingContext for this requestId. + std::shared_ptr ctx; + { + std::unique_lock lock(m_streamContextsMutex); + auto it = m_streamingContexts.find(requestId); + if (it == m_streamingContexts.end()) { + // For the very first chunk (chunkIndex==0) we create a new context. + if (chunkIndex == 0) { + ctx = std::make_shared(); + m_streamingContexts[requestId] = ctx; + } + else { + // If no context and chunk index is not zero, something is wrong. + Logger::logError("[ModelManager] Streaming context not found for requestId: %s", + requestId.c_str()); + return false; + } + } + else { + ctx = it->second; + } + } + + // If this is the first call (chunkIndex 0), start the asynchronous job. + if (chunkIndex == 0) { + // Build parameters with streaming enabled. 
+ ChatCompletionParameters params = buildChatCompletionParameters(request); + params.streaming = true; + + // Track the job ID and model name for this request + int jobId = -1; + + { + std::lock_guard lock(ctx->mtx); + ctx->model = request.model; + ctx->jobId = m_inferenceEngine->submitChatCompletionsJob(params); + jobId = ctx->jobId; + } + + if (jobId < 0) { + Logger::logError("[ModelManager] Failed to submit chat completions job for requestId: %s", + requestId.c_str()); + { + std::lock_guard lock(ctx->mtx); + ctx->error = true; + ctx->errorMessage = "Failed to start completion job"; + ctx->finished = true; + } + { + std::unique_lock lock(m_streamContextsMutex); + m_streamingContexts.erase(requestId); + } + return false; + } + + // Add job ID with proper synchronization to the global tracking + { + std::unique_lock lock(m_mutex); + m_jobIds.push_back(jobId); + } + + // Launch an asynchronous thread that polls the job and accumulates new text. + std::thread([this, jobId, requestId, ctx]() { + std::string lastText; + auto startTime = std::chrono::steady_clock::now(); + + try { + while (true) { + // Check if the job has an error + if (this->m_inferenceEngine->hasJobError(jobId)) { + std::string errorMsg = this->m_inferenceEngine->getJobError(jobId); + Logger::logError("[ModelManager] Streaming job error for jobId: %d - %s", + jobId, errorMsg.c_str()); + { + std::lock_guard lock(ctx->mtx); + ctx->error = true; + ctx->errorMessage = errorMsg; + ctx->finished = true; + } + ctx->cv.notify_all(); + break; + } + + // Get the current result and check if finished + CompletionResult partial = this->m_inferenceEngine->getJobResult(jobId); + bool isFinished = this->m_inferenceEngine->isJobFinished(jobId); + + // Compute delta text (only new text since last poll). 
+ std::string newText; + if (partial.text.size() > lastText.size()) { + newText = partial.text.substr(lastText.size()); + lastText = partial.text; + } + + // If we have new text, add it to the chunks + if (!newText.empty()) { + { + std::lock_guard lock(ctx->mtx); + ctx->chunks.push_back(newText); + } + ctx->cv.notify_all(); + } + + // If the job is finished, set the finished flag and break + if (isFinished) { + auto endTime = std::chrono::steady_clock::now(); + auto durationMs = std::chrono::duration_cast( + endTime - startTime).count(); + + Logger::logInfo("[ModelManager] Streaming job %d completed in %lld ms", + jobId, durationMs); + + { + std::lock_guard lock(ctx->mtx); + ctx->finished = true; + } + ctx->cv.notify_all(); + break; + } + } + } + catch (const std::exception& e) { + Logger::logError("[ModelManager] Exception in streaming thread: %s", e.what()); + { + std::lock_guard lock(ctx->mtx); + ctx->error = true; + ctx->errorMessage = e.what(); + ctx->finished = true; + } + ctx->cv.notify_all(); + } + + // Clean up job ID tracking + { + std::unique_lock lock(this->m_mutex); + this->m_jobIds.erase( + std::remove(this->m_jobIds.begin(), this->m_jobIds.end(), jobId), + this->m_jobIds.end()); + } + + // We don't erase the streaming context here - that happens when the last chunk is requested + }).detach(); + } + + if (chunkIndex == 0) { + // First chunk - just send the role (OpenAI format) + outputChunk.id = requestId; + outputChunk.model = request.model; + + ChatCompletionChunkChoice choice; + choice.index = 0; + choice.delta.role = "assistant"; // Always "assistant" role for responses + choice.delta.content = ""; // Empty content in first chunk (just role) + choice.finish_reason = ""; // No finish reason yet + + outputChunk.choices.clear(); + outputChunk.choices.push_back(choice); + + // More chunks will follow + return true; + } + else { + // For chunkIndex > 0, wait for the (chunkIndex-1)-th text chunk or completion + std::unique_lock lock(ctx->mtx); + + // 
Wait with a timeout for better responsiveness + bool result = ctx->cv.wait_for(lock, std::chrono::seconds(30), [ctx, chunkIndex]() { + return (ctx->chunks.size() >= static_cast(chunkIndex)) || + ctx->finished || ctx->error; + }); + + if (!result) { + // If we timed out + Logger::logError("[ModelManager] Timeout waiting for chunk %d for requestId %s", + chunkIndex, requestId.c_str()); + + // Clean up and return error + std::unique_lock glock(m_streamContextsMutex); + m_streamingContexts.erase(requestId); + return false; + } + + // If an error occurred, clean up the context and signal termination + if (ctx->error) { + Logger::logError("[ModelManager] Error in streaming job for requestId %s: %s", + requestId.c_str(), ctx->errorMessage.c_str()); + + std::unique_lock glock(m_streamContextsMutex); + m_streamingContexts.erase(requestId); + return false; + } + + // If job is finished but we don't have this chunk, send a final chunk + if (ctx->chunks.size() < static_cast(chunkIndex) && ctx->finished) { + outputChunk.id = requestId; + outputChunk.model = ctx->model; + + ChatCompletionChunkChoice choice; + choice.index = 0; + choice.delta.content = ""; // Empty content + choice.finish_reason = "stop"; // Mark as final chunk + + outputChunk.choices.clear(); + outputChunk.choices.push_back(choice); + + // Clean up the context + { + std::unique_lock glock(m_streamContextsMutex); + m_streamingContexts.erase(requestId); + } + + return false; // No more chunks to send + } + + // Get the content for this chunk + std::string chunkContent = ctx->chunks[chunkIndex - 1]; + outputChunk.id = requestId; + outputChunk.model = ctx->model; + + ChatCompletionChunkChoice choice; + choice.index = 0; + choice.delta.content = chunkContent; + choice.finish_reason = ""; + + outputChunk.choices.clear(); + outputChunk.choices.push_back(choice); + + // Check if this is the last chunk + bool isLastChunk = ctx->finished && (ctx->chunks.size() == static_cast(chunkIndex)); + + if (isLastChunk) { + // Set 
finish reason for the last content chunk + choice.finish_reason = "stop"; + outputChunk.choices[0] = choice; + + // Clean up the context + { + std::unique_lock glock(m_streamContextsMutex); + m_streamingContexts.erase(requestId); + } + + return false; // No more chunks to send + } + + // More chunks to come + return true; + } + } + std::string getCurrentVariantForModel(const std::string& modelName) const { auto it = m_modelVariantMap.find(modelName); @@ -600,7 +1075,6 @@ namespace Model bool setModelGenerationInProgress(bool inProgress) { - std::unique_lock lock(m_mutex); m_modelGenerationInProgress = inProgress; return true; } @@ -1210,7 +1684,8 @@ namespace Model // Launch heavy loading in async task return std::async(std::launch::async, [this, modelDir]() { try { - bool success = m_inferenceEngine->loadModel(modelDir->c_str()); + bool success = m_inferenceEngine->loadModel(modelDir->c_str(), + ModelLoaderConfigManager::getInstance().getConfig()); { std::unique_lock lock(m_mutex); @@ -1273,7 +1748,13 @@ namespace Model void stopAllJobs() { - for (auto jobId : m_jobIds) + std::vector jobIdsCopy; + { + std::shared_lock lock(m_mutex); + jobIdsCopy = m_jobIds; + } + + for (auto jobId : jobIdsCopy) { stopJob(jobId); } @@ -1296,6 +1777,30 @@ namespace Model } } + static ChatCompletionResponse convertToChatResponse( + const ChatCompletionRequest& request, const CompletionResult& result) + { + ChatCompletionResponse response; + response.model = request.model; + + ChatCompletionChoice choice; + choice.index = 0; + choice.message.role = "assistant"; + choice.message.content = result.text; + // For simplicity we assume the response is complete. 
+ choice.finish_reason = "stop"; + + response.choices.push_back(choice); + // For usage we make a simple estimate (adjust as needed) + response.usage.prompt_tokens = 0; + response.usage.completion_tokens = + static_cast(result.text.size() / 5); + response.usage.total_tokens = + response.usage.prompt_tokens + response.usage.completion_tokens; + + return response; + } + mutable std::shared_mutex m_mutex; std::unique_ptr m_persistence; std::vector m_models; @@ -1324,7 +1829,20 @@ namespace Model IInferenceEngine* m_inferenceEngine = nullptr; - std::function m_streamingCallback; + // Server related + struct StreamingContext { + std::mutex mtx; + std::condition_variable cv; + std::vector chunks; + std::string model; // Store model name + int jobId = -1; // Store job ID + std::string errorMessage; // Store error details + bool finished = false; + bool error = false; + }; + std::mutex m_streamContextsMutex; + std::unordered_map> + m_streamingContexts; }; inline void initializeModelManager() diff --git a/include/model/server_state_manager.hpp b/include/model/server_state_manager.hpp new file mode 100644 index 0000000..c58f77b --- /dev/null +++ b/include/model/server_state_manager.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include "model_manager.hpp" + +#include +#include +#include + +class ServerStateManager { +public: + static ServerStateManager& getInstance() { + static ServerStateManager instance; + return instance; + } + + // Server status + bool isServerRunning() const { return m_serverRunning; } + void setServerRunning(bool running) { m_serverRunning = running; } + + // Server port + int getServerPort() const { return m_serverPort; } + void setServerPort(int port) { m_serverPort = port; } + + // Get port as string for display and connection purposes + std::string getServerPortString() const { + return std::to_string(m_serverPort); + } + + // Model state observers + bool isModelLoadInProgress() const { + return Model::ModelManager::getInstance().isLoadInProgress(); + } + 
+ bool isModelLoaded() const { + return Model::ModelManager::getInstance().isModelLoaded(); + } + + std::optional getCurrentModelName() const { + return Model::ModelManager::getInstance().getCurrentModelName(); + } + + // Model parameters change tracking + bool haveModelParamsChanged() const { return m_modelParamsChanged; } + void setModelParamsChanged(bool changed) { m_modelParamsChanged = changed; } + void resetModelParamsChanged() { m_modelParamsChanged = false; } + +private: + ServerStateManager() : m_serverRunning(false), m_serverPort(8080), m_modelParamsChanged(false) {} + + bool m_serverRunning; + int m_serverPort; + bool m_modelParamsChanged; +}; \ No newline at end of file diff --git a/include/ui/chat/chat_history.hpp b/include/ui/chat/chat_history.hpp index 15f762d..63688e5 100644 --- a/include/ui/chat/chat_history.hpp +++ b/include/ui/chat/chat_history.hpp @@ -198,6 +198,36 @@ class ChatHistoryRenderer { ImGui::EndGroup(); } + static void chatStreamingCallback(const std::string& partialOutput, const float tps, const int jobId, const bool isFinished) { + auto& chatManager = Chat::ChatManager::getInstance(); + auto& modelManager = Model::ModelManager::getInstance(); + std::string chatName = chatManager.getChatNameByJobId(jobId); + + if (isFinished) modelManager.setModelGenerationInProgress(false); + + auto chatOpt = chatManager.getChat(chatName); + if (chatOpt) { + Chat::ChatHistory chat = chatOpt.value(); + if (!chat.messages.empty() && chat.messages.back().role == "assistant") { + // Overwrite the existing assistant message with the latest output + chat.messages.back().content = partialOutput; + chat.messages.back().tps = tps; + chatManager.updateChat(chatName, chat); + } + else { + // Create new assistant message + Chat::Message assistantMsg; + assistantMsg.id = static_cast(chat.messages.size()) + 1; + assistantMsg.role = "assistant"; + assistantMsg.content = partialOutput; + assistantMsg.tps = tps; + assistantMsg.modelName = modelManager.getCurrentModelName().value_or("idk") + "
| " + + modelManager.getCurrentVariantType(); + chatManager.addMessage(chatName, assistantMsg); + } + } + } + void regenerateResponse(int index) { Model::ModelManager& modelManager = Model::ModelManager::getInstance(); Chat::ChatManager& chatManager = Chat::ChatManager::getInstance(); @@ -282,10 +312,12 @@ class ChatHistoryRenderer { chatManager.getCurrentChat().value() ); - int jobId = modelManager.startChatCompletionJob(completionParams); + int jobId = modelManager.startChatCompletionJob(completionParams, chatStreamingCallback); if (!chatManager.setCurrentJobId(jobId)) { std::cerr << "[ChatSection] Failed to set the current job ID.\n"; } + + modelManager.setModelGenerationInProgress(true); } void renderMetadata(const Chat::Message& msg, int index, float bubbleWidth, float bubblePadding) diff --git a/include/ui/chat/chat_window.hpp b/include/ui/chat/chat_window.hpp index a59e38f..48ea786 100644 --- a/include/ui/chat/chat_window.hpp +++ b/include/ui/chat/chat_window.hpp @@ -249,6 +249,36 @@ class ChatWindow { } private: + static void chatStreamingCallback(const std::string& partialOutput, const float tps, const int jobId, const bool isFinished) { + auto& chatManager = Chat::ChatManager::getInstance(); + auto& modelManager = Model::ModelManager::getInstance(); + std::string chatName = chatManager.getChatNameByJobId(jobId); + + if (isFinished) modelManager.setModelGenerationInProgress(false); + + auto chatOpt = chatManager.getChat(chatName); + if (chatOpt) { + Chat::ChatHistory chat = chatOpt.value(); + if (!chat.messages.empty() && chat.messages.back().role == "assistant") { + // Overwrite the existing assistant message with the latest output + chat.messages.back().content = partialOutput; + chat.messages.back().tps = tps; + chatManager.updateChat(chatName, chat); + } + else { + // Create new assistant message + Chat::Message assistantMsg; + assistantMsg.id = static_cast(chat.messages.size()) + 1; + assistantMsg.role = "assistant"; + assistantMsg.content = partialOutput; + assistantMsg.tps =
tps; + assistantMsg.modelName = modelManager.getCurrentModelName().value_or("idk") + " | " + + modelManager.getCurrentVariantType(); + chatManager.addMessage(chatName, assistantMsg); + } + } + } + // Render the row of buttons that allow the user to switch models or clear chat. void renderChatFeatureButtons(float baseX, float baseY) { Model::ModelManager& modelManager = Model::ModelManager::getInstance(); @@ -256,12 +286,19 @@ class ChatWindow { // Update the open-model manager button’s label dynamically. openModelManagerConfig.label = modelManager.getCurrentModelName().value_or("Select Model"); + openModelManagerConfig.tooltip = + modelManager.getCurrentModelName().value_or("Select Model"); if (modelManager.isLoadInProgress()) { openModelManagerConfig.label = "Loading Model..."; } + if (modelManager.isModelLoaded()) + { + openModelManagerConfig.icon = ICON_CI_SPARKLE_FILLED; + } + std::vector buttons = { openModelManagerConfig, clearChatButtonConfig }; Button::renderGroup(buttons, baseX, baseY); @@ -296,10 +333,12 @@ class ChatWindow { buildChatCompletionParameters(currentChat, message); auto& modelManager = Model::ModelManager::getInstance(); - int jobId = modelManager.startChatCompletionJob(completionParams); + int jobId = modelManager.startChatCompletionJob(completionParams, chatStreamingCallback); if (!chatManager.setCurrentJobId(jobId)) { std::cerr << "[ChatSection] Failed to set the current job ID.\n"; } + + modelManager.setModelGenerationInProgress(true); } InputFieldConfig createInputFieldConfig( diff --git a/include/ui/chat/model_manager_modal.hpp b/include/ui/chat/model_manager_modal.hpp index 7640498..fe93abd 100644 --- a/include/ui/chat/model_manager_modal.hpp +++ b/include/ui/chat/model_manager_modal.hpp @@ -270,6 +270,7 @@ class ModelCardRenderer { btnConfig.onClick = [variant, this]() { Model::ModelManager::getInstance().setPreferredVariant(m_model.name, variant); }; + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + 4); Button::render(btnConfig); 
ImGui::SameLine(0.0f, 4.0f); @@ -280,6 +281,7 @@ class ModelCardRenderer { variantLabel.fontType = FontsManager::REGULAR; variantLabel.fontSize = FontsManager::SM; variantLabel.alignment = Alignment::LEFT; + ImGui::SetCursorPosY(ImGui::GetCursorPosY() - 6); Label::render(variantLabel); }; diff --git a/include/ui/server/deployment_settings.hpp b/include/ui/server/deployment_settings.hpp new file mode 100644 index 0000000..f01e019 --- /dev/null +++ b/include/ui/server/deployment_settings.hpp @@ -0,0 +1,285 @@ +#pragma once + +#include "imgui.h" +#include "ui/widgets.hpp" +#include "model/model_loader_config_manager.hpp" + +#include +#include +#include + +namespace DeploymentSettingsConstants { + constexpr ImGuiWindowFlags SidebarFlags = + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoCollapse | + ImGuiWindowFlags_NoTitleBar | + ImGuiWindowFlags_NoBackground | + ImGuiWindowFlags_NoScrollbar; +} + +class ModelLoaderSettingsComponent { +public: + // Takes sidebarWidth by reference to always reflect the current width + ModelLoaderSettingsComponent(float& sidebarWidth) + : m_sidebarWidth(sidebarWidth) + { + // Initialize labels + m_contextSizeLabel = createLabel("Context Size", ICON_CI_BRACKET); + m_gpuLayersLabel = createLabel("GPU Layers", ICON_CI_CHIP); + m_systemSettingsLabel = createLabel("System Settings", ICON_CI_SERVER); + m_optimizationLabel = createLabel("Optimization Settings", ICON_CI_DASHBOARD); + } + + void render() { + auto& configManager = Model::ModelLoaderConfigManager::getInstance(); + auto& serverState = ServerStateManager::getInstance(); + + const float sliderWidth = m_sidebarWidth - 30; + + // n_ctx slider (context size) - using float for slider then converting back to int + { + int n_ctx = configManager.getContextSize(); + float n_ctx_float = static_cast(n_ctx); + Slider::render("##n_ctx", n_ctx_float, 1024.0f, 16384.0f, sliderWidth, "%.0f"); + int new_n_ctx = static_cast(n_ctx_float); + if (new_n_ctx != n_ctx) { + 
configManager.setContextSize(new_n_ctx); + configManager.saveConfig(); // Auto-save on change + serverState.setModelParamsChanged(true); // Mark params as changed + } + } + + // n_keep slider (keep size) - using float for slider then converting back to int + { + int n_keep = configManager.getKeepSize(); + float n_keep_float = static_cast(n_keep); + Slider::render("##n_keep", n_keep_float, 0.0f, static_cast(configManager.getContextSize()), sliderWidth, "%.0f"); + int new_n_keep = static_cast(n_keep_float); + if (new_n_keep != n_keep) { + configManager.setKeepSize(new_n_keep); + configManager.saveConfig(); // Auto-save on change + serverState.setModelParamsChanged(true); // Mark params as changed + } + } + + // n_gpu_layers slider - using float for slider then converting back to int + { + int n_gpu_layers = configManager.getGpuLayers(); + float n_gpu_layers_float = static_cast(n_gpu_layers); + Slider::render("##n_gpu_layers", n_gpu_layers_float, 0.0f, 100.0f, sliderWidth, "%.0f"); + int new_n_gpu_layers = static_cast(n_gpu_layers_float); + if (new_n_gpu_layers != n_gpu_layers) { + configManager.setGpuLayers(new_n_gpu_layers); + configManager.saveConfig(); // Auto-save on change + serverState.setModelParamsChanged(true); // Mark params as changed + } + } + + // use_mlock checkbox + renderCheckbox("Memory Lock", "##use_mlock", configManager.getUseMlock(), + [&configManager, &serverState](bool value) { + configManager.setUseMlock(value); + configManager.saveConfig(); + serverState.setModelParamsChanged(true); // Mark params as changed + }, + "Locks memory to prevent swapping to disk"); + + // use_mmap checkbox + renderCheckbox("Memory Map", "##use_mmap", configManager.getUseMmap(), + [&configManager, &serverState](bool value) { + configManager.setUseMmap(value); + configManager.saveConfig(); + serverState.setModelParamsChanged(true); // Mark params as changed + }, + "Use memory mapping for model weights"); + + // n_parallel input + ImGui::Spacing(); + int n_parallel = 
configManager.getParallelCount(); + IntInputField::render("##n_parallel", n_parallel, sliderWidth); + if (n_parallel != configManager.getParallelCount()) { + configManager.setParallelCount(n_parallel); + configManager.saveConfig(); + serverState.setModelParamsChanged(true); // Mark params as changed + } + + // cont_batching checkbox + renderCheckbox("Continuous Batching", "##cont_batching", configManager.getContinuousBatching(), + [&configManager, &serverState](bool value) { + configManager.setContinuousBatching(value); + configManager.saveConfig(); + serverState.setModelParamsChanged(true); // Mark params as changed + }, + "Enable continuous batching for better performance"); + + // warmup checkbox + renderCheckbox("Warmup", "##warmup", configManager.getWarmup(), + [&configManager, &serverState](bool value) { + configManager.setWarmup(value); + configManager.saveConfig(); + serverState.setModelParamsChanged(true); // Mark params as changed + }, + "Run model warmup at initialization"); + } + +private: + float& m_sidebarWidth; + LabelConfig m_contextSizeLabel; + LabelConfig m_gpuLayersLabel; + LabelConfig m_systemSettingsLabel; + LabelConfig m_optimizationLabel; + + LabelConfig createLabel(const std::string& text, const std::string& icon) { + LabelConfig label; + label.id = "##" + text + "_label"; + label.label = text; + label.icon = icon; + label.size = ImVec2(Config::Icon::DEFAULT_FONT_SIZE, 0); + label.fontType = FontsManager::BOLD; + return label; + } + + void renderCheckbox(const std::string& label, const std::string& id, bool value, std::function onChange, const std::string& tooltip = "") { + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + 5.0f); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 8.0f); + + ButtonConfig btnConfig; + btnConfig.id = id; + btnConfig.icon = value ? ICON_CI_CHECK : ICON_CI_CLOSE; + btnConfig.textColor = value ? 
ImVec4(1, 1, 1, 1) : ImVec4(0.6f, 0.6f, 0.6f, 1.0f); + btnConfig.fontSize = FontsManager::SM; + btnConfig.size = ImVec2(24, 24); + btnConfig.backgroundColor = value ? Config::Color::PRIMARY : RGBAToImVec4(60, 60, 60, 255); + btnConfig.hoverColor = value ? RGBAToImVec4(53, 132, 228, 255) : RGBAToImVec4(80, 80, 80, 255); + btnConfig.activeColor = value ? RGBAToImVec4(26, 95, 180, 255) : RGBAToImVec4(100, 100, 100, 255); + btnConfig.onClick = [value, onChange]() { + onChange(!value); + }; + if (!tooltip.empty()) { + btnConfig.tooltip = tooltip; + } + Button::render(btnConfig); + + ImGui::SameLine(0.0f, 8.0f); + LabelConfig labelConfig; + labelConfig.id = id + "_label"; + labelConfig.label = label; + labelConfig.size = ImVec2(0, 0); + labelConfig.fontType = FontsManager::REGULAR; + labelConfig.fontSize = FontsManager::MD; + labelConfig.alignment = Alignment::LEFT; + + ImGui::SetCursorPosY(ImGui::GetCursorPosY() - 8.0f); + Label::render(labelConfig); + + ImGui::Spacing(); + } +}; + +class ServerSettingsComponent { +public: + ServerSettingsComponent(float& sidebarWidth) + : m_sidebarWidth(sidebarWidth) + { + m_serverSettingsLabel = createLabel("Server Settings", ICON_CI_SERVER); + } + + void render() { + auto& serverState = ServerStateManager::getInstance(); + const float sliderWidth = m_sidebarWidth - 30; + + // Server status indicator + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + 5.0f); + ImGui::TextUnformatted("Status:"); + ImGui::SameLine(); + + if (serverState.isServerRunning()) { + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 1.0f, 0.0f, 1.0f)); + ImGui::TextUnformatted("Running"); + } + else { + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1.0f, 0.5f, 0.0f, 1.0f)); + ImGui::TextUnformatted("Stopped"); + } + ImGui::PopStyleColor(); + ImGui::Spacing(); + ImGui::Separator(); + ImGui::Spacing(); + + int port = serverState.getServerPort(); + + // Make the port input read-only if server is running + ImGui::BeginDisabled(serverState.isServerRunning()); + + 
IntInputField::render("##server_port", port, sliderWidth); + if (port != serverState.getServerPort() && port > 0 && port <= 65535) { + serverState.setServerPort(port); + } + + ImGui::EndDisabled(); + + ImGui::Spacing(); + ImGui::Spacing(); + } + +private: + float& m_sidebarWidth; + LabelConfig m_serverSettingsLabel; + + LabelConfig createLabel(const std::string& text, const std::string& icon) { + LabelConfig label; + label.id = "##" + text + "_label"; + label.label = text; + label.icon = icon; + label.size = ImVec2(Config::Icon::DEFAULT_FONT_SIZE, 0); + label.fontType = FontsManager::BOLD; + return label; + } +}; + +class DeploymentSettingsSidebar { +public: + DeploymentSettingsSidebar() : + m_width(Config::DeploymentSettingsSidebar::SIDEBAR_WIDTH), + m_modelLoaderSettingsComponent(m_width), + m_serverSettingsComponent(m_width) { + } + + void render() { + ImGuiIO& io = ImGui::GetIO(); + const float sidebarHeight = io.DisplaySize.y - Config::TITLE_BAR_HEIGHT; + + // Right sidebar window + ImGui::SetNextWindowPos(ImVec2(io.DisplaySize.x - m_width, Config::TITLE_BAR_HEIGHT + 40), ImGuiCond_Always); + ImGui::SetNextWindowSize(ImVec2(m_width, sidebarHeight), ImGuiCond_Always); + ImGui::SetNextWindowSizeConstraints( + ImVec2(Config::DeploymentSettingsSidebar::MIN_SIDEBAR_WIDTH, sidebarHeight), + ImVec2(Config::DeploymentSettingsSidebar::MAX_SIDEBAR_WIDTH, sidebarHeight) + ); + + ImGui::Begin("Deployment Settings", nullptr, DeploymentSettingsConstants::SidebarFlags); + + // Update the current sidebar width + m_width = ImGui::GetWindowSize().x; + + // Render scrollable content area + ImGui::BeginChild("##deployment_settings_content", ImVec2(0, 0), false, false); + + // Render server settings component first + m_serverSettingsComponent.render(); + + // Render model loader settings component + m_modelLoaderSettingsComponent.render(); + + ImGui::EndChild(); + + ImGui::End(); + } + + float getWidth() const { return m_width; } + +private: + float m_width = 0.0F; + 
ModelLoaderSettingsComponent m_modelLoaderSettingsComponent; + ServerSettingsComponent m_serverSettingsComponent; +}; \ No newline at end of file diff --git a/include/ui/server/server_logs.hpp b/include/ui/server/server_logs.hpp new file mode 100644 index 0000000..e9e7c22 --- /dev/null +++ b/include/ui/server/server_logs.hpp @@ -0,0 +1,269 @@ +#pragma once + +#include "imgui.h" +#include "ui/widgets.hpp" +#include "ui/chat/model_manager_modal.hpp" +#include "model/model_manager.hpp" +#include "model/server_state_manager.hpp" + +#include + +class ServerLogViewer { +public: + ServerLogViewer() { + m_logBuffer = "Server logs will be displayed here."; + m_lastLogUpdate = std::chrono::steady_clock::now(); + } + + ~ServerLogViewer() { + // Make sure to stop the server on destruction + if (ServerStateManager::getInstance().isServerRunning()) { + Model::ModelManager::getInstance().stopServer(); + } + } + + void render(const float sidebarWidth) { + ImGuiIO& io = ImGui::GetIO(); + Model::ModelManager& modelManager = Model::ModelManager::getInstance(); + ServerStateManager& serverState = ServerStateManager::getInstance(); + + ImGuiWindowFlags window_flags = ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | + ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoCollapse | + ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoBackground; + + ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.0F); + ImGui::SetNextWindowPos(ImVec2(0, Config::TITLE_BAR_HEIGHT), ImGuiCond_Always); + ImGui::SetNextWindowSize(ImVec2(io.DisplaySize.x - sidebarWidth, io.DisplaySize.y - Config::TITLE_BAR_HEIGHT), ImGuiCond_Always); + ImGui::Begin("Server Logs", nullptr, window_flags); + ImGui::PopStyleVar(); + + // Top bar with controls + { + // Start/Stop server button + ButtonConfig serverButtonConfig; + serverButtonConfig.id = "##server_toggle_button"; + + if (serverState.isServerRunning()) { + serverButtonConfig.label = "Stop Server"; + 
serverButtonConfig.icon = ICON_CI_DEBUG_STOP; + serverButtonConfig.tooltip = "Stop the server"; + } + else { + serverButtonConfig.label = "Start Server"; + serverButtonConfig.icon = ICON_CI_RUN; + serverButtonConfig.tooltip = "Start the server"; + } + + serverButtonConfig.size = ImVec2(150, 0); + serverButtonConfig.alignment = Alignment::CENTER; + serverButtonConfig.onClick = [this, &modelManager, &serverState]() { + toggleServer(modelManager, serverState); + }; + + // Model selection button + ButtonConfig selectModelButtonConfig; + selectModelButtonConfig.id = "##server_select_model_button"; + selectModelButtonConfig.label = + serverState.getCurrentModelName().value_or("Select Model"); + selectModelButtonConfig.tooltip = + serverState.getCurrentModelName().value_or("Select Model"); + selectModelButtonConfig.icon = ICON_CI_SPARKLE; + selectModelButtonConfig.size = ImVec2(180, 0); + selectModelButtonConfig.alignment = Alignment::CENTER; + selectModelButtonConfig.onClick = [this]() { + m_modelManagerModalOpen = true; + }; + + if (serverState.isModelLoadInProgress()) { + selectModelButtonConfig.label = "Loading Model..."; + serverButtonConfig.state = ButtonState::DISABLED; + } + + if (serverState.isModelLoaded()) { + selectModelButtonConfig.icon = ICON_CI_SPARKLE_FILLED; + } + else { + serverButtonConfig.state = ButtonState::DISABLED; // Can't start server without model + } + + std::vector buttonConfigs = { serverButtonConfig, selectModelButtonConfig }; + + // Add reload button if model params have changed + if (serverState.haveModelParamsChanged() && serverState.isModelLoaded()) { + ButtonConfig reloadModelButtonConfig; + reloadModelButtonConfig.id = "##reload_model_button"; + reloadModelButtonConfig.icon = ICON_CI_REFRESH; + reloadModelButtonConfig.tooltip = "Reload model with new parameters"; + reloadModelButtonConfig.size = ImVec2(24, 24); + reloadModelButtonConfig.alignment = Alignment::CENTER; + reloadModelButtonConfig.backgroundColor = ImVec4(0.2f, 0.2f, 0.2f, 
1.0f); + reloadModelButtonConfig.onClick = [this, &modelManager, &serverState]() { + modelManager.switchModel( + modelManager.getCurrentModelName().value(), + modelManager.getCurrentVariantType() + ); + serverState.resetModelParamsChanged(); + }; + + // Disable the reload button if server is running or model is loading + if (serverState.isServerRunning() || serverState.isModelLoadInProgress()) { + reloadModelButtonConfig.state = ButtonState::DISABLED; + } + + buttonConfigs.push_back(reloadModelButtonConfig); + } + + Button::renderGroup(buttonConfigs, ImGui::GetCursorPosX(), ImGui::GetCursorPosY()); + + // Show API endpoint info if server is running + if (serverState.isServerRunning()) { + ImGui::SameLine(); + + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + 40); + + ImGui::TextUnformatted("API Endpoint:"); + ImGui::SameLine(); + + std::string endpoint = "http://localhost:" + serverState.getServerPortString() + "/v1/chat/completions"; + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.4f, 0.8f, 1.0f, 1.0f)); + ImGui::TextUnformatted(endpoint.c_str()); + ImGui::PopStyleColor(); + + ImGui::SameLine(); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() - 2); + ButtonConfig copyButtonConfig; + copyButtonConfig.id = "##copy_endpoint_button"; + copyButtonConfig.icon = ICON_CI_COPY; + copyButtonConfig.tooltip = "Copy endpoint to clipboard"; + copyButtonConfig.size = ImVec2(24, 24); + copyButtonConfig.onClick = [endpoint]() { + ImGui::SetClipboardText(endpoint.c_str()); + }; + + Button::render(copyButtonConfig); + } + + m_modelManagerModal.render(m_modelManagerModalOpen); + } + + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 12); + + // Update log buffer from kolosal::Logger + updateLogBuffer(); + + // Log display area + { + InputFieldConfig input_cfg( + "##server_log_input", + ImVec2(-FLT_MIN, -FLT_MIN), + m_logBuffer, + m_isLogFocused + ); + + input_cfg.frameRounding = 4.0f; + input_cfg.flags = ImGuiInputTextFlags_ReadOnly; + input_cfg.backgroundColor = ImVec4(0.2f, 0.2f, 0.2f, 
0.5f); + InputField::renderMultiline(input_cfg); + + // Auto-scroll to bottom + if (ImGui::GetScrollY() >= ImGui::GetScrollMaxY() - 20.0f) { + ImGui::SetScrollHereY(1.0f); + } + } + + ImGui::End(); + } + +private: + bool m_isLogFocused = false; + std::string m_logBuffer; + size_t m_lastLogIndex = 0; + std::chrono::steady_clock::time_point m_lastLogUpdate; + + ModelManagerModal m_modelManagerModal; + bool m_modelManagerModalOpen = false; + + void toggleServer(Model::ModelManager& modelManager, ServerStateManager& serverState) { + if (serverState.isServerRunning()) { + // Stop the server + modelManager.stopServer(); + serverState.setServerRunning(false); + } + else { + // Start the server + if (serverState.isModelLoaded()) { + if (modelManager.startServer(serverState.getServerPortString())) { + serverState.setServerRunning(true); + addToLogBuffer("Server started on port " + serverState.getServerPortString()); + } + else { + addToLogBuffer("Failed to start server on port " + serverState.getServerPortString()); + } + } + else { + addToLogBuffer("Error: Cannot start server without a loaded model"); + } + } + } + + void updateLogBuffer() { + // Check if it's time to update (limit updates to reduce performance impact) + auto now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - m_lastLogUpdate).count() < 100) { + return; + } + m_lastLogUpdate = now; + + // Get logs from the kolosal::Logger + const auto& logs = Logger::instance().getLogs(); + + // If there are new logs, add them to our buffer + if (logs.size() > m_lastLogIndex) { + for (size_t i = m_lastLogIndex; i < logs.size(); i++) { + const auto& entry = logs[i]; + std::string levelPrefix; + + switch (entry.level) { + case LogLevel::SERVER_ERROR: + levelPrefix = "[ERROR] "; + break; + case LogLevel::SERVER_WARNING: + levelPrefix = "[WARNING] "; + break; + case LogLevel::SERVER_INFO: + levelPrefix = "[INFO] "; + break; + case LogLevel::SERVER_DEBUG: + levelPrefix = "[DEBUG] "; + break; + 
default: + levelPrefix = "[LOG] "; + } + + addToLogBuffer(levelPrefix + entry.message); + } + + m_lastLogIndex = logs.size(); + } + } + + void addToLogBuffer(const std::string& message) { + // Add timestamp + auto now = std::chrono::system_clock::now(); + auto time_t = std::chrono::system_clock::to_time_t(now); + std::tm* tm = std::localtime(&time_t); + + char timestamp[32]; + std::strftime(timestamp, sizeof(timestamp), "[%H:%M:%S] ", tm); + + // Add to buffer with newline if not empty + if (!m_logBuffer.empty() && m_logBuffer != "Server logs will be displayed here.") { + m_logBuffer += "\n"; + } + else if (m_logBuffer == "Server logs will be displayed here.") { + m_logBuffer = ""; // Clear the initial message + } + + m_logBuffer += std::string(timestamp) + message; + } +}; \ No newline at end of file diff --git a/include/ui/tab_manager.hpp b/include/ui/tab_manager.hpp index 48f14a2..16f0bb2 100644 --- a/include/ui/tab_manager.hpp +++ b/include/ui/tab_manager.hpp @@ -1,11 +1,14 @@ #pragma once +#include "IconsCodicons.h" + #include "ui/chat/chat_history_sidebar.hpp" #include "ui/chat/preset_sidebar.hpp" #include "ui/chat/chat_window.hpp" +#include "ui/server/server_logs.hpp" +#include "ui/server/deployment_settings.hpp" #include "chat/chat_manager.hpp" - #include "model/model_manager.hpp" #include @@ -17,6 +20,8 @@ class ITab { virtual void render() = 0; virtual void onActivate() = 0; virtual void onDeactivate() = 0; + virtual const char* getTitle() const = 0; + virtual const char* getIcon() const = 0; }; // Update ChatTab to implement the new methods @@ -27,42 +32,8 @@ class ChatTab : public ITab { { } - void onActivate() override { - Model::ModelManager& modelManager = Model::ModelManager::getInstance(); - - modelManager.setStreamingCallback( - [&modelManager](const std::string& partialOutput, const float tps, const int jobId) { - auto& chatManager = Chat::ChatManager::getInstance(); - std::string chatName = chatManager.getChatNameByJobId(jobId); - - auto chatOpt 
= chatManager.getChat(chatName); - if (chatOpt) { - Chat::ChatHistory chat = chatOpt.value(); - if (!chat.messages.empty() && chat.messages.back().role == "assistant") { - // Append to existing assistant message - chat.messages.back().content = partialOutput; - chat.messages.back().tps = tps; - chatManager.updateChat(chatName, chat); - } - else { - // Create new assistant message - Chat::Message assistantMsg; - assistantMsg.id = static_cast(chat.messages.size()) + 1; - assistantMsg.role = "assistant"; - assistantMsg.content = partialOutput; - assistantMsg.tps = tps; - assistantMsg.modelName = modelManager.getCurrentModelName().value_or("idk") + " | " - + modelManager.getCurrentVariantType(); - chatManager.addMessage(chatName, assistantMsg); - } - } - } - ); - } - - void onDeactivate() override { - Model::ModelManager::getInstance().setStreamingCallback(nullptr); - } + void onActivate() override {} + void onDeactivate() override {} void render() override { chatHistorySidebar.render(); @@ -73,12 +44,43 @@ class ChatTab : public ITab { ); } + // Return a title for the Chat tab + const char* getTitle() const override { return "Chat"; } + + // Return the icon for the Chat tab + const char* getIcon() const override { return ICON_CI_COMMENT_DISCUSSION; } + private: ChatHistorySidebar chatHistorySidebar; ModelPresetSidebar modelPresetSidebar; ChatWindow chatWindow; }; +class ServerTab : public ITab { +public: + ServerTab() : serverLogViewer(), deploymentSettingsSidebar() + { + } + + void onActivate() override {} + void onDeactivate() override {} + + void render() override { + deploymentSettingsSidebar.render(); + serverLogViewer.render(deploymentSettingsSidebar.getWidth()); + } + + // Return a title for the Server tab + const char* getTitle() const override { return "Server"; } + + // Return the icon for the Server tab + const char* getIcon() const override { return ICON_CI_SERVER_PROCESS; } + +private: + ServerLogViewer serverLogViewer; + DeploymentSettingsSidebar
deploymentSettingsSidebar; +}; + // Update TabManager to handle tab activation/deactivation class TabManager { public: @@ -110,6 +112,10 @@ class TabManager { } } + ITab* getTab(size_t index) const { return tabs.at(index).get(); } + const size_t getTabCount() const { return tabs.size(); } + const size_t getCurrentActiveTabIndex() const { return activeTabIndex; }; + private: std::vector> tabs; size_t activeTabIndex; diff --git a/include/ui/title_bar.hpp b/include/ui/title_bar.hpp index 2e4e2d9..84a82b2 100644 --- a/include/ui/title_bar.hpp +++ b/include/ui/title_bar.hpp @@ -7,6 +7,9 @@ #include "stb_image.h" #include "resource.h" +#include "tab_manager.hpp" +#include "widgets.hpp" + GLuint LoadTextureFromFile(const char* filename) { int width, height, channels; @@ -38,14 +41,14 @@ GLuint LoadTextureFromFile(const char* filename) return texture; } -void titleBar(void* handler) +void titleBar(void* handler, TabManager& tabManager) { #ifdef _WIN32 - // Cast the HWND - HWND hwnd = static_cast(handler); + // Cast the HWND + HWND hwnd = static_cast(handler); #else - // Cast the XID - XID xid = static_cast(handler); + // Cast the XID + XID xid = static_cast(handler); #endif ImGuiIO& io = ImGui::GetIO(); @@ -83,6 +86,56 @@ void titleBar(void* handler) } } + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + 16.0f); + + // Render a button for each available tab + { + std::vector buttonConfigs; + + for (size_t i = 0; i < tabManager.getTabCount(); ++i) + { + ButtonConfig tabButtonConfig; + tabButtonConfig.id = "##" + (std::string)tabManager.getTab(i)->getTitle(); + tabButtonConfig.icon = tabManager.getTab(i)->getIcon(); + tabButtonConfig.size = ImVec2(24, 0); + tabButtonConfig.onClick = [i, &tabManager]() { tabManager.switchTab(i); }; + tabButtonConfig.tooltip = tabManager.getTab(i)->getTitle(); + if (tabManager.getCurrentActiveTabIndex() == i) + { + tabButtonConfig.state = ButtonState::ACTIVE; + } + else + { + tabButtonConfig.textColor = ImVec4(0.7f, 0.7f, 0.7f, 0.7f); + } + + 
buttonConfigs.push_back(tabButtonConfig); + } + + // Calculate background dimensions + float buttonHeight = 16.0f; + float totalWidth = buttonConfigs.size() * 24.0f + (buttonConfigs.size() - 2) * 10.0f + 6.0f; + float padding = 6.0f; + + // Calculate background position and size + ImVec2 pos = ImVec2(ImGui::GetCursorPosX(), ImGui::GetCursorPosY()); + ImVec2 size = ImVec2(totalWidth + padding * 2, buttonHeight + padding * 2); + + // Draw the background + ImDrawList* drawList = ImGui::GetWindowDrawList(); + drawList->AddRectFilled( + ImVec2(pos.x - padding, pos.y - padding), + ImVec2(pos.x + size.x, pos.y + size.y), + ImGui::ColorConvertFloat4ToU32(ImVec4(0.3f, 0.3f, 0.3f, 0.3f)), + 8.0f + ); + + // Render the buttons + Button::renderGroup(buttonConfigs, pos.x, pos.y); + + ImGui::SameLine(); + } + // Title Bar Buttons { float buttonWidth = 45.0f; // Adjust as needed diff --git a/include/ui/widgets.hpp b/include/ui/widgets.hpp index ef36447..c2fc39e 100644 --- a/include/ui/widgets.hpp +++ b/include/ui/widgets.hpp @@ -765,8 +765,6 @@ namespace Slider renderLabel.erase(std::remove(renderLabel.begin(), renderLabel.end(), '#'), renderLabel.end()); std::replace(renderLabel.begin(), renderLabel.end(), '_', ' '); - // Apply horizontal padding and render label - ImGui::SetCursorPosX(ImGui::GetCursorPosX() + paddingX); LabelConfig labelConfig; labelConfig.id = label; labelConfig.label = renderLabel; @@ -861,8 +859,6 @@ namespace IntInputField renderLabel.erase(std::remove(renderLabel.begin(), renderLabel.end(), '#'), renderLabel.end()); std::replace(renderLabel.begin(), renderLabel.end(), '_', ' '); - // Apply horizontal padding and render label - ImGui::SetCursorPosX(ImGui::GetCursorPosX() + paddingX); LabelConfig labelConfig; labelConfig.id = label; labelConfig.label = renderLabel; diff --git a/include/window/win32_window.hpp b/include/window/win32_window.hpp index 5f566f3..f42b600 100644 --- a/include/window/win32_window.hpp +++ b/include/window/win32_window.hpp @@ -39,11 
+39,12 @@ class Win32Window : public Window { } } - void createWindow(int width, int height, const std::string& title) override + void createWindow(int width, int height, const std::string& title, const float tabButtonWidths) override { this->width = width; this->height = height; this->title = title; + this->tabButtonWidths = tabButtonWidths; hwnd = create_window(&Win32Window::WndProc, hInstance, this); if (!hwnd) { @@ -169,6 +170,7 @@ class Win32Window : public Window { int height; std::string title; bool should_close; + float tabButtonWidths; // Borderless window specific bool borderless; @@ -311,7 +313,8 @@ class Win32Window : public Window { } if ((cursor.y >= window.top && cursor.y < window.top + Config::TITLE_BAR_HEIGHT) && - (cursor.x <= window.right - 45 * 3)) { + ((cursor.x <= window.right - 45 * 3 && cursor.x >= window.left + /* logo width */ 40 + /* gap between logo and tab buttons */ 16 + this->tabButtonWidths) || + cursor.x <= window.left + /* logo width */ 40 + /* gap between logo and tab buttons */ 16)) { return HTCAPTION; } diff --git a/include/window/window.hpp b/include/window/window.hpp index f34cfac..5147db0 100644 --- a/include/window/window.hpp +++ b/include/window/window.hpp @@ -5,7 +5,7 @@ class Window { public: virtual ~Window() = default; - virtual void createWindow(int width, int height, const std::string& title) = 0; + virtual void createWindow(int width, int height, const std::string& title, const float tabButtonWidths) = 0; virtual void show() = 0; virtual void processEvents() = 0; virtual bool shouldClose() = 0; diff --git a/installer/script.nsi b/installer/script.nsi index 5aa2ea5..128f600 100644 --- a/installer/script.nsi +++ b/installer/script.nsi @@ -18,13 +18,13 @@ Var DefaultChatDir ;----------------------------------- ; Embed version info (metadata) ;----------------------------------- -VIProductVersion "0.1.1.0" +VIProductVersion "0.1.6.0" VIAddVersionKey "ProductName" "Kolosal AI Installer" VIAddVersionKey "CompanyName" 
"Genta Technology" VIAddVersionKey "FileDescription" "Kolosal AI Installer" VIAddVersionKey "LegalCopyright" "Copyright (C) 2025" -VIAddVersionKey "FileVersion" "0.1.1.0" -VIAddVersionKey "ProductVersion" "0.1.1.0" +VIAddVersionKey "FileVersion" "0.1.6.0" +VIAddVersionKey "ProductVersion" "0.1.6.0" VIAddVersionKey "OriginalFilename" "KolosalAI_Installer.exe" VIAddVersionKey "Comments" "Installer for Kolosal AI" VIAddVersionKey "Publisher" "Genta Technology" @@ -105,6 +105,9 @@ FunctionEnd ; Installation Section ;----------------------------------- Section "Kolosal AI" SecKolosalAI + ; Force overwrite of existing files so that EXE and DLL files are always replaced + SetOverwrite on + SetOutPath "$INSTDIR" ; Set write permissions diff --git a/kolosal-server b/kolosal-server new file mode 160000 index 0000000..ea06fc2 --- /dev/null +++ b/kolosal-server @@ -0,0 +1 @@ +Subproject commit ea06fc2ad047fc0143e7b0f24f6e46398398a0b6 diff --git a/models/qwen2.5-0.5b.json b/models/qwen2.5-0.5b.json index 45632b2..07feabb 100644 --- a/models/qwen2.5-0.5b.json +++ b/models/qwen2.5-0.5b.json @@ -1,9 +1,9 @@ { - "name": "Qwen 2.5 0.5B", + "name": "Qwen2.5 0.5B", "author": "Alibaba", "fullPrecision": { "type": "Full Precision", - "path": "models/qwen2.5-0.5b/int4/Qwen2.5-0.5B-Instruct-f16.gguf", + "path": "models/qwen2.5-0.5b/fp16/Qwen2.5-0.5B-Instruct-f16.gguf", "downloadLink": "https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-f16.gguf", "isDownloaded": false, "downloadProgress": 0.0, @@ -19,7 +19,7 @@ }, "quantized4Bit": { "type": "4-bit Quantized", - "path": "models/qwen2.5-0.5b/fp16/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", + "path": "models/qwen2.5-0.5b/int4/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", "downloadLink": "https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", "isDownloaded": false, "downloadProgress": 0.0, diff --git a/models/qwen2.5-1.5b.json b/models/qwen2.5-1.5b.json index d798701..f9fc696 100644 --- 
a/models/qwen2.5-1.5b.json +++ b/models/qwen2.5-1.5b.json @@ -1,5 +1,5 @@ { - "name": "Qwen 2.5 1.5B", + "name": "Qwen2.5 1.5B", "author": "Alibaba", "fullPrecision": { "type": "Full Precision", diff --git a/models/qwen2.5-14b.json b/models/qwen2.5-14b.json index 31becb2..777490b 100644 --- a/models/qwen2.5-14b.json +++ b/models/qwen2.5-14b.json @@ -1,5 +1,5 @@ { - "name": "Qwen 2.5 14B", + "name": "Qwen2.5 14B", "author": "Alibaba", "fullPrecision": { "type": "Full Precision", diff --git a/models/qwen2.5-3b.json b/models/qwen2.5-3b.json index cccadf7..095c6dc 100644 --- a/models/qwen2.5-3b.json +++ b/models/qwen2.5-3b.json @@ -1,5 +1,5 @@ { - "name": "Qwen 2.5 3B", + "name": "Qwen2.5 3B", "author": "Alibaba", "fullPrecision": { "type": "Full Precision", diff --git a/models/qwen2.5-7b.json b/models/qwen2.5-7b.json index a5e87ca..90eaf06 100644 --- a/models/qwen2.5-7b.json +++ b/models/qwen2.5-7b.json @@ -1,5 +1,5 @@ { - "name": "Qwen 2.5 7B", + "name": "Qwen2.5 7B", "author": "Alibaba", "fullPrecision": { "type": "Full Precision", diff --git a/server-test/python/openai_test.py b/server-test/python/openai_test.py new file mode 100644 index 0000000..879c4eb --- /dev/null +++ b/server-test/python/openai_test.py @@ -0,0 +1,32 @@ +import openai +import os + +# Configure the client to use your local endpoint +client = openai.OpenAI( + base_url="http://localhost:8080/v1", + api_key="sk-dummy" # Using dummy API key as in the curl example +) + +print("Starting streaming request...\n") + +# Make a streaming request +stream = client.chat.completions.create( + model="claude-3-opus-20240229", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Why anything to the power of zero is 1?"} + ], + stream=True +) + +# Process the streaming response +print("Streaming response:") +full_response = "" +for chunk in stream: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + full_response += 
content + print(content, end="", flush=True) + +print("\n\nFull response:", full_response) + diff --git a/server-test/python/requirements.txt b/server-test/python/requirements.txt new file mode 100644 index 0000000..ec838c5 --- /dev/null +++ b/server-test/python/requirements.txt @@ -0,0 +1 @@ +openai diff --git a/source/main.cpp b/source/main.cpp index 4d06824..82a64cc 100644 --- a/source/main.cpp +++ b/source/main.cpp @@ -14,6 +14,7 @@ #include "chat/chat_manager.hpp" #include "model/preset_manager.hpp" #include "model/model_manager.hpp" +#include "model/model_loader_config_manager.hpp" #include "nfd.h" @@ -156,9 +157,15 @@ class Application public: Application() { + // Initialize the TabManager and add the ChatTab (other tabs can be added similarly) + tabManager = std::make_unique(); + tabManager->addTab(std::make_unique()); + tabManager->addTab(std::make_unique()); + // Create and show the window window = WindowFactory::createWindow(); - window->createWindow(Config::WINDOW_WIDTH, Config::WINDOW_HEIGHT, Config::WINDOW_TITLE); + window->createWindow(Config::WINDOW_WIDTH, Config::WINDOW_HEIGHT, Config::WINDOW_TITLE, + tabManager->getTabCount() * 24.0f + (tabManager->getTabCount() - 2) * 10.0f + 6.0f + 12.0f); window->show(); // Create and initialize the OpenGL context @@ -175,6 +182,7 @@ class Application Chat::initializeChatManager(); Model::initializePresetManager(); Model::initializeModelManager(); + Model::initializeModelLoaderConfigManager("model_loader_config.json"); // Initialize Native File Dialog NFD_Init(); @@ -188,10 +196,6 @@ class Application // Create the window state transition manager transitionManager = std::make_unique(*window); - - // Initialize the TabManager and add the ChatTab (other tabs can be added similarly) - tabManager = std::make_unique(); - tabManager->addTab(std::make_unique()); } int run() @@ -208,7 +212,7 @@ class Application StartNewFrame(); // Render the custom title bar - titleBar(window->getNativeHandle()); + 
titleBar(window->getNativeHandle(), *tabManager); // Render the currently active tab (chat tab in this example) tabManager->renderCurrentTab();