diff --git a/external/genta-personal/bin/InferenceEngineLib.dll b/external/genta-personal/bin/InferenceEngineLib.dll index 52f1299..0ae0518 100644 Binary files a/external/genta-personal/bin/InferenceEngineLib.dll and b/external/genta-personal/bin/InferenceEngineLib.dll differ diff --git a/external/genta-personal/bin/InferenceEngineLibVulkan.dll b/external/genta-personal/bin/InferenceEngineLibVulkan.dll index a0ad996..d8e404b 100644 Binary files a/external/genta-personal/bin/InferenceEngineLibVulkan.dll and b/external/genta-personal/bin/InferenceEngineLibVulkan.dll differ diff --git a/external/genta-personal/include/types.h b/external/genta-personal/include/types.h index 8654870..6771f0a 100644 --- a/external/genta-personal/include/types.h +++ b/external/genta-personal/include/types.h @@ -69,6 +69,7 @@ struct LoadingParameters bool warmup = false; int n_parallel = 1; int n_gpu_layers = 100; + int n_batch = 4096; }; #endif // TYPES_H \ No newline at end of file diff --git a/external/genta-personal/lib/InferenceEngineLib.lib b/external/genta-personal/lib/InferenceEngineLib.lib index 70a0b7c..62cc1c4 100644 Binary files a/external/genta-personal/lib/InferenceEngineLib.lib and b/external/genta-personal/lib/InferenceEngineLib.lib differ diff --git a/external/genta-personal/lib/InferenceEngineLibVulkan.lib b/external/genta-personal/lib/InferenceEngineLibVulkan.lib index 30ad435..d90855e 100644 Binary files a/external/genta-personal/lib/InferenceEngineLibVulkan.lib and b/external/genta-personal/lib/InferenceEngineLibVulkan.lib differ diff --git a/include/chat/chat_manager.hpp b/include/chat/chat_manager.hpp index a16b5d3..27b315e 100644 --- a/include/chat/chat_manager.hpp +++ b/include/chat/chat_manager.hpp @@ -92,53 +92,57 @@ namespace Chat std::future renameCurrentChat(const std::string& newName) { return std::async(std::launch::async, [this, newName]() { - if (!validateChatName(newName)) + if (!validateChatName(newName)) { - std::cerr << "[ChatManager] [ERROR] " << 
newName << " is not valid" << std::endl; + std::cerr << "[ChatManager] [ERROR] " << newName << " is not valid" << std::endl; return false; } std::unique_lock lock(m_mutex); - if (!m_currentChatName) + if (!m_currentChatName) { - std::cerr << "[ChatManager] No current chat selected.\n"; + std::cerr << "[ChatManager] No current chat selected.\n"; return false; } - if (m_chatNameToIndex.find(newName) != m_chatNameToIndex.end()) + // Generate a unique name if the requested name already exists + std::string uniqueName = newName; + int counter = 1; + + while (m_chatNameToIndex.find(uniqueName) != m_chatNameToIndex.end()) { - std::cerr << "[ChatManager] Chat with name " << newName << " already exists.\n"; - return false; + uniqueName = newName + " (" + std::to_string(counter) + ")"; + counter++; } size_t currentIdx = m_currentChatIndex; - if (currentIdx >= m_chats.size()) + if (currentIdx >= m_chats.size()) { - std::cerr << "[ChatManager] Invalid chat index: " << currentIdx << std::endl; + std::cerr << "[ChatManager] Invalid chat index: " << currentIdx << std::endl; return false; } std::string oldName = m_chats[currentIdx].name; - m_chats[currentIdx].name = newName; + m_chats[currentIdx].name = uniqueName; m_chats[currentIdx].lastModified = static_cast(std::time(nullptr)); - + // Update indices m_chatNameToIndex.erase(oldName); - m_chatNameToIndex[newName] = currentIdx; - m_currentChatName = newName; + m_chatNameToIndex[uniqueName] = currentIdx; + m_currentChatName = uniqueName; // Save changes auto chat = m_chats[currentIdx]; auto saveResult = m_persistence->saveChat(chat).get(); - if (saveResult) + if (saveResult) { m_persistence->deleteChat(oldName).get(); - m_persistence->renameKvChat(oldName, newName).get(); + m_persistence->renameKvChat(oldName, uniqueName).get(); } return saveResult; - }); + }); } std::future clearCurrentChat() @@ -649,9 +653,7 @@ namespace Chat // Validation helpers static bool validateChatName(const std::string& name) { - if (name.empty() || 
name.length() > 256) return false; - const std::string invalidChars = R"(<>:"/\|?*)"; - return name.find_first_of(invalidChars) == std::string::npos; + return !(name.empty() || name.length() > 256); } void updateChatTimestamp(size_t chatIndex, int newTimestamp) diff --git a/include/chat/chat_persistence.hpp b/include/chat/chat_persistence.hpp index b6c88ed..c08e867 100644 --- a/include/chat/chat_persistence.hpp +++ b/include/chat/chat_persistence.hpp @@ -183,12 +183,22 @@ namespace Chat std::filesystem::path getChatPath(const std::string& chatName) const override { + // remove characters that are not allowed in file names + std::string chatNameFiltered = chatName; + std::replace_if(chatNameFiltered.begin(), chatNameFiltered.end(), + [](char c) { return !std::isalnum(c); }, '_'); + return std::filesystem::absolute( std::filesystem::path(m_basePath) / (chatName + ".chat")); } std::filesystem::path getKvChatPath(const std::string& chatName) const override { + // remove characters that are not allowed in file names + std::string chatNameFiltered = chatName; + std::replace_if(chatNameFiltered.begin(), chatNameFiltered.end(), + [](char c) { return !std::isalnum(c); }, '_'); + return std::filesystem::absolute( std::filesystem::path(m_basePath) / (chatName + ".bin")); } diff --git a/include/common.hpp b/include/common.hpp index fcfa927..bc1acf5 100644 --- a/include/common.hpp +++ b/include/common.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/include/config.hpp b/include/config.hpp index 27fef43..9686f1b 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -66,7 +66,8 @@ namespace Config namespace InputField { - constexpr size_t TEXT_SIZE = 81920; + // max 64kb of text + constexpr size_t TEXT_SIZE = 64 * 1024; constexpr float CHILD_ROUNDING = 10.0F; constexpr float FRAME_ROUNDING = 12.0F; diff --git a/include/model/model.hpp b/include/model/model.hpp index 1915009..b8d7fd9 100644 --- a/include/model/model.hpp +++ 
b/include/model/model.hpp @@ -3,12 +3,14 @@ #include #include #include +#include +#include using json = nlohmann::json; namespace Model { - // In model.hpp or the appropriate header: + // ModelVariant structure remains mostly the same struct ModelVariant { std::string type; std::string path; @@ -48,7 +50,7 @@ namespace Model } }; - inline void to_json(nlohmann::json &j, const ModelVariant &v) + inline void to_json(nlohmann::json& j, const ModelVariant& v) { j = nlohmann::json{ {"type", v.type}, @@ -56,10 +58,10 @@ namespace Model {"downloadLink", v.downloadLink}, {"isDownloaded", v.isDownloaded}, {"downloadProgress", v.downloadProgress}, - {"lastSelected", v.lastSelected}}; + {"lastSelected", v.lastSelected} }; } - inline void from_json(const nlohmann::json &j, ModelVariant &v) + inline void from_json(const nlohmann::json& j, ModelVariant& v) { j.at("type").get_to(v.type); j.at("path").get_to(v.path); @@ -69,42 +71,54 @@ namespace Model j.at("lastSelected").get_to(v.lastSelected); } + // Refactored ModelData to use a map of variants struct ModelData { std::string name; std::string author; - ModelVariant fullPrecision; - ModelVariant quantized8Bit; - ModelVariant quantized4Bit; - - ModelData(const std::string &name = "", - const std::string& author = "", - const ModelVariant &fullPrecision = ModelVariant(), - const ModelVariant &quantized8Bit = ModelVariant(), - const ModelVariant &quantized4Bit = ModelVariant()) - : name(name) - , author(author) - , fullPrecision(fullPrecision) - , quantized8Bit(quantized8Bit) - , quantized4Bit(quantized4Bit) {} + std::map variants; + + // Constructor with no variants + ModelData(const std::string& name = "", const std::string& author = "") + : name(name), author(author) { + } + + // Add a variant to the model + void addVariant(const std::string& variantType, const ModelVariant& variant) { + variants[variantType] = variant; + } + + // Check if a variant exists + bool hasVariant(const std::string& variantType) const { + return 
variants.find(variantType) != variants.end(); + } + + // Get a variant (const version) + const ModelVariant* getVariant(const std::string& variantType) const { + auto it = variants.find(variantType); + return (it != variants.end()) ? &(it->second) : nullptr; + } + + // Get a variant (non-const version) + ModelVariant* getVariant(const std::string& variantType) { + auto it = variants.find(variantType); + return (it != variants.end()) ? &(it->second) : nullptr; + } }; - inline void to_json(nlohmann::json &j, const ModelData &m) + inline void to_json(nlohmann::json& j, const ModelData& m) { j = nlohmann::json{ {"name", m.name}, - {"author", m.author}, - {"fullPrecision", m.fullPrecision}, - {"quantized8Bit", m.quantized8Bit}, - {"quantized4Bit", m.quantized4Bit}}; + {"author", m.author}, + {"variants", m.variants} + }; } - inline void from_json(const nlohmann::json &j, ModelData &m) + inline void from_json(const nlohmann::json& j, ModelData& m) { j.at("name").get_to(m.name); - j.at("author").get_to(m.author); - j.at("fullPrecision").get_to(m.fullPrecision); - j.at("quantized8Bit").get_to(m.quantized8Bit); - j.at("quantized4Bit").get_to(m.quantized4Bit); + j.at("author").get_to(m.author); + j.at("variants").get_to(m.variants); } } // namespace Model \ No newline at end of file diff --git a/include/model/model_loader_config_manager.hpp b/include/model/model_loader_config_manager.hpp index b4c153c..d7678b0 100644 --- a/include/model/model_loader_config_manager.hpp +++ b/include/model/model_loader_config_manager.hpp @@ -78,6 +78,7 @@ namespace Model bool getContinuousBatching() const { return config_.cont_batching; } bool getWarmup() const { return config_.warmup; } int getParallelCount() const { return config_.n_parallel; } + int getBatchSize() const { return config_.n_batch; } int getGpuLayers() const { return config_.n_gpu_layers; } // Setters @@ -88,6 +89,7 @@ namespace Model void setContinuousBatching(bool enable) { config_.cont_batching = enable; } void 
setWarmup(bool enable) { config_.warmup = enable; } void setParallelCount(int count) { config_.n_parallel = count; } + void setBatchSize(int size) { config_.n_batch = size; } void setGpuLayers(int layers) { config_.n_gpu_layers = layers; } private: diff --git a/include/model/model_manager.hpp b/include/model/model_manager.hpp index 73154ef..7605658 100644 --- a/include/model/model_manager.hpp +++ b/include/model/model_manager.hpp @@ -31,13 +31,14 @@ typedef void (DestroyInferenceEngineFunc)(IInferenceEngine*); namespace Model { + static std::atomic seqCounter; class ModelManager { public: - static ModelManager &getInstance() + static ModelManager &getInstance(const bool async = true) { - static ModelManager instance(std::make_unique("models")); + static ModelManager instance(std::make_unique("models"), async); return instance; } @@ -247,6 +248,16 @@ namespace Model return variant ? variant->downloadProgress : 0.0; } + bool isAnyVariantDownloaded(int modelIndex) const { + const ModelData& model = m_models[modelIndex]; + for (const auto& [variant, _] : model.variants) { + if (isModelDownloaded(modelIndex, variant)) { + return true; + } + } + return false; + } + //-------------------------------------------------------------------------------------------- // Inference Engine //-------------------------------------------------------------------------------------------- @@ -277,6 +288,11 @@ namespace Model params.topP = request.top_p; params.streaming = request.stream; + // set seqId to be the current timestamp + auto now = std::chrono::system_clock::now(); + auto timestamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + params.seqId = static_cast(timestamp * 1000 + seqCounter++); + return params; } @@ -1060,11 +1076,11 @@ namespace Model if (modelIndex == m_currentModelIndex && variantType == m_currentVariantType) { - unloadModel(); + unloadModel(); } - // Call the persistence layer to delete the file. 
- auto fut = m_persistence->deleteModelVariant(m_models[modelIndex], *variant); + // Call the persistence layer to delete the file - passing the variant type instead of the variant + auto fut = m_persistence->deleteModelVariant(m_models[modelIndex], variantType); fut.get(); // Wait for deletion to complete. return true; } @@ -1107,7 +1123,7 @@ namespace Model } private: - explicit ModelManager(std::unique_ptr persistence) + explicit ModelManager(std::unique_ptr persistence, const bool async = true) : m_persistence(std::move(persistence)) , m_currentModelName(std::nullopt) , m_currentModelIndex(0) @@ -1117,7 +1133,27 @@ namespace Model , m_modelLoaded(false) , m_modelGenerationInProgress(false) { - startAsyncInitialization(); + if (async) + { + startAsyncInitialization(); + return; + } + + // Load inference engine backend and models synchronously + loadModels(); + bool useVulkan = useVulkanBackend(); + std::string backendName = "InferenceEngineLib.dll"; + if (useVulkan) + { + backendName = "InferenceEngineLibVulkan.dll"; + } + + if (!loadInferenceEngineDynamically(backendName.c_str())) + { + std::cerr << "[ModelManager] Failed to load inference engine for backend: " + << backendName << std::endl; + return; + } } ~ModelManager() @@ -1208,7 +1244,6 @@ namespace Model auto loadedModels = m_persistence->loadAllModels().get(); // Merge any duplicate models by name. - // (If duplicate files exist, we merge by choosing the variant with the higher lastSelected value.) std::unordered_map mergedModels; for (auto& model : loadedModels) { @@ -1219,49 +1254,53 @@ namespace Model } else { - // For each variant, update if the new one was used more recently. 
- if (model.fullPrecision.lastSelected > it->second.fullPrecision.lastSelected) - it->second.fullPrecision = model.fullPrecision; - if (model.quantized8Bit.lastSelected > it->second.quantized8Bit.lastSelected) - it->second.quantized8Bit = model.quantized8Bit; - if (model.quantized4Bit.lastSelected > it->second.quantized4Bit.lastSelected) - it->second.quantized4Bit = model.quantized4Bit; + // Merge variants based on last selected time + for (auto& [type, variant] : model.variants) + { + auto existingIt = it->second.variants.find(type); + if (existingIt == it->second.variants.end() || + variant.lastSelected > existingIt->second.lastSelected) + { + it->second.variants[type] = variant; + } + } } } // Rebuild the models vector. std::vector models; + models.reserve(mergedModels.size()); for (auto& pair : mergedModels) { models.push_back(pair.second); } - // Check and fix each variant�s download status. + // Check and fix each variant's download status. for (auto& model : models) { - checkAndFixDownloadStatus(model.fullPrecision); - checkAndFixDownloadStatus(model.quantized8Bit); - checkAndFixDownloadStatus(model.quantized4Bit); + for (auto& [type, variant] : model.variants) + { + checkAndFixDownloadStatus(variant); + } } - // Update internal state (including the variant map) under lock. + // Update internal state under lock. { std::unique_lock lock(m_mutex); m_models = std::move(models); m_modelNameToIndex.clear(); m_modelVariantMap.clear(); - // For each model, choose the "best" variant based on lastSelected, - // but prioritize the downloaded state and the desired order: - // first check 8-bit Quantized, then 4-bit Quantized, and lastly Full Precision. + // For each model, choose the "best" variant based on lastSelected and downloaded state for (size_t i = 0; i < m_models.size(); ++i) { m_modelNameToIndex[m_models[i].name] = i; int bestEffectiveValue = -1; std::string bestVariant; - // Helper lambda to calculate effective value. 
- auto checkVariant = [&](const ModelVariant& variant) { + // Check each variant + for (const auto& [type, variant] : m_models[i].variants) + { int effectiveValue = variant.lastSelected; if (variant.isDownloaded) { @@ -1270,21 +1309,20 @@ namespace Model if (effectiveValue > bestEffectiveValue) { bestEffectiveValue = effectiveValue; - bestVariant = variant.type; + bestVariant = type; } - }; + } - // Check in the desired order: 8-bit, then 4-bit, then full precision. - checkVariant(m_models[i].quantized8Bit); - checkVariant(m_models[i].quantized4Bit); - checkVariant(m_models[i].fullPrecision); + // If no variant was ever selected, default to first variant or empty + if (bestVariant.empty() && !m_models[i].variants.empty()) + { + bestVariant = m_models[i].variants.begin()->first; + } - // If no variant was ever selected, default to "8-bit Quantized". - if (bestVariant.empty()) + if (!bestVariant.empty()) { - bestVariant = "8-bit Quantized"; + m_modelVariantMap[m_models[i].name] = bestVariant; } - m_modelVariantMap[m_models[i].name] = bestVariant; } } @@ -1295,18 +1333,18 @@ namespace Model for (size_t i = 0; i < m_models.size(); ++i) { const auto& model = m_models[i]; - // Order is arbitrary; adjust as needed. 
- const ModelVariant* variants[] = { &model.quantized8Bit, &model.quantized4Bit, &model.fullPrecision }; - for (const ModelVariant* variant : variants) + + for (const auto& [type, variant] : model.variants) { - if (variant->isDownloaded && variant->lastSelected > maxLastSelected) + if (variant.isDownloaded && variant.lastSelected > maxLastSelected) { - maxLastSelected = variant->lastSelected; + maxLastSelected = variant.lastSelected; selectedModelIndex = i; - selectedVariantType = variant->type; + selectedVariantType = type; } } } + { std::unique_lock lock(m_mutex); if (maxLastSelected >= 0) @@ -1347,31 +1385,33 @@ namespace Model } } - ModelVariant *getVariantLocked(size_t modelIndex, const std::string &variantType) const + ModelVariant* getVariantLocked(size_t modelIndex, const std::string& variantType) { if (modelIndex >= m_models.size()) return nullptr; - const auto &model = m_models[modelIndex]; - if (variantType == "Full Precision") - { - return const_cast(&model.fullPrecision); + auto& model = m_models[modelIndex]; + auto it = model.variants.find(variantType); + if (it != model.variants.end()) { + return &it->second; } - else if (variantType == "8-bit Quantized") - { - return const_cast(&model.quantized8Bit); - } - else if (variantType == "4-bit Quantized") - { - return const_cast(&model.quantized4Bit); + return nullptr; + } + + const ModelVariant* getVariantLocked(size_t modelIndex, const std::string& variantType) const + { + if (modelIndex >= m_models.size()) + return nullptr; + + const auto& model = m_models[modelIndex]; + auto it = model.variants.find(variantType); + if (it != model.variants.end()) { + return &it->second; } - else - { - return nullptr; - } + return nullptr; } - void startDownloadAsyncLocked(size_t modelIndex, const std::string &variantType) + void startDownloadAsyncLocked(size_t modelIndex, const std::string& variantType) { if (modelIndex >= m_models.size()) return; @@ -1380,12 +1420,10 @@ namespace Model if (!variant) return; - 
ModelData* model = &m_models[modelIndex]; - variant->downloadProgress = 0.01f; // 0% looks like no progress - // Begin the asynchronous download. - auto downloadFuture = m_persistence->downloadModelVariant(*model, *variant); + // Begin the asynchronous download - passing the variant type rather than the variant itself + auto downloadFuture = m_persistence->downloadModelVariant(m_models[modelIndex], variantType); // Chain a continuation that waits for the download to complete. m_downloadFutures.emplace_back(std::async(std::launch::async, @@ -1654,8 +1692,10 @@ namespace Model return false; } +#ifdef DEBUG std::cout << "[ModelManager] Successfully loaded inference engine from: " << backendName << std::endl; +#endif m_inferenceEngine = m_createInferenceEnginePtr(); if (!m_inferenceEngine) { @@ -1772,16 +1812,11 @@ namespace Model void cancelAllDownloads() { std::unique_lock lock(m_mutex); for (auto& model : m_models) { - // For each variant, if it�s still in progress (i.e. download progress is between 0 and 100) - // set the cancel flag. 
- if (model.fullPrecision.downloadProgress > 0.0 && model.fullPrecision.downloadProgress < 100.0) { - model.fullPrecision.cancelDownload = true; - } - if (model.quantized8Bit.downloadProgress > 0.0 && model.quantized8Bit.downloadProgress < 100.0) { - model.quantized8Bit.cancelDownload = true; - } - if (model.quantized4Bit.downloadProgress > 0.0 && model.quantized4Bit.downloadProgress < 100.0) { - model.quantized4Bit.cancelDownload = true; + for (auto& [type, variant] : model.variants) { + // If download is in progress (between 0 and 100), set cancel flag + if (variant.downloadProgress > 0.0 && variant.downloadProgress < 100.0) { + variant.cancelDownload = true; + } } } } @@ -1854,9 +1889,9 @@ namespace Model m_streamingContexts; }; - inline void initializeModelManager() + inline void initializeModelManager(const bool async = true) { - ModelManager::getInstance(); + ModelManager::getInstance(async); } inline void initializeModelManagerWithCustomPersistence(std::unique_ptr persistence) diff --git a/include/model/model_persistence.hpp b/include/model/model_persistence.hpp index c054802..40168a1 100644 --- a/include/model/model_persistence.hpp +++ b/include/model/model_persistence.hpp @@ -7,7 +7,9 @@ #include #include #include +#include #include +#include namespace Model { @@ -16,15 +18,15 @@ namespace Model public: virtual ~IModelPersistence() = default; virtual std::future> loadAllModels() = 0; - virtual std::future downloadModelVariant(ModelData& modelData, ModelVariant& variant) = 0; + virtual std::future downloadModelVariant(ModelData& modelData, const std::string& variantType) = 0; virtual std::future saveModelData(const ModelData& modelData) = 0; - virtual std::future deleteModelVariant(ModelData& modelData, ModelVariant& variant) = 0; + virtual std::future deleteModelVariant(ModelData& modelData, const std::string& variantType) = 0; }; class FileModelPersistence : public IModelPersistence { public: - explicit FileModelPersistence(const std::string &basePath) + 
explicit FileModelPersistence(const std::string& basePath) : m_basePath(basePath) { if (!std::filesystem::exists(m_basePath)) @@ -37,14 +39,14 @@ namespace Model { return std::async(std::launch::async, [this]() -> std::vector { std::vector models; - try + try { - for (const auto& entry : std::filesystem::directory_iterator(m_basePath)) + for (const auto& entry : std::filesystem::directory_iterator(m_basePath)) { - if (entry.path().extension() == ".json") + if (entry.path().extension() == ".json") { std::ifstream file(entry.path()); - if (file.is_open()) + if (file.is_open()) { nlohmann::json j; file >> j; @@ -52,16 +54,26 @@ namespace Model } } } - } catch (...) + } + catch (...) { // Return whatever was read successfully. } return models; }); } - std::future downloadModelVariant(ModelData& modelData, ModelVariant& variant) override + std::future downloadModelVariant(ModelData& modelData, const std::string& variantType) override { - return std::async(std::launch::async, [&variant, &modelData, this]() { + return std::async(std::launch::async, [this, &modelData, variantType]() { + // Check if variant exists + auto variantIter = modelData.variants.find(variantType); + if (variantIter == modelData.variants.end()) { + std::cerr << "[FileModelPersistence] Error: Variant '" << variantType << "' not found in model '" << modelData.name << "'\n"; + return; + } + + ModelVariant& variant = variantIter->second; + // Reset cancellation flag at the start. 
variant.cancelDownload = false; @@ -125,36 +137,21 @@ namespace Model file << j.dump(4); file.close(); } - }); + }); } - static size_t write_data(void* ptr, size_t size, size_t nmemb, void* userdata) + std::future deleteModelVariant(ModelData& modelData, const std::string& variantType) override { - std::ofstream* stream = static_cast(userdata); - size_t written = 0; - if (stream->is_open()) - { - stream->write(static_cast(ptr), size * nmemb); - written = size * nmemb; - } - return written; - } + return std::async(std::launch::async, [this, &modelData, variantType]() { + // Check if variant exists + auto variantIter = modelData.variants.find(variantType); + if (variantIter == modelData.variants.end()) { + std::cerr << "[FileModelPersistence] Error: Variant '" << variantType << "' not found in model '" << modelData.name << "'\n"; + return; + } - static int progress_callback(void* ptr, curl_off_t total, curl_off_t now, curl_off_t, curl_off_t) - { - ModelVariant* variant = static_cast(ptr); - if (total > 0) { - variant->downloadProgress = static_cast(now) / static_cast(total) * 100.0; - } - // If cancel flag is set, abort the transfer. - if (variant->cancelDownload) - return 1; // non-zero return value signals curl to abort - return 0; - } + ModelVariant& variant = variantIter->second; - std::future deleteModelVariant(ModelData& modelData, ModelVariant& variant) override - { - return std::async(std::launch::async, [this, &modelData, &variant]() { // Check if the file exists and attempt to remove it. if (std::filesystem::exists(variant.path)) { @@ -166,7 +163,7 @@ namespace Model << ": " << e.what() << "\n"; } } - // Reset the variant’s state so that it can be redownloaded. + // Reset the variant's state so that it can be redownloaded. 
variant.isDownloaded = false; variant.downloadProgress = 0.0; variant.lastSelected = 0; @@ -177,6 +174,30 @@ namespace Model } private: + static size_t write_data(void* ptr, size_t size, size_t nmemb, void* userdata) + { + std::ofstream* stream = static_cast(userdata); + size_t written = 0; + if (stream->is_open()) + { + stream->write(static_cast(ptr), size * nmemb); + written = size * nmemb; + } + return written; + } + + static int progress_callback(void* ptr, curl_off_t total, curl_off_t now, curl_off_t, curl_off_t) + { + ModelVariant* variant = static_cast(ptr); + if (total > 0) { + variant->downloadProgress = static_cast(now) / static_cast(total) * 100.0; + } + // If cancel flag is set, abort the transfer. + if (variant->cancelDownload) + return 1; // non-zero return value signals curl to abort + return 0; + } + std::string m_basePath; }; } // namespace Model \ No newline at end of file diff --git a/include/model/preset.hpp b/include/model/preset.hpp index 8fca4e8..c439291 100644 --- a/include/model/preset.hpp +++ b/include/model/preset.hpp @@ -1,5 +1,7 @@ #pragma once +#include "config.hpp" + #include #include @@ -43,13 +45,19 @@ namespace Model : id(id) , lastModified(lastModified) , name(name) - , systemPrompt(systemPrompt) + , systemPrompt("") , temperature(temperature) , top_p(top_p) , top_k(top_k) , random_seed(random_seed) , min_length(min_length) - , max_new_tokens(max_new_tokens) {} + , max_new_tokens(max_new_tokens) + { + // Pre-allocate with a reasonable reserve size + // This helps prevent reallocations and memory fragmentation + this->systemPrompt.reserve(Config::InputField::TEXT_SIZE); // Reserve 4KB initially + this->systemPrompt = systemPrompt; // Then assign the value + } bool operator==(const ModelPreset& other) const { diff --git a/include/model/preset_persistence.hpp b/include/model/preset_persistence.hpp index 145cbc3..d0a0efa 100644 --- a/include/model/preset_persistence.hpp +++ b/include/model/preset_persistence.hpp @@ -68,18 +68,37 @@ 
namespace Model { std::filesystem::path filePath = getPresetPath(preset.name); - nlohmann::json j = preset; + // Open file before JSON serialization to fail early if file can't be opened std::ofstream file(filePath); if (!file.is_open()) { + std::cerr << "[PRESET PERSISTENCE] [ERROR] Failed to open file for writing: " << filePath.string() << std::endl; + return false; + } + + // Serialize to JSON with better exception handling + nlohmann::json j; + try { + j = preset; + } + catch (const std::exception& e) { + std::cerr << "[PRESET PERSISTENCE] [ERROR] JSON serialization failed: " << e.what() << std::endl; + return false; + } + + // Write to file with exception handling + try { + file << j.dump(4); + } + catch (const std::exception& e) { + std::cerr << "[PRESET PERSISTENCE] [ERROR] Failed to write JSON to file: " << e.what() << std::endl; return false; } - file << j.dump(4); return true; } - catch (const std::exception&) + catch (const std::exception& e) { - // Log error + std::cerr << "[PRESET PERSISTENCE] [ERROR] Failed to save preset: " << e.what() << std::endl; return false; } } diff --git a/include/ui/chat/chat_window.hpp b/include/ui/chat/chat_window.hpp index 53d66b3..c89f80a 100644 --- a/include/ui/chat/chat_window.hpp +++ b/include/ui/chat/chat_window.hpp @@ -321,11 +321,6 @@ class ChatWindow { s.erase(pos, titlePrefix.length()); } - // Remove symbols except '+' and '-' - s.erase(std::remove_if(s.begin(), s.end(), [](char c) { - return std::ispunct(static_cast(c)) && c != '+' && c != '-'; - }), s.end()); - // Trim whitespace s.erase(0, s.find_first_not_of(" \t\n\r")); if (!s.empty()) { diff --git a/include/ui/chat/model_manager_modal.hpp b/include/ui/chat/model_manager_modal.hpp index fe93abd..8aabaac 100644 --- a/include/ui/chat/model_manager_modal.hpp +++ b/include/ui/chat/model_manager_modal.hpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace ModelManagerConstants { constexpr float cardWidth = 200.0f; @@ -15,6 +16,8 @@ namespace 
ModelManagerConstants { constexpr float cardSpacing = 10.0f; constexpr float padding = 16.0f; constexpr float modalVerticalScale = 0.9f; + constexpr float sectionSpacing = 20.0f; + constexpr float sectionHeaderHeight = 30.0f; } class DeleteModelModalComponent { @@ -87,13 +90,13 @@ class DeleteModelModalComponent { class ModelCardRenderer { public: ModelCardRenderer(int index, const Model::ModelData& modelData, - std::function onDeleteRequested) - : m_index(index), m_model(modelData), m_onDeleteRequested(onDeleteRequested) + std::function onDeleteRequested, std::string id = "") + : m_index(index), m_model(modelData), m_onDeleteRequested(onDeleteRequested), m_id(id) { - selectButton.id = "##select" + std::to_string(m_index); + selectButton.id = "##select" + std::to_string(m_index) + m_id; selectButton.size = ImVec2(ModelManagerConstants::cardWidth - 18, 0); - deleteButton.id = "##delete" + std::to_string(m_index); + deleteButton.id = "##delete" + std::to_string(m_index) + m_id; deleteButton.size = ImVec2(24, 0); deleteButton.backgroundColor = RGBAToImVec4(200, 50, 50, 255); deleteButton.hoverColor = RGBAToImVec4(220, 70, 70, 255); @@ -104,14 +107,14 @@ class ModelCardRenderer { m_onDeleteRequested(m_index, currentVariant); }; - authorLabel.id = "##modelAuthor" + std::to_string(m_index); + authorLabel.id = "##modelAuthor" + std::to_string(m_index) + m_id; authorLabel.label = m_model.author; authorLabel.size = ImVec2(0, 0); authorLabel.fontType = FontsManager::ITALIC; authorLabel.fontSize = FontsManager::SM; authorLabel.alignment = Alignment::LEFT; - nameLabel.id = "##modelName" + std::to_string(m_index); + nameLabel.id = "##modelName" + std::to_string(m_index) + m_id; nameLabel.label = m_model.name; nameLabel.size = ImVec2(0, 0); nameLabel.fontType = FontsManager::BOLD; @@ -127,7 +130,7 @@ class ModelCardRenderer { ImGui::PushStyleColor(ImGuiCol_ChildBg, RGBAToImVec4(26, 26, 26, 255)); ImGui::PushStyleVar(ImGuiStyleVar_ChildRounding, 8.0f); - std::string childName = 
"ModelCard" + std::to_string(m_index); + std::string childName = "ModelCard" + std::to_string(m_index) + m_id; ImGui::BeginChild(childName.c_str(), ImVec2(ModelManagerConstants::cardWidth, ModelManagerConstants::cardHeight), true); renderHeader(); @@ -154,7 +157,7 @@ class ModelCardRenderer { ImGui::SetCursorPosY(ImGui::GetCursorPosY() - 12); float fraction = static_cast(progress) / 100.0f; - ProgressBar::render(fraction, ImVec2(ModelManagerConstants::cardWidth - 18, 6)); + ProgressBar::render(fraction, ImVec2(ModelManagerConstants::cardWidth - 18, 6)); ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 4); } else { @@ -172,7 +175,7 @@ class ModelCardRenderer { } else { bool isLoadingSelected = isSelected && Model::ModelManager::getInstance().isLoadInProgress(); - bool isUnloading = isSelected && Model::ModelManager::getInstance().isUnloadInProgress(); + bool isUnloading = isSelected && Model::ModelManager::getInstance().isUnloadInProgress(); // Configure button label and base state if (isLoadingSelected || isUnloading) { @@ -206,7 +209,7 @@ class ModelCardRenderer { selectButton.borderColor = RGBAToImVec4(172, 131, 255, 255 / 4); selectButton.borderSize = 1.0f; selectButton.state = ButtonState::NORMAL; - selectButton.tooltip = "Click to unload model from memory"; + selectButton.tooltip = "Click to unload model from memory"; selectButton.onClick = [this]() { Model::ModelManager::getInstance().unloadModel(); }; @@ -228,9 +231,9 @@ class ModelCardRenderer { ImGui::SetCursorPosX(ImGui::GetCursorPosX() + ImGui::GetContentRegionAvail().x - 24 - 2); if (isSelected && Model::ModelManager::getInstance().isLoadInProgress()) - deleteButton.state = ButtonState::DISABLED; - else - deleteButton.state = ButtonState::NORMAL; + deleteButton.state = ButtonState::DISABLED; + else + deleteButton.state = ButtonState::NORMAL; Button::render(deleteButton); } @@ -250,6 +253,7 @@ class ModelCardRenderer { private: int m_index; + std::string m_id; const Model::ModelData& m_model; std::function 
m_onDeleteRequested; @@ -259,7 +263,28 @@ class ModelCardRenderer { } void renderVariantOptions(const std::string& currentVariant) { - auto renderVariant = [this, &currentVariant](const std::string& variant, const std::string& label) { + LabelConfig variantLabel; + variantLabel.id = "##variantLabel" + std::to_string(m_index); + variantLabel.label = "Model Variants"; + variantLabel.size = ImVec2(0, 0); + variantLabel.fontType = FontsManager::REGULAR; + variantLabel.fontSize = FontsManager::SM; + variantLabel.alignment = Alignment::LEFT; + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 2); + Label::render(variantLabel); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 4); + + // Calculate the height for the scrollable area + // Card height minus header space minus button space at bottom + const float variantAreaHeight = 100.0f; // Adjust this value based on your layout needs + + // Create a scrollable child window for variants + ImGui::BeginChild(("##VariantScroll" + std::to_string(m_index)).c_str(), + ImVec2(ModelManagerConstants::cardWidth - 18, variantAreaHeight), + false); + + // Helper function to render a single variant option + auto renderVariant = [this, &currentVariant](const std::string& variant) { ButtonConfig btnConfig; btnConfig.id = "##" + variant + std::to_string(m_index); btnConfig.icon = (currentVariant == variant) ? 
ICON_CI_CHECK : ICON_CI_CLOSE; @@ -270,26 +295,30 @@ class ModelCardRenderer { btnConfig.onClick = [variant, this]() { Model::ModelManager::getInstance().setPreferredVariant(m_model.name, variant); }; - ImGui::SetCursorPosX(ImGui::GetCursorPosX() + 4); + ImGui::SetCursorPosX(ImGui::GetCursorPosX() + 4); Button::render(btnConfig); ImGui::SameLine(0.0f, 4.0f); LabelConfig variantLabel; variantLabel.id = "##" + variant + "Label" + std::to_string(m_index); - variantLabel.label = label; + variantLabel.label = variant; variantLabel.size = ImVec2(0, 0); variantLabel.fontType = FontsManager::REGULAR; variantLabel.fontSize = FontsManager::SM; variantLabel.alignment = Alignment::LEFT; - ImGui::SetCursorPosY(ImGui::GetCursorPosY() - 6); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() - 6); Label::render(variantLabel); }; - renderVariant("Full Precision", "Use Full Precision"); - ImGui::Spacing(); - renderVariant("8-bit Quantized", "Use 8-bit quantization"); - ImGui::Spacing(); - renderVariant("4-bit Quantized", "Use 4-bit quantization"); + // Iterate through all variants in the model + for (const auto& [variant, variantData] : m_model.variants) { + // For each variant, render a button + renderVariant(variant); + ImGui::Spacing(); + } + + // End the scrollable area + ImGui::EndChild(); } ButtonConfig deleteButton; @@ -298,6 +327,15 @@ class ModelCardRenderer { LabelConfig authorLabel; }; +struct SortableModel { + int index; + std::string name; + + bool operator<(const SortableModel& other) const { + return name < other.name; + } +}; + // TODO: Fix the nested modal // when i tried to make the delete modal rendered on top of the model modal, it simply // either didn't show up at all, or the model modal closed, and the entire application @@ -305,9 +343,55 @@ class ModelCardRenderer { // Time wasted: 18 hours. 
class ModelManagerModal { public: - ModelManagerModal() = default; + ModelManagerModal() : m_searchText(""), m_shouldFocusSearch(false) {} void render(bool& showDialog) { + auto& manager = Model::ModelManager::getInstance(); + + // Update sorted models when: + // - The modal is opened for the first time + // - A model is downloaded, deleted, or its status changed + bool needsUpdate = false; + + if (showDialog && !m_wasShowing) { + // Modal just opened - refresh the model list + needsUpdate = true; + // Focus the search field when the modal is opened + m_shouldFocusSearch = true; + } + + // Check for changes in download status + const auto& models = manager.getModels(); + if (models.size() != m_lastModelCount) { + // The model count changed + needsUpdate = true; + } + + // Check for changes in downloaded status + if (!needsUpdate) { + std::unordered_set currentDownloaded; + + for (size_t i = 0; i < models.size(); ++i) { + // Check if ANY variant is downloaded instead of just the current one + if (manager.isAnyVariantDownloaded(static_cast(i))) { + currentDownloaded.insert(models[i].name); // Don't need to add variant to the key + } + } + + if (currentDownloaded != m_lastDownloadedStatus) { + needsUpdate = true; + m_lastDownloadedStatus = std::move(currentDownloaded); + } + } + + if (needsUpdate) { + updateSortedModels(); + m_lastModelCount = models.size(); + filterModels(); // Apply the current search filter to the updated models + } + + m_wasShowing = showDialog; + ImVec2 windowSize = ImGui::GetWindowSize(); if (windowSize.x == 0) windowSize = ImGui::GetMainViewport()->Size; const float targetWidth = windowSize.x; @@ -324,19 +408,134 @@ class ModelManagerModal { auto renderCards = [numCards, this]() { auto& manager = Model::ModelManager::getInstance(); const auto& models = manager.getModels(); - for (size_t i = 0; i < models.size(); ++i) { - if (i % numCards == 0) { - ImGui::SetCursorPos(ImVec2(ModelManagerConstants::padding, - ImGui::GetCursorPosY() + (i > 0 ? 
ModelManagerConstants::cardSpacing : 0))); + + // Render search field at the top + renderSearchField(); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + ModelManagerConstants::sectionSpacing); + + LabelConfig downloadedSectionLabel; + downloadedSectionLabel.id = "##downloadedModelsHeader"; + downloadedSectionLabel.label = "Downloaded Models"; + downloadedSectionLabel.size = ImVec2(0, 0); + downloadedSectionLabel.fontSize = FontsManager::LG; + downloadedSectionLabel.alignment = Alignment::LEFT; + + ImGui::SetCursorPos(ImVec2(ModelManagerConstants::padding, ImGui::GetCursorPosY())); + Label::render(downloadedSectionLabel); + + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 10.0f); + + // Count downloaded models and check if we have any + bool hasDownloadedModels = false; + int downloadedCardCount = 0; + + // First pass to check if we have any downloaded models + for (const auto& sortableModel : m_filteredModels) { + // Check if ANY variant is downloaded instead of just current variant + if (manager.isAnyVariantDownloaded(sortableModel.index)) { + hasDownloadedModels = true; + break; + } + } + + // Render downloaded models + if (hasDownloadedModels) { + for (const auto& sortableModel : m_filteredModels) { + // Check if ANY variant is downloaded instead of just current variant + if (manager.isAnyVariantDownloaded(sortableModel.index)) { + if (downloadedCardCount % numCards == 0) { + ImGui::SetCursorPos(ImVec2(ModelManagerConstants::padding, + ImGui::GetCursorPosY() + (downloadedCardCount > 0 ? 
ModelManagerConstants::cardSpacing : 0))); + } + + ModelCardRenderer card(sortableModel.index, models[sortableModel.index], + [this](int index, const std::string& variant) { + m_deleteModal.setModel(index, variant); + m_deleteModalOpen = true; + }, "downloaded"); + card.render(); + + if ((downloadedCardCount + 1) % numCards != 0) { + ImGui::SameLine(0.0f, ModelManagerConstants::cardSpacing); + } + + downloadedCardCount++; + } } - ModelCardRenderer card(static_cast(i), models[i], - [this](int index, const std::string& variant) { - m_deleteModal.setModel(index, variant); - m_deleteModalOpen = true; - }); - card.render(); - if ((i + 1) % numCards != 0 && i < models.size() - 1) { - ImGui::SameLine(0.0f, ModelManagerConstants::cardSpacing); + + // Add spacing before the next section + if (downloadedCardCount % numCards != 0) { + ImGui::NewLine(); + } + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + ModelManagerConstants::sectionSpacing); + } + else { + // Show a message if no downloaded models + LabelConfig noModelsLabel; + noModelsLabel.id = "##noDownloadedModels"; + noModelsLabel.label = m_searchText.empty() ? + "No downloaded models yet. Download models from the section below." : + "No downloaded models match your search. 
Try a different search term."; + noModelsLabel.size = ImVec2(0, 0); + noModelsLabel.fontType = FontsManager::ITALIC; + noModelsLabel.fontSize = FontsManager::MD; + noModelsLabel.alignment = Alignment::LEFT; + + ImGui::SetCursorPosX(ModelManagerConstants::padding); + Label::render(noModelsLabel); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + ModelManagerConstants::sectionSpacing); + } + + // Separator between sections + ImGui::SetCursorPosX(ModelManagerConstants::padding); + ImGui::PushStyleColor(ImGuiCol_Separator, ImVec4(0.3f, 0.3f, 0.3f, 0.5f)); + ImGui::Separator(); + ImGui::PopStyleColor(); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 10.0f); + + // Render "Available Models" section header + LabelConfig availableSectionLabel; + availableSectionLabel.id = "##availableModelsHeader"; + availableSectionLabel.label = "Available Models"; + availableSectionLabel.size = ImVec2(0, 0); + availableSectionLabel.fontSize = FontsManager::LG; + availableSectionLabel.alignment = Alignment::LEFT; + + ImGui::SetCursorPosX(ModelManagerConstants::padding); + Label::render(availableSectionLabel); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 10.0f); + + // Check if we have any available models that match the search + if (m_filteredModels.empty() && !m_searchText.empty()) { + LabelConfig noModelsLabel; + noModelsLabel.id = "##noAvailableModels"; + noModelsLabel.label = "No models match your search. Try a different search term."; + noModelsLabel.size = ImVec2(0, 0); + noModelsLabel.fontType = FontsManager::ITALIC; + noModelsLabel.fontSize = FontsManager::MD; + noModelsLabel.alignment = Alignment::LEFT; + + ImGui::SetCursorPosX(ModelManagerConstants::padding); + Label::render(noModelsLabel); + } + else { + // Render all models (available for download) + for (size_t i = 0; i < m_filteredModels.size(); ++i) { + if (i % numCards == 0) { + ImGui::SetCursorPos(ImVec2(ModelManagerConstants::padding, + ImGui::GetCursorPosY() + (i > 0 ? 
ModelManagerConstants::cardSpacing : 0))); + } + + ModelCardRenderer card(m_filteredModels[i].index, models[m_filteredModels[i].index], + [this](int index, const std::string& variant) { + m_deleteModal.setModel(index, variant); + m_deleteModalOpen = true; + }); + card.render(); + + if ((i + 1) % numCards != 0 && i < m_filteredModels.size() - 1) { + ImGui::SameLine(0.0f, ModelManagerConstants::cardSpacing); + } } } }; @@ -351,15 +550,27 @@ class ModelManagerModal { config.padding = ImVec2(ModelManagerConstants::padding, 8.0f); ModalWindow::render(config); - // Render the delete modal if it’s open. + // Render the delete modal if it's open. if (m_deleteModalOpen) { m_deleteModal.render(m_deleteModalOpen); + + // Mark for update on next frame after deletion + if (!m_deleteModalOpen && m_wasDeleteModalOpen) { + m_needsUpdateAfterDelete = true; + } } if (m_wasDeleteModalOpen && !m_deleteModalOpen) { showDialog = true; ImGui::OpenPopup(config.id.c_str()); } + + if (m_needsUpdateAfterDelete && !m_deleteModalOpen) { + updateSortedModels(); + filterModels(); // Apply search filter after updating models + m_needsUpdateAfterDelete = false; + } + m_wasDeleteModalOpen = m_deleteModalOpen; if (!ImGui::IsPopupOpen(config.id.c_str())) { @@ -370,7 +581,106 @@ class ModelManagerModal { private: DeleteModelModalComponent m_deleteModal; bool m_deleteModalOpen = false; - - // This flag tracks if the delete modal was open on the previous frame. 
bool m_wasDeleteModalOpen = false; + bool m_wasShowing = false; + bool m_needsUpdateAfterDelete = false; + size_t m_lastModelCount = 0; + std::unordered_set m_lastDownloadedStatus; + std::vector m_sortedModels; + std::vector m_filteredModels; // New: Filtered list of models based on search + + // Search related variables + std::string m_searchText; + bool m_shouldFocusSearch; + + void updateSortedModels() { + auto& manager = Model::ModelManager::getInstance(); + const auto& models = manager.getModels(); + + // Clear and rebuild the sorted model list + m_sortedModels.clear(); + m_sortedModels.reserve(models.size()); + + for (size_t i = 0; i < models.size(); ++i) { + // Store the index and name directly, avoiding storing pointers + m_sortedModels.push_back({ static_cast(i), models[i].name }); + } + + // Sort models alphabetically by name + std::sort(m_sortedModels.begin(), m_sortedModels.end()); + + // Initialize filtered models with all models when sort is updated + filterModels(); + } + + // Filter models based on search text + void filterModels() { + m_filteredModels.clear(); + auto& manager = Model::ModelManager::getInstance(); + const auto& models = manager.getModels(); + + if (m_searchText.empty()) { + // If no search term, show all models + m_filteredModels = m_sortedModels; + return; + } + + // Convert search text to lowercase for case-insensitive comparison + std::string searchLower = m_searchText; + std::transform(searchLower.begin(), searchLower.end(), searchLower.begin(), + [](unsigned char c) { return std::tolower(c); }); + + // Filter models based on name OR author containing the search text + for (const auto& model : m_sortedModels) { + // Get the model data using the stored index + const auto& modelData = models[model.index]; + + // Convert name and author to lowercase for case-insensitive comparison + std::string nameLower = modelData.name; + std::transform(nameLower.begin(), nameLower.end(), nameLower.begin(), + [](unsigned char c) { return 
std::tolower(c); }); + + std::string authorLower = modelData.author; + std::transform(authorLower.begin(), authorLower.end(), authorLower.begin(), + [](unsigned char c) { return std::tolower(c); }); + + // Add model to filtered results if either name OR author contains the search text + if (nameLower.find(searchLower) != std::string::npos || + authorLower.find(searchLower) != std::string::npos) { + m_filteredModels.push_back(model); + } + } + } + + // New method: Render search field + void renderSearchField() { + ImGui::SetCursorPosX(ModelManagerConstants::padding); + + // Create and configure search input field + InputFieldConfig searchConfig( + "##modelSearch", + ImVec2(ImGui::GetContentRegionAvail().x, 32.0f), + m_searchText, + m_shouldFocusSearch + ); + searchConfig.placeholderText = "Search models..."; + searchConfig.processInput = [this](const std::string& text) { + // No need to handle submission specifically as we'll filter on every change + }; + + // Style the search field + searchConfig.backgroundColor = RGBAToImVec4(34, 34, 34, 255); + searchConfig.hoverColor = RGBAToImVec4(44, 44, 44, 255); + searchConfig.activeColor = RGBAToImVec4(54, 54, 54, 255); + + // Render the search field + InputField::render(searchConfig); + + // Filter models whenever search text changes + static std::string lastSearch; + if (lastSearch != m_searchText) { + lastSearch = m_searchText; + filterModels(); + } + } }; \ No newline at end of file diff --git a/include/ui/chat/preset_sidebar.hpp b/include/ui/chat/preset_sidebar.hpp index b7021e9..9c8435a 100644 --- a/include/ui/chat/preset_sidebar.hpp +++ b/include/ui/chat/preset_sidebar.hpp @@ -135,7 +135,15 @@ class PresetSelectionComponent { saveConfig.size = ImVec2(m_sidebarWidth / 2 - 15, 0); saveConfig.onClick = [&]() { if (Model::PresetManager::getInstance().hasUnsavedChanges()) { - Model::PresetManager::getInstance().saveCurrentPreset().get(); + try { + bool success = 
Model::PresetManager::getInstance().saveCurrentPreset().get(); + if (!success) { + std::cerr << "[PresetSelectionComponent] [ERROR] Failed to save preset.\n"; + } + } + catch (const std::exception& e) { + std::cerr << "[PresetSelectionComponent] [ERROR] " << e.what() << "\n"; + } } }; @@ -170,21 +178,37 @@ class SamplingSettingsComponent { if (!currentPresetOpt) return; auto& currentPreset = currentPresetOpt->get(); - // Render the system prompt label and multi-line input field. - ImGui::Spacing(); ImGui::Spacing(); + // Create a temporary buffer with sufficient capacity + static std::string tempSystemPrompt(Config::InputField::TEXT_SIZE, '\0'); + + // On first render or when preset changes, copy current value to the buffer + static int lastPresetId = -1; + if (lastPresetId != currentPreset.id) { + tempSystemPrompt = currentPreset.systemPrompt; + lastPresetId = currentPreset.id; + } + + // Render the system prompt label and multi-line input field + ImGui::Spacing(); ImGui::Spacing(); Label::render(m_systemPromptLabel); ImGui::Spacing(); ImGui::Spacing(); + InputFieldConfig inputConfig( "##systemprompt", ImVec2(m_sidebarWidth - 20, 100), - currentPreset.systemPrompt, + tempSystemPrompt, // Use the temporary buffer instead m_focusSystemPrompt ); inputConfig.placeholderText = "Enter your system prompt here..."; - inputConfig.processInput = [&](const std::string& input) { + inputConfig.processInput = [&currentPreset](const std::string& input) { + // Copy the input to our temporary buffer first + tempSystemPrompt = input; + + // Then safely update the preset's system prompt currentPreset.systemPrompt = input; }; + InputField::renderMultiline(inputConfig); // Render the model settings label and sampling sliders/inputs. 
diff --git a/include/ui/server/deployment_settings.hpp b/include/ui/server/deployment_settings.hpp index f01e019..4a59aea 100644 --- a/include/ui/server/deployment_settings.hpp +++ b/include/ui/server/deployment_settings.hpp @@ -44,6 +44,20 @@ class ModelLoaderSettingsComponent { int new_n_ctx = static_cast(n_ctx_float); if (new_n_ctx != n_ctx) { configManager.setContextSize(new_n_ctx); + + // Adjust dependent parameters if n_ctx decreased + if (new_n_ctx < n_ctx) { + // Check and adjust n_batch if needed + if (configManager.getBatchSize() > new_n_ctx) { + configManager.setBatchSize(new_n_ctx); + } + + // Check and adjust n_keep if needed + if (configManager.getKeepSize() > new_n_ctx) { + configManager.setKeepSize(new_n_ctx); + } + } + configManager.saveConfig(); // Auto-save on change serverState.setModelParamsChanged(true); // Mark params as changed } @@ -103,6 +117,19 @@ class ModelLoaderSettingsComponent { serverState.setModelParamsChanged(true); // Mark params as changed } + // n_batch slider (max number of tokens to process at each iteration) - using float for slider then converting back to int + { + int n_batch = configManager.getBatchSize(); + float n_batch_float = static_cast(n_batch); + Slider::render("##n_batch", n_batch_float, 1.0f, configManager.getContextSize(), sliderWidth, "%.0f"); + int new_n_batch = static_cast(n_batch_float); + if (new_n_batch != n_batch) { + configManager.setBatchSize(new_n_batch); + configManager.saveConfig(); // Auto-save on change + serverState.setModelParamsChanged(true); // Mark params as changed + } + } + // cont_batching checkbox renderCheckbox("Continuous Batching", "##cont_batching", configManager.getContinuousBatching(), [&configManager, &serverState](bool value) { diff --git a/include/window/win32_window.hpp b/include/window/win32_window.hpp index f42b600..8243c3d 100644 --- a/include/window/win32_window.hpp +++ b/include/window/win32_window.hpp @@ -14,6 +14,7 @@ #include "config.hpp" #include "window.hpp" +#include 
"window_composition_attribute.hpp" extern IMGUI_IMPL_API LRESULT ImGui_ImplWin32_WndProcHandler(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam); @@ -27,9 +28,10 @@ class Win32Window : public Window { , height(720) , should_close(false) , borderless(true) - , borderless_shadow(true) + , borderless_shadow(false) , borderless_drag(false) - , borderless_resize(true) {} + , borderless_resize(true) { + } ~Win32Window() { @@ -52,7 +54,137 @@ class Win32Window : public Window { } set_borderless(borderless); - set_borderless_shadow(borderless_shadow); + + // Apply visual effect (acrylic or fallback) + applyVisualEffect(); + } + + void applyVisualEffect() + { + if (!hwnd) return; + + bool acrylicApplied = false; + + // Try to apply acrylic effect first + HMODULE hUser = GetModuleHandle(TEXT("user32.dll")); + if (hUser) + { + pfnSetWindowCompositionAttribute setWindowCompositionAttribute = + (pfnSetWindowCompositionAttribute)GetProcAddress(hUser, "SetWindowCompositionAttribute"); + + if (setWindowCompositionAttribute && isAcrylicSupported()) + { + // Create accent policy for acrylic blur + ACCENT_POLICY accent{ ACCENT_ENABLE_ACRYLICBLURBEHIND, 0, 0, 0 }; + + // Set the gradient color ($AABBGGRR format) + accent.GradientColor = 0xB3000000; // Semi-transparent dark color + + // Apply the acrylic effect + WINDOWCOMPOSITIONATTRIBDATA data; + data.Attrib = WCA_ACCENT_POLICY; + data.pvData = &accent; + data.cbData = sizeof(accent); + + if (setWindowCompositionAttribute(hwnd, &data)) { + acrylicApplied = true; + } + } + } + + // If acrylic effect failed or isn't supported, apply fallback with system accent color + if (!acrylicApplied && hUser) + { + pfnSetWindowCompositionAttribute setWindowCompositionAttribute = + (pfnSetWindowCompositionAttribute)GetProcAddress(hUser, "SetWindowCompositionAttribute"); + + if (setWindowCompositionAttribute) + { + // Use system accent color as fallback + DWORD systemAccentColor = getSystemAccentColor(); + + ACCENT_POLICY accent{ 
ACCENT_ENABLE_GRADIENT, 0, systemAccentColor, 0 }; + + WINDOWCOMPOSITIONATTRIBDATA data; + data.Attrib = WCA_ACCENT_POLICY; + data.pvData = &accent; + data.cbData = sizeof(accent); + + setWindowCompositionAttribute(hwnd, &data); + } + } + + // Apply rounded corners + applyRoundedCorners(); + } + + void applyRoundedCorners() + { + // Set rounded corners. + // For Windows 11, try to use DWMWA_WINDOW_CORNER_PREFERENCE if available; otherwise, fallback to SetWindowRgn. + typedef enum _DWM_WINDOW_CORNER_PREFERENCE { + DWMWCP_DEFAULT = 0, + DWMWCP_DONOTROUND = 1, + DWMWCP_ROUND = 2, + DWMWCP_ROUNDSMALL = 3 + } DWM_WINDOW_CORNER_PREFERENCE; + + // The DWMWA_WINDOW_CORNER_PREFERENCE attribute is 33. + const DWORD DWMWA_WINDOW_CORNER_PREFERENCE = 33; + DWM_WINDOW_CORNER_PREFERENCE preference = DWMWCP_ROUND; // Or use DWMWCP_ROUNDSMALL per your taste + + HRESULT hr = DwmSetWindowAttribute(hwnd, DWMWA_WINDOW_CORNER_PREFERENCE, &preference, sizeof(preference)); + if (SUCCEEDED(hr)) { + // Successfully applied system-managed rounded corners on supported systems. + } + else { + // Fallback: use SetWindowRgn to create rounded window corners. 
+ RECT rect; + if (GetClientRect(hwnd, &rect)) { + int width = rect.right - rect.left; + int height = rect.bottom - rect.top; + HRGN hRgn = CreateRoundRectRgn(0, 0, width, height, Config::WINDOW_CORNER_RADIUS, Config::WINDOW_CORNER_RADIUS); + if (hRgn) { + SetWindowRgn(hwnd, hRgn, TRUE); + } + } + } + } + + bool isAcrylicSupported() + { + // Check if DWM composition is enabled (required for acrylic) + BOOL compositionEnabled = FALSE; + if (!SUCCEEDED(DwmIsCompositionEnabled(&compositionEnabled)) || !compositionEnabled) { + return false; + } + + // On versions before Windows 10 1803, acrylic is not available + // Could use feature detection, but for simplicity, we'll just try to apply it + // and let the fallback handle failure cases + return true; + } + + DWORD getSystemAccentColor() + { + // Default color (dark with some transparency) in case we can't get the system color + DWORD defaultColor = 0xB3000000; + + // Try to get the Windows accent color + DWORD colorizationColor = 0; + BOOL opaqueBlend = FALSE; + if (SUCCEEDED(DwmGetColorizationColor(&colorizationColor, &opaqueBlend))) + { + // Convert from ARGB to AABBGGRR format + BYTE a = 0xB3; // Use a fixed alpha for semi-transparency + BYTE r = (colorizationColor >> 16) & 0xFF; + BYTE g = (colorizationColor >> 8) & 0xFF; + BYTE b = colorizationColor & 0xFF; + + return (a << 24) | (b << 16) | (g << 8) | r; + } + + return defaultColor; } void show() override @@ -146,6 +278,16 @@ class Win32Window : public Window { window->is_window_active = (wParam != WA_INACTIVE); break; } + case WM_SIZE: { + // Reapply visual effect when the window is resized + window->applyVisualEffect(); + break; + } + case WM_DWMCOLORIZATIONCOLORCHANGED: { + // System accent color changed, reapply visual effect + window->applyVisualEffect(); + break; + } case WM_CLOSE: { window->should_close = true; return 0; @@ -239,7 +381,7 @@ class Win32Window : public Window { throw last_error("failed to register window class"); } return wcx.lpszClassName; 
- }(); + }(); return window_class_name; } diff --git a/include/window/window_composition_attribute.hpp b/include/window/window_composition_attribute.hpp new file mode 100644 index 0000000..a39f889 --- /dev/null +++ b/include/window/window_composition_attribute.hpp @@ -0,0 +1,57 @@ +#pragma once +#include + +typedef enum _WINDOWCOMPOSITIONATTRIB +{ + WCA_UNDEFINED = 0, + WCA_NCRENDERING_ENABLED = 1, + WCA_NCRENDERING_POLICY = 2, + WCA_TRANSITIONS_FORCEDISABLED = 3, + WCA_ALLOW_NCPAINT = 4, + WCA_CAPTION_BUTTON_BOUNDS = 5, + WCA_NONCLIENT_RTL_LAYOUT = 6, + WCA_FORCE_ICONIC_REPRESENTATION = 7, + WCA_EXTENDED_FRAME_BOUNDS = 8, + WCA_HAS_ICONIC_BITMAP = 9, + WCA_THEME_ATTRIBUTES = 10, + WCA_NCRENDERING_EXILED = 11, + WCA_NCADORNMENTINFO = 12, + WCA_EXCLUDED_FROM_LIVEPREVIEW = 13, + WCA_VIDEO_OVERLAY_ACTIVE = 14, + WCA_FORCE_ACTIVEWINDOW_APPEARANCE = 15, + WCA_DISALLOW_PEEK = 16, + WCA_CLOAK = 17, + WCA_CLOAKED = 18, + WCA_ACCENT_POLICY = 19, + WCA_FREEZE_REPRESENTATION = 20, + WCA_EVER_UNCLOAKED = 21, + WCA_VISUAL_OWNER = 22, + WCA_LAST = 23 +} WINDOWCOMPOSITIONATTRIB; + +typedef struct _WINDOWCOMPOSITIONATTRIBDATA +{ + WINDOWCOMPOSITIONATTRIB Attrib; + PVOID pvData; + SIZE_T cbData; +} WINDOWCOMPOSITIONATTRIBDATA; + +typedef enum _ACCENT_STATE +{ + ACCENT_DISABLED = 0, + ACCENT_ENABLE_GRADIENT = 1, + ACCENT_ENABLE_TRANSPARENTGRADIENT = 2, + ACCENT_ENABLE_BLURBEHIND = 3, + ACCENT_ENABLE_ACRYLICBLURBEHIND = 4, + ACCENT_INVALID_STATE = 5 +} ACCENT_STATE; + +typedef struct _ACCENT_POLICY +{ + ACCENT_STATE AccentState; + DWORD AccentFlags; + DWORD GradientColor; + DWORD AnimationId; +} ACCENT_POLICY; + +typedef BOOL(WINAPI* pfnSetWindowCompositionAttribute)(HWND, WINDOWCOMPOSITIONATTRIBDATA*); \ No newline at end of file diff --git a/installer/script.nsi b/installer/script.nsi index 6c2ace1..0a0ad76 100644 --- a/installer/script.nsi +++ b/installer/script.nsi @@ -7,6 +7,8 @@ ;----------------------------------- !include "MUI2.nsh" !include "FileFunc.nsh" +!include 
"LogicLib.nsh" +!include "nsProcess.nsh" ;----------------------------------- ; Variables @@ -14,17 +16,21 @@ Var StartMenuFolder Var ChatHistoryDir Var DefaultChatDir +Var OldVersion +Var NewVersion +Var IsUpgrade ;----------------------------------- ; Embed version info (metadata) ;----------------------------------- -VIProductVersion "0.1.6.0" +!define VERSION "0.1.7.0" +VIProductVersion "${VERSION}" VIAddVersionKey "ProductName" "Kolosal AI Installer" VIAddVersionKey "CompanyName" "Genta Technology" VIAddVersionKey "FileDescription" "Kolosal AI Installer" VIAddVersionKey "LegalCopyright" "Copyright (C) 2025" -VIAddVersionKey "FileVersion" "0.1.6.0" -VIAddVersionKey "ProductVersion" "0.1.6.0" +VIAddVersionKey "FileVersion" "${VERSION}" +VIAddVersionKey "ProductVersion" "${VERSION}" VIAddVersionKey "OriginalFilename" "KolosalAI_Installer.exe" VIAddVersionKey "Comments" "Installer for Kolosal AI" VIAddVersionKey "Publisher" "Genta Technology" @@ -68,8 +74,46 @@ RequestExecutionLevel admin !define MUI_STARTMENUPAGE_DEFAULTFOLDER "Kolosal AI" Function .onInit + ; Initialize default chat directory StrCpy $DefaultChatDir "$LOCALAPPDATA\KolosalAI\ChatHistory" StrCpy $ChatHistoryDir $DefaultChatDir + + ; Check for previous installation + StrCpy $IsUpgrade "false" + ReadRegStr $R0 HKLM "Software\KolosalAI" "Install_Dir" + ReadRegStr $OldVersion HKLM "Software\KolosalAI" "Version" + StrCpy $NewVersion "${VERSION}" + + ${If} $R0 != "" + StrCpy $IsUpgrade "true" + + ; Detect if the application is running + ${nsProcess::FindProcess} "KolosalDesktop.exe" $R1 + ${If} $R1 == 0 + MessageBox MB_OKCANCEL|MB_ICONEXCLAMATION \ + "Kolosal AI is currently running. Please close it before continuing.$\n$\nPress OK to automatically close the application and continue with the update, or Cancel to abort installation." 
\ + IDCANCEL abort + + ; Kill the process if user chose to continue + ${nsProcess::KillProcess} "KolosalDesktop.exe" $R1 + Sleep 2000 ; Give it time to fully terminate + ${EndIf} + ${EndIf} + + Return + +abort: + Abort "Installation aborted. Please close Kolosal AI and run the installer again." +FunctionEnd + +Function ChatHistoryDirectoryPre + StrCpy $ChatHistoryDir $DefaultChatDir + !undef MUI_DIRECTORYPAGE_VARIABLE + !define MUI_DIRECTORYPAGE_VARIABLE $ChatHistoryDir + !undef MUI_PAGE_HEADER_TEXT + !define MUI_PAGE_HEADER_TEXT "${CHATHISTORY_TITLE}" + !undef MUI_PAGE_HEADER_SUBTEXT + !define MUI_PAGE_HEADER_SUBTEXT "${CHATHISTORY_SUBTITLE}" FunctionEnd ; Page order @@ -91,23 +135,30 @@ FunctionEnd !insertmacro MUI_LANGUAGE "English" -Function ChatHistoryDirectoryPre - StrCpy $ChatHistoryDir $DefaultChatDir - !undef MUI_DIRECTORYPAGE_VARIABLE - !define MUI_DIRECTORYPAGE_VARIABLE $ChatHistoryDir - !undef MUI_PAGE_HEADER_TEXT - !define MUI_PAGE_HEADER_TEXT "${CHATHISTORY_TITLE}" - !undef MUI_PAGE_HEADER_SUBTEXT - !define MUI_PAGE_HEADER_SUBTEXT "${CHATHISTORY_SUBTITLE}" -FunctionEnd - ;----------------------------------- ; Installation Section ;----------------------------------- Section "Kolosal AI" SecKolosalAI - ; Force overwrite of existing files so that EXE and DLL files are always replaced + ; Force overwrite of existing files SetOverwrite on + ; If this is an upgrade, remove old files first (except chat history) + ${If} $IsUpgrade == "true" + ; Display upgrade message + DetailPrint "Upgrading from version $OldVersion to $NewVersion" + + ; Remove previous program files but keep the directory structure + RMDir /r "$INSTDIR\assets" + RMDir /r "$INSTDIR\fonts" + RMDir /r "$INSTDIR\models" + Delete "$INSTDIR\*.dll" + Delete "$INSTDIR\*.exe" + Delete "$INSTDIR\LICENSE" + + ; Small delay to ensure all files are released + Sleep 1000 + ${EndIf} + SetOutPath "$INSTDIR" ; Set write permissions @@ -137,9 +188,11 @@ Section "Kolosal AI" SecKolosalAI SetOutPath 
"$INSTDIR\models" File /r "models\*.*" - ; Create chat history directory - CreateDirectory "$ChatHistoryDir" - AccessControl::GrantOnFile "$ChatHistoryDir" "(S-1-5-32-545)" "FullAccess" + ; Create chat history directory if it doesn't exist + ${If} $IsUpgrade == "false" + CreateDirectory "$ChatHistoryDir" + AccessControl::GrantOnFile "$ChatHistoryDir" "(S-1-5-32-545)" "FullAccess" + ${EndIf} SetOutPath "$INSTDIR" @@ -156,9 +209,11 @@ Section "Kolosal AI" SecKolosalAI ; Write registry information WriteRegStr HKLM "SOFTWARE\KolosalAI" "Install_Dir" "$INSTDIR" WriteRegStr HKLM "SOFTWARE\KolosalAI" "ChatHistory_Dir" "$ChatHistoryDir" + WriteRegStr HKLM "SOFTWARE\KolosalAI" "Version" "${VERSION}" WriteRegStr HKCU "Software\KolosalAI" "Install_Dir" "$INSTDIR" WriteRegStr HKCU "Software\KolosalAI" "ChatHistory_Dir" "$ChatHistoryDir" + WriteRegStr HKCU "Software\KolosalAI" "Version" "${VERSION}" ; Write uninstaller registry information WriteRegStr HKLM "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "DisplayName" "Kolosal AI" @@ -166,21 +221,40 @@ Section "Kolosal AI" SecKolosalAI WriteRegStr HKLM "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "InstallLocation" "$INSTDIR" WriteRegStr HKLM "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "Publisher" "Genta Technology" WriteRegStr HKLM "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "DisplayIcon" "$INSTDIR\assets\icon.ico" + WriteRegStr HKLM "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "DisplayVersion" "${VERSION}" WriteRegStr HKCU "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "DisplayName" "Kolosal AI" WriteRegStr HKCU "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "UninstallString" "$INSTDIR\Uninstall.exe" WriteRegStr HKCU "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "InstallLocation" "$INSTDIR" WriteRegStr HKCU "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "Publisher" 
"Genta Technology" WriteRegStr HKCU "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "DisplayIcon" "$INSTDIR\assets\icon.ico" + WriteRegStr HKCU "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" "DisplayVersion" "${VERSION}" ; Create uninstaller WriteUninstaller "$INSTDIR\Uninstall.exe" + + ; Clean temporary files that might be left from previous versions + RMDir /r "$TEMP\KolosalAI" + Delete "$TEMP\KolosalAI_*.log" + Delete "$TEMP\KolosalAI_*.tmp" SectionEnd ;----------------------------------- ; Uninstall Section ;----------------------------------- Section "Uninstall" + ; Check if the application is running before uninstalling + ${nsProcess::FindProcess} "KolosalDesktop.exe" $R1 + ${If} $R1 == 0 + MessageBox MB_OKCANCEL|MB_ICONEXCLAMATION \ + "Kolosal AI is currently running. Please close it before continuing.$\n$\nPress OK to automatically close the application and continue with uninstallation, or Cancel to abort." \ + IDCANCEL abortUninstall + + ; Kill the process if user chose to continue + ${nsProcess::KillProcess} "KolosalDesktop.exe" $R1 + Sleep 2000 ; Give it time to fully terminate + ${EndIf} + ; Retrieve Start Menu folder from registry !insertmacro MUI_STARTMENU_GETFOLDER Application $StartMenuFolder @@ -211,6 +285,15 @@ keepChatHistory: ; Remove registry keys DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" DeleteRegKey HKLM "Software\KolosalAI" + DeleteRegKey HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\KolosalAI" + DeleteRegKey HKCU "Software\KolosalAI" + + ${nsProcess::Unload} + Goto done + +abortUninstall: + Abort "Uninstallation aborted. Please close Kolosal AI and try again." 
noRemove: -SectionEnd +done: +SectionEnd \ No newline at end of file diff --git a/models/bahasa-ai-4b.json b/models/bahasa-ai-4b.json new file mode 100644 index 0000000..ea1e4ce --- /dev/null +++ b/models/bahasa-ai-4b.json @@ -0,0 +1,30 @@ +{ + "name": "Bahasa AI 4B", + "author": "Alibaba, Bahasa AI", + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/bahasa-ai-4b/fp16/Bahasalab_Bahasa-4b-chat_f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/bahasa-ai-4b/resolve/main/Bahasalab_Bahasa-4b-chat_f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/bahasa-ai-4b/int8/Bahasalab_Bahasa-4b-chat_q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/bahasa-ai-4b/resolve/main/Bahasalab_Bahasa-4b-chat_q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/bahasa-ai-4b/int4/Bahasalab_Bahasa-4b-chat_q4_k_m.gguf", + "downloadLink": "https://huggingface.co/kolosal/bahasa-ai-4b/resolve/main/Bahasalab_Bahasa-4b-chat_q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } + } +} \ No newline at end of file diff --git a/models/deepseek-r1-llama-8b.json b/models/deepseek-r1-llama-8b.json index bdb6266..6cd5cea 100644 --- a/models/deepseek-r1-llama-8b.json +++ b/models/deepseek-r1-llama-8b.json @@ -1,28 +1,30 @@ { "name": "Deepseek R1 Llama 8B", "author": "Deepseek AI", - "fullPrecision": { - "type": "Full Precision", - "path": "models/deepseek-r1-llama-8b/fp16/DeepSeek-R1-Distill-Llama-8B-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Llama-8B/resolve/main/DeepSeek-R1-Distill-Llama-8B-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": 
"models/deepseek-r1-llama-8b/int8/DeepSeek-R1-Distill-Llama-8B-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Llama-8B/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/deepseek-r1-llama-8b/int4/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Llama-8B/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/deepseek-r1-llama-8b/fp16/DeepSeek-R1-Distill-Llama-8B-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Llama-8B/resolve/main/DeepSeek-R1-Distill-Llama-8B-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/deepseek-r1-llama-8b/int8/DeepSeek-R1-Distill-Llama-8B-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Llama-8B/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/deepseek-r1-llama-8b/int4/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Llama-8B/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/deepseek-r1-qwen2.5-1.5b.json b/models/deepseek-r1-qwen2.5-1.5b.json index 1ba16da..b078ba5 100644 --- a/models/deepseek-r1-qwen2.5-1.5b.json +++ b/models/deepseek-r1-qwen2.5-1.5b.json @@ -1,28 +1,30 @@ { "name": "Deepseek R1 Qwen2.5 1.5B", "author": "Deepseek AI", - "fullPrecision": { - "type": "Full 
Precision", - "path": "models/deepseek-r1-qwen-1.5b/fp16/DeepSeek-R1-Distill-Qwen-1.5B-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-1.5B/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/deepseek-r1-qwen-1.5b/int8/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-1.5B/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/deepseek-r1-qwen-1.5b/int4/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-1.5B/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/deepseek-r1-qwen-1.5b/fp16/DeepSeek-R1-Distill-Qwen-1.5B-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-1.5B/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/deepseek-r1-qwen-1.5b/int8/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-1.5B/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/deepseek-r1-qwen-1.5b/int4/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-1.5B/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 
0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/deepseek-r1-qwen2.5-14b.json b/models/deepseek-r1-qwen2.5-14b.json index ec3c3a0..d696ba1 100644 --- a/models/deepseek-r1-qwen2.5-14b.json +++ b/models/deepseek-r1-qwen2.5-14b.json @@ -1,28 +1,30 @@ { "name": "Deepseek R1 Qwen2.5 14B", "author": "Deepseek AI", - "fullPrecision": { - "type": "Full Precision", - "path": "models/deepseek-r1-qwen-14b/fp16/DeepSeek-R1-Distill-Qwen-14B-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-14B/resolve/main/DeepSeek-R1-Distill-Qwen-14B-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/deepseek-r1-qwen-14b/int8/DeepSeek-R1-Distill-Qwen-14B-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-14B/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/deepseek-r1-qwen-14b/int4/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-14B/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/deepseek-r1-qwen-14b/fp16/DeepSeek-R1-Distill-Qwen-14B-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-14B/resolve/main/DeepSeek-R1-Distill-Qwen-14B-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/deepseek-r1-qwen-14b/int8/DeepSeek-R1-Distill-Qwen-14B-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-14B/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q8_0.gguf", + "isDownloaded": false, + 
"downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/deepseek-r1-qwen-14b/int4/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-14B/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/deepseek-r1-qwen2.5-7b.json b/models/deepseek-r1-qwen2.5-7b.json index 2bf4c5d..01c6619 100644 --- a/models/deepseek-r1-qwen2.5-7b.json +++ b/models/deepseek-r1-qwen2.5-7b.json @@ -1,28 +1,30 @@ { "name": "Deepseek R1 Qwen2.5 7B", "author": "Deepseek AI", - "fullPrecision": { - "type": "Full Precision", - "path": "models/deepseek-r1-qwen-7b/fp16/DeepSeek-R1-Distill-Qwen-7B-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-7B/resolve/main/DeepSeek-R1-Distill-Qwen-7B-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/deepseek-r1-qwen-7b/int8/DeepSeek-R1-Distill-Qwen-7B-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-7B/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/deepseek-r1-qwen-7b/int4/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-7B/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/deepseek-r1-qwen-7b/fp16/DeepSeek-R1-Distill-Qwen-7B-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-7B/resolve/main/DeepSeek-R1-Distill-Qwen-7B-f16.gguf", + "isDownloaded": false, + 
"downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/deepseek-r1-qwen-7b/int8/DeepSeek-R1-Distill-Qwen-7B-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-7B/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/deepseek-r1-qwen-7b/int4/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/Deepseek-R1-Qwen-7B/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/gemma-2-2b.json b/models/gemma-2-2b.json index 72812f7..747b01b 100644 --- a/models/gemma-2-2b.json +++ b/models/gemma-2-2b.json @@ -1,28 +1,30 @@ { "name": "Gemma 2 2B", "author": "Google", - "fullPrecision": { - "type": "Full Precision", - "path": "models/gemma-2-2b/fp16/gemma-2-2b-it-f32.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-2b/resolve/main/gemma-2-2b-it-f32.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/gemma-2-2b/int8/gemma-2-2b-it-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-2b/resolve/main/gemma-2-2b-it-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/gemma-2-2b/int4/gemma-2-2b-it-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-2b/resolve/main/gemma-2-2b-it-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/gemma-2-2b/fp16/gemma-2-2b-it-f32.gguf", + "downloadLink": 
"https://huggingface.co/kolosal/gemma-2-2b/resolve/main/gemma-2-2b-it-f32.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/gemma-2-2b/int8/gemma-2-2b-it-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-2b/resolve/main/gemma-2-2b-it-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/gemma-2-2b/int4/gemma-2-2b-it-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-2b/resolve/main/gemma-2-2b-it-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/gemma-2-9b-sahabat.json b/models/gemma-2-9b-sahabat.json index 421fec0..074152c 100644 --- a/models/gemma-2-9b-sahabat.json +++ b/models/gemma-2-9b-sahabat.json @@ -1,28 +1,30 @@ { "name": "Gemma 2 9B Sahabat AI", "author": "Google, GoTo", - "fullPrecision": { - "type": "Full Precision", - "path": "models/gemma-2-sahabat-ai/fp16/gemma2-9b-cpt-sahabatai-v1-instruct.bf16.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b-sahabat-ai/resolve/main/gemma2-9b-cpt-sahabatai-v1-instruct.bf16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/gemma-2-sahabat-ai/int8/gemma2-9b-cpt-sahabatai-v1-instruct.Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b-sahabat-ai/resolve/main/gemma2-9b-cpt-sahabatai-v1-instruct.Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/gemma-2-sahabat-ai/int4/gemma2-9b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", - "downloadLink": 
"https://huggingface.co/kolosal/gemma-2-9b-sahabat-ai/resolve/main/gemma2-9b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/gemma-2-sahabat-ai/fp16/gemma2-9b-cpt-sahabatai-v1-instruct.bf16.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b-sahabat-ai/resolve/main/gemma2-9b-cpt-sahabatai-v1-instruct.bf16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/gemma-2-sahabat-ai/int8/gemma2-9b-cpt-sahabatai-v1-instruct.Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b-sahabat-ai/resolve/main/gemma2-9b-cpt-sahabatai-v1-instruct.Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/gemma-2-sahabat-ai/int4/gemma2-9b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b-sahabat-ai/resolve/main/gemma2-9b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/gemma-2-9b.json b/models/gemma-2-9b.json index e0a7235..fed6330 100644 --- a/models/gemma-2-9b.json +++ b/models/gemma-2-9b.json @@ -1,28 +1,30 @@ { "name": "Gemma 2 9B", "author": "Google", - "fullPrecision": { - "type": "Full Precision", - "path": "models/gemma-2-9b/fp16/gemma-2-9b-it-f32.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b/resolve/main/gemma-2-9b-it-f32.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/gemma-2-9b/int8/gemma-2-9b-it-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b/resolve/main/gemma-2-9b-it-Q8_0.gguf", 
- "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/gemma-2-9b/int4/gemma-2-9b-it-Q4_K_L.gguf", - "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b/resolve/main/gemma-2-9b-it-Q4_K_L.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/gemma-2-9b/fp16/gemma-2-9b-it-f32.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b/resolve/main/gemma-2-9b-it-f32.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/gemma-2-9b/int8/gemma-2-9b-it-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b/resolve/main/gemma-2-9b-it-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/gemma-2-9b/int4/gemma-2-9b-it-Q4_K_L.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-2-9b/resolve/main/gemma-2-9b-it-Q4_K_L.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/gemma-3-12b.json b/models/gemma-3-12b.json new file mode 100644 index 0000000..4670b0c --- /dev/null +++ b/models/gemma-3-12b.json @@ -0,0 +1,30 @@ +{ + "name": "Gemma 3 12B", + "author": "Google", + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/gemma-3-12b/fp16/google_gemma-3-12b-it_f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-12b/resolve/main/google_gemma-3-12b-it_f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/gemma-3-12b/int8/google_gemma-3-12b-it_q8_0.gguf", + "downloadLink": 
"https://huggingface.co/kolosal/gemma-3-12b/resolve/main/google_gemma-3-12b-it_q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/gemma-3-12b/int4/google_gemma-3-12b-it_q4_k_m.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-12b/resolve/main/google_gemma-3-12b-it_q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } + } +} \ No newline at end of file diff --git a/models/gemma-3-1b.json b/models/gemma-3-1b.json new file mode 100644 index 0000000..599ed4d --- /dev/null +++ b/models/gemma-3-1b.json @@ -0,0 +1,30 @@ +{ + "name": "Gemma 3 1B", + "author": "Google", + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/gemma-3-1b/fp16/google_gemma-3-1b-it_f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-1b/resolve/main/google_gemma-3-1b-it_f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/gemma-3-1b/int8/google_gemma-3-1b-it_q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-1b/resolve/main/google_gemma-3-1b-it_q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/gemma-3-1b/int4/google_gemma-3-1b-it_q4_k_m.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-1b/resolve/main/google_gemma-3-1b-it_q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } + } +} \ No newline at end of file diff --git a/models/gemma-3-27b.json b/models/gemma-3-27b.json new file mode 100644 index 0000000..226b51b --- /dev/null +++ b/models/gemma-3-27b.json @@ -0,0 +1,22 @@ +{ + "name": "Gemma 3 27B", + "author": "Google", + "variants": { + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": 
"models/gemma-3-27b/int8/google_gemma-3-27b-it_q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-27b/resolve/main/google_gemma-3-27b-it_q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/gemma-3-27b/int4/google_gemma-3-27b-it_q4_k_m.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-27b/resolve/main/google_gemma-3-27b-it_q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } + } +} \ No newline at end of file diff --git a/models/gemma-3-4b.json b/models/gemma-3-4b.json new file mode 100644 index 0000000..c989026 --- /dev/null +++ b/models/gemma-3-4b.json @@ -0,0 +1,30 @@ +{ + "name": "Gemma 3 4B", + "author": "Google", + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/gemma-3-4b/fp16/google_gemma-3-4b-it_f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-4b/resolve/main/google_gemma-3-4b-it_f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/gemma-3-4b/int8/google_gemma-3-4b-it_q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-4b/resolve/main/google_gemma-3-4b-it_q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/gemma-3-4b/int4/google_gemma-3-4b-it_q4_k_m.gguf", + "downloadLink": "https://huggingface.co/kolosal/gemma-3-4b/resolve/main/google_gemma-3-4b-it_q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } + } +} \ No newline at end of file diff --git a/models/llama-3-8b-sahabat.json b/models/llama-3-8b-sahabat.json index 107c788..f85bd3e 100644 --- a/models/llama-3-8b-sahabat.json +++ b/models/llama-3-8b-sahabat.json @@ -1,28 +1,30 @@ { "name": "Llama 3 8B Sahabat AI", 
"author": "Meta, GoTo", - "fullPrecision": { - "type": "Full Precision", - "path": "models/llama-3-sahabat-ai/fp16/llama3-8b-cpt-sahabatai-v1-instruct.bf16.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3-8b-sahabat-ai/resolve/main/llama3-8b-cpt-sahabatai-v1-instruct.bf16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/llama-3-sahabat-ai/int8/llama3-8b-cpt-sahabatai-v1-instruct.Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3-8b-sahabat-ai/resolve/main/llama3-8b-cpt-sahabatai-v1-instruct.Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/llama-3-sahabat-ai/int4/llama3-8b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3-8b-sahabat-ai/resolve/main/llama3-8b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/llama-3-sahabat-ai/fp16/llama3-8b-cpt-sahabatai-v1-instruct.bf16.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3-8b-sahabat-ai/resolve/main/llama3-8b-cpt-sahabatai-v1-instruct.bf16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/llama-3-sahabat-ai/int8/llama3-8b-cpt-sahabatai-v1-instruct.Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3-8b-sahabat-ai/resolve/main/llama3-8b-cpt-sahabatai-v1-instruct.Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/llama-3-sahabat-ai/int4/llama3-8b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", + "downloadLink": 
"https://huggingface.co/kolosal/llama-3-8b-sahabat-ai/resolve/main/llama3-8b-cpt-sahabatai-v1-instruct.Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/llama-3.1-8b.json b/models/llama-3.1-8b.json index 65f9236..146f8c4 100644 --- a/models/llama-3.1-8b.json +++ b/models/llama-3.1-8b.json @@ -1,28 +1,30 @@ { "name": "Llama 3.1 8B", "author": "Meta", - "fullPrecision": { - "type": "Full Precision", - "path": "models/llama-3.1-8B/fp16/Meta-Llama-3.1-8B-Instruct.f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.1-8b/resolve/main/Meta-Llama-3.1-8B-Instruct.f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/llama-3.1-8B/int8/Meta-Llama-3.1-8B-Instruct.Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.1-8b/resolve/main/Meta-Llama-3.1-8B-Instruct.Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/llama-3.1-8B/int4/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.1-8b/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/llama-3.1-8B/fp16/Meta-Llama-3.1-8B-Instruct.f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.1-8b/resolve/main/Meta-Llama-3.1-8B-Instruct.f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/llama-3.1-8B/int8/Meta-Llama-3.1-8B-Instruct.Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.1-8b/resolve/main/Meta-Llama-3.1-8B-Instruct.Q8_0.gguf", + "isDownloaded": false, + 
"downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/llama-3.1-8B/int4/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.1-8b/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/llama-3.2-1b.json b/models/llama-3.2-1b.json index a53303d..781d0ad 100644 --- a/models/llama-3.2-1b.json +++ b/models/llama-3.2-1b.json @@ -1,28 +1,30 @@ { "name": "Llama 3.2 1B", "author": "Meta", - "fullPrecision": { - "type": "Full Precision", - "path": "models/llama-3.2-1B/fp16/Llama-3.2-1B-Instruct-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.2-1b/resolve/main/Llama-3.2-1B-Instruct-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/llama-3.2-1B/int8/Llama-3.2-1B-Instruct-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.2-1b/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/llama-3.2-1B/int4/Llama-3.2-1B-Instruct-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.2-1b/resolve/main/Llama-3.2-1B-Instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/llama-3.2-1B/fp16/Llama-3.2-1B-Instruct-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.2-1b/resolve/main/Llama-3.2-1B-Instruct-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/llama-3.2-1B/int8/Llama-3.2-1B-Instruct-Q8_0.gguf", + 
"downloadLink": "https://huggingface.co/kolosal/llama-3.2-1b/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/llama-3.2-1B/int4/Llama-3.2-1B-Instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.2-1b/resolve/main/Llama-3.2-1B-Instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/llama-3.2-3b.json b/models/llama-3.2-3b.json index 01c623d..345ca0f 100644 --- a/models/llama-3.2-3b.json +++ b/models/llama-3.2-3b.json @@ -1,28 +1,30 @@ { "name": "Llama 3.2 3B", "author": "Meta", - "fullPrecision": { - "type": "Full Precision", - "path": "models/llama-3.2-3B/fp16/Llama-3.2-3B-Instruct-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.2-3b/resolve/main/Llama-3.2-3B-Instruct-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/llama-3.2-3B/int8/Llama-3.2-3B-Instruct-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.2-3b/resolve/main/Llama-3.2-3B-Instruct-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/llama-3.2-3B/int4/Llama-3.2-3B-Instruct-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/llama-3.2-3b/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/llama-3.2-3B/fp16/Llama-3.2-3B-Instruct-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.2-3b/resolve/main/Llama-3.2-3B-Instruct-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit 
Quantized": { + "type": "8-bit Quantized", + "path": "models/llama-3.2-3B/int8/Llama-3.2-3B-Instruct-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.2-3b/resolve/main/Llama-3.2-3B-Instruct-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/llama-3.2-3B/int4/Llama-3.2-3B-Instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/llama-3.2-3b/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/phi-4-14b.json b/models/phi-4-14b.json index 6f830f5..c819573 100644 --- a/models/phi-4-14b.json +++ b/models/phi-4-14b.json @@ -1,28 +1,30 @@ { "name": "Phi 4 14B", "author": "Microsoft", - "fullPrecision": { - "type": "Full Precision", - "path": "models/phi-4-14b/fp16/phi-4-F16.gguf", - "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-F16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/phi-4-14b/int8/phi-4-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/phi-4-14b/int4/phi-4-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/phi-4-14b/fp16/phi-4-F16.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-F16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": 
"models/phi-4-14b/int8/phi-4-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/phi-4-14b/int4/phi-4-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4/resolve/main/phi-4-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/phi-4-mini-3.8b.json b/models/phi-4-mini-3.8b.json index 16097c6..0c08fa6 100644 --- a/models/phi-4-mini-3.8b.json +++ b/models/phi-4-mini-3.8b.json @@ -1,28 +1,30 @@ { "name": "Phi 4 Mini 3.8B", "author": "Microsoft", - "fullPrecision": { - "type": "Full Precision", - "path": "models/phi-4-mini-3.8b/fp16/Phi-4-mini-instruct.BF16.gguf", - "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.BF16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/phi-4-mini-3.8b/int8/Phi-4-mini-instruct.Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/phi-4-mini-3.8b/int4/Phi-4-mini-instruct-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/phi-4-mini-3.8b/fp16/Phi-4-mini-instruct.BF16.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.BF16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { 
+ "type": "8-bit Quantized", + "path": "models/phi-4-mini-3.8b/int8/Phi-4-mini-instruct.Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct.Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/phi-4-mini-3.8b/int4/Phi-4-mini-instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/phi-4-mini/resolve/main/Phi-4-mini-instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-0.5b.json b/models/qwen2.5-0.5b.json index 07feabb..4648131 100644 --- a/models/qwen2.5-0.5b.json +++ b/models/qwen2.5-0.5b.json @@ -1,28 +1,30 @@ { "name": "Qwen2.5 0.5B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen2.5-0.5b/fp16/Qwen2.5-0.5B-Instruct-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen2.5-0.5b/int8/Qwen2.5-0.5B-Instruct-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen2.5-0.5b/int4/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen2.5-0.5b/fp16/Qwen2.5-0.5B-Instruct-f16.gguf", + "downloadLink": 
"https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen2.5-0.5b/int8/Qwen2.5-0.5B-Instruct-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen2.5-0.5b/int4/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-0.5b/resolve/main/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-1.5b.json b/models/qwen2.5-1.5b.json index f9fc696..d75c773 100644 --- a/models/qwen2.5-1.5b.json +++ b/models/qwen2.5-1.5b.json @@ -1,28 +1,30 @@ { "name": "Qwen2.5 1.5B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen2.5-1.5b/fp16/Qwen2.5-1.5B-Instruct-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-1.5b/resolve/main/Qwen2.5-1.5B-Instruct-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen2.5-1.5b/int8/Qwen2.5-1.5B-Instruct-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-1.5b/resolve/main/Qwen2.5-1.5B-Instruct-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen2.5-1.5b/int4/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-1.5b/resolve/main/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + 
"type": "Full Precision", + "path": "models/qwen2.5-1.5b/fp16/Qwen2.5-1.5B-Instruct-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-1.5b/resolve/main/Qwen2.5-1.5B-Instruct-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen2.5-1.5b/int8/Qwen2.5-1.5B-Instruct-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-1.5b/resolve/main/Qwen2.5-1.5B-Instruct-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen2.5-1.5b/int4/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-1.5b/resolve/main/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-14b.json b/models/qwen2.5-14b.json index 777490b..6aebc86 100644 --- a/models/qwen2.5-14b.json +++ b/models/qwen2.5-14b.json @@ -1,28 +1,30 @@ { "name": "Qwen2.5 14B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen2.5-14b/fp16/Qwen2.5-14B-Instruct-f16.gguf", - "downloadLink": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen2.5-14b/int8/Qwen2.5-14B-Instruct-Q8_0.gguf", - "downloadLink": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen2.5-14b/int4/Qwen2.5-14B-Instruct-Q4_K_M.gguf", - "downloadLink": 
"https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen2.5-14b/fp16/Qwen2.5-14B-Instruct-f16.gguf", + "downloadLink": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen2.5-14b/int8/Qwen2.5-14B-Instruct-Q8_0.gguf", + "downloadLink": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen2.5-14b/int4/Qwen2.5-14B-Instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-3b.json b/models/qwen2.5-3b.json index 095c6dc..c15c311 100644 --- a/models/qwen2.5-3b.json +++ b/models/qwen2.5-3b.json @@ -1,28 +1,30 @@ { "name": "Qwen2.5 3B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen2.5-3b/fp16/Qwen2.5-3B-Instruct-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-3b/resolve/main/Qwen2.5-3B-Instruct-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen2.5-3b/int8/Qwen2.5-3B-Instruct-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-3b/resolve/main/Qwen2.5-3B-Instruct-Q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - 
"quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen2.5-3b/int4/Qwen2.5-3B-Instruct-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-3b/resolve/main/Qwen2.5-3B-Instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen2.5-3b/fp16/Qwen2.5-3B-Instruct-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-3b/resolve/main/Qwen2.5-3B-Instruct-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen2.5-3b/int8/Qwen2.5-3B-Instruct-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-3b/resolve/main/Qwen2.5-3B-Instruct-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen2.5-3b/int4/Qwen2.5-3B-Instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-3b/resolve/main/Qwen2.5-3B-Instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-7b.json b/models/qwen2.5-7b.json index 90eaf06..3cd26ea 100644 --- a/models/qwen2.5-7b.json +++ b/models/qwen2.5-7b.json @@ -1,28 +1,30 @@ { "name": "Qwen2.5 7B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen2.5-7b/fp16/Qwen2.5-7B-Instruct-f16.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-7b/resolve/main/Qwen2.5-7B-Instruct-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen2.5-7b/int8/Qwen2.5-7B-Instruct-Q8_0.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-7b/resolve/main/Qwen2.5-7B-Instruct-Q8_0.gguf", - "isDownloaded": 
false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen2.5-7b/int4/Qwen2.5-7B-Instruct-Q4_K_M.gguf", - "downloadLink": "https://huggingface.co/kolosal/qwen2.5-7b/resolve/main/Qwen2.5-7B-Instruct-Q4_K_M.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen2.5-7b/fp16/Qwen2.5-7B-Instruct-f16.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-7b/resolve/main/Qwen2.5-7B-Instruct-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen2.5-7b/int8/Qwen2.5-7B-Instruct-Q8_0.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-7b/resolve/main/Qwen2.5-7B-Instruct-Q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen2.5-7b/int4/Qwen2.5-7B-Instruct-Q4_K_M.gguf", + "downloadLink": "https://huggingface.co/kolosal/qwen2.5-7b/resolve/main/Qwen2.5-7B-Instruct-Q4_K_M.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-coder-0.5b.json b/models/qwen2.5-coder-0.5b.json index 096c127..49fb086 100644 --- a/models/qwen2.5-coder-0.5b.json +++ b/models/qwen2.5-coder-0.5b.json @@ -1,28 +1,30 @@ { "name": "Qwen Coder 0.5B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen-coder-0.5b/fp16/qwen2.5-coder-0.5b-instruct-fp16.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-fp16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": 
"models/qwen-coder-0.5b/int8/qwen2.5-coder-0.5b-instruct-q8_0.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen-coder-0.5b/int4/qwen2.5-coder-0.5b-instruct-q4_k_m.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-q4_k_m.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen-coder-0.5b/fp16/qwen2.5-coder-0.5b-instruct-fp16.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-fp16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen-coder-0.5b/int8/qwen2.5-coder-0.5b-instruct-q8_0.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen-coder-0.5b/int4/qwen2.5-coder-0.5b-instruct-q4_k_m.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-coder-1.5b.json b/models/qwen2.5-coder-1.5b.json index 736ac51..85c8c4f 100644 --- a/models/qwen2.5-coder-1.5b.json +++ b/models/qwen2.5-coder-1.5b.json @@ -1,28 +1,30 @@ { "name": "Qwen Coder 1.5B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": 
"models/qwen-coder-1.5b/fp16/ggml-model-f16.gguf", - "downloadLink": "https://huggingface.co/neopolita/qwen2.5-coder-1.5b-gguf/resolve/main/ggml-model-f16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen-coder-1.5b/int8/qwen2.5-coder-0.5b-instruct-q8_0.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen-coder-1.5b/int4/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen-coder-1.5b/fp16/ggml-model-f16.gguf", + "downloadLink": "https://huggingface.co/neopolita/qwen2.5-coder-1.5b-gguf/resolve/main/ggml-model-f16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen-coder-1.5b/int8/qwen2.5-coder-0.5b-instruct-q8_0.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen-coder-1.5b/int4/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-coder-14b.json 
b/models/qwen2.5-coder-14b.json index 94f332d..890d9dd 100644 --- a/models/qwen2.5-coder-14b.json +++ b/models/qwen2.5-coder-14b.json @@ -1,28 +1,30 @@ { "name": "Qwen Coder 14B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen-coder-14b/fp16/qwen2.5-coder-14b-instruct-fp16.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/qwen2.5-coder-14b-instruct-fp16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen-coder-14b/int8/qwen2.5-coder-14b-instruct-q8_0.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/qwen2.5-coder-14b-instruct-q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen-coder-14b/int4/qwen2.5-coder-14b-instruct-q4_k_m.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/qwen2.5-coder-14b-instruct-q4_k_m.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen-coder-14b/fp16/qwen2.5-coder-14b-instruct-fp16.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/qwen2.5-coder-14b-instruct-fp16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen-coder-14b/int8/qwen2.5-coder-14b-instruct-q8_0.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/qwen2.5-coder-14b-instruct-q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": 
"models/qwen-coder-14b/int4/qwen2.5-coder-14b-instruct-q4_k_m.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/qwen2.5-coder-14b-instruct-q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-coder-3b.json b/models/qwen2.5-coder-3b.json index 60c5548..15f82be 100644 --- a/models/qwen2.5-coder-3b.json +++ b/models/qwen2.5-coder-3b.json @@ -1,28 +1,30 @@ { "name": "Qwen Coder 3B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen-coder-3b/fp16/qwen2.5-coder-3b-instruct-fp16.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-fp16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen-coder-3b/int8/qwen2.5-coder-3b-instruct-q8_0.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen-coder-3b/int4/qwen2.5-coder-3b-instruct-q4_k_m.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q4_k_m.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": "models/qwen-coder-3b/fp16/qwen2.5-coder-3b-instruct-fp16.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-fp16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": 
"models/qwen-coder-3b/int8/qwen2.5-coder-3b-instruct-q8_0.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen-coder-3b/int4/qwen2.5-coder-3b-instruct-q4_k_m.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/models/qwen2.5-coder-7b.json b/models/qwen2.5-coder-7b.json index f623680..c98a7d5 100644 --- a/models/qwen2.5-coder-7b.json +++ b/models/qwen2.5-coder-7b.json @@ -1,28 +1,30 @@ { "name": "Qwen Coder 7B", "author": "Alibaba", - "fullPrecision": { - "type": "Full Precision", - "path": "models/qwen-coder-7b/fp16/qwen2.5-coder-7b-instruct-fp16.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-fp16.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized8Bit": { - "type": "8-bit Quantized", - "path": "models/qwen-coder-7b/int8/qwen2.5-coder-3b-instruct-q8_0.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q8_0.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 - }, - "quantized4Bit": { - "type": "4-bit Quantized", - "path": "models/qwen-coder-7b/int4/qwen2.5-coder-7b-instruct-q4_k_m.gguf", - "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf", - "isDownloaded": false, - "downloadProgress": 0.0, - "lastSelected": 0 + "variants": { + "Full Precision": { + "type": "Full Precision", + "path": 
"models/qwen-coder-7b/fp16/qwen2.5-coder-7b-instruct-fp16.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-fp16.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "8-bit Quantized": { + "type": "8-bit Quantized", + "path": "models/qwen-coder-7b/int8/qwen2.5-coder-3b-instruct-q8_0.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q8_0.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + }, + "4-bit Quantized": { + "type": "4-bit Quantized", + "path": "models/qwen-coder-7b/int4/qwen2.5-coder-7b-instruct-q4_k_m.gguf", + "downloadLink": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf", + "isDownloaded": false, + "downloadProgress": 0.0, + "lastSelected": 0 + } } } \ No newline at end of file diff --git a/server-test/python/openai_test.py b/server-test/python/openai_test.py index 290fae3..1d4fcd4 100644 --- a/server-test/python/openai_test.py +++ b/server-test/python/openai_test.py @@ -16,21 +16,21 @@ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Why anything to the power of zero is 1?"} ], - stream=False + stream=True ) # Process the full_response -print("Full response:") -print(stream.choices[0].message.content) +# print("Full response:") +# print(stream.choices[0].message.content) # Process streaming response -# print("Streaming response:") -# full_response = "" -# for chunk in stream: -# if chunk.choices[0].delta.content is not None: -# content = chunk.choices[0].delta.content -# full_response += content -# print(content, end="", flush=True) +print("Streaming response:") +full_response = "" +for chunk in stream: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + full_response += content + print(content, 
end="", flush=True) -# print("\n\nFull response:", full_response) +print("\n\nFull response:", full_response) diff --git a/server-test/python/openai_test_2.py b/server-test/python/openai_test_2.py new file mode 100644 index 0000000..40554f2 --- /dev/null +++ b/server-test/python/openai_test_2.py @@ -0,0 +1,36 @@ +import openai +import os + +# Configure the client to use your local endpoint +client = openai.OpenAI( + base_url="http://localhost:8080/v1", + api_key="sk-dummy" # Using dummy API key as in the curl example +) + +print("Starting streaming request...\n") + +# Make a streaming request +stream = client.chat.completions.create( + model="claude-3-opus-20240229", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ], + stream=True +) + +# Process the full_response +# print("Full response:") +# print(stream.choices[0].message.content) + +# Process streaming response +print("Streaming response:") +full_response = "" +for chunk in stream: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + full_response += content + print(content, end="", flush=True) + +print("\n\nFull response:", full_response) + diff --git a/source/main.cpp b/source/main.cpp index 82a64cc..c0fc4ed 100644 --- a/source/main.cpp +++ b/source/main.cpp @@ -2,14 +2,10 @@ #include "window/window_factory.hpp" #include "window/graphics_context_factory.hpp" -#include "window/gradient_background.hpp" #include "ui/fonts.hpp" #include "ui/title_bar.hpp" #include "ui/tab_manager.hpp" -#include "ui/chat/chat_history_sidebar.hpp" -#include "ui/chat/chat_window.hpp" -#include "ui/chat/preset_sidebar.hpp" #include "chat/chat_manager.hpp" #include "model/preset_manager.hpp" @@ -38,8 +34,6 @@ class ScopedCleanup ImGui_ImplWin32_Shutdown(); ImGui::DestroyContext(); - GradientBackground::CleanUp(); - NFD_Quit(); } }; @@ -122,13 +116,6 @@ void InitializeImGui(Window& window) ImGui_ImplOpenGL3_Init("#version 
330"); } -void InitializeGradientBackground(int display_w, int display_h) -{ - GradientBackground::generateGradientTexture(display_w, display_h); - g_shaderProgram = GradientBackground::createShaderProgram(g_quadVertexShaderSource, g_quadFragmentShaderSource); - GradientBackground::setupFullScreenQuad(); -} - void StartNewFrame() { ImGui_ImplOpenGL3_NewFrame(); ImGui_ImplWin32_NewFrame(); @@ -191,9 +178,6 @@ class Application display_w = window->getWidth(); display_h = window->getHeight(); - // Initialize gradient background - InitializeGradientBackground(display_w, display_h); - // Create the window state transition manager transitionManager = std::make_unique(*window); } @@ -227,17 +211,12 @@ class Application { display_w = new_display_w; display_h = new_display_h; - GradientBackground::generateGradientTexture(display_w, display_h); glViewport(0, 0, display_w, display_h); } - // Render the gradient background with transition effects - GradientBackground::renderGradientBackground( - display_w, - display_h, - transitionManager->getTransitionProgress(), - transitionManager->getEasedProgress() - ); + // Clear background with solid color instead of gradient + glClearColor(0.0f, 0.0f, 0.0f, 0.0f); // Transparent background + glClear(GL_COLOR_BUFFER_BIT); // Render the ImGui draw data using OpenGL ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());