From ee730ef751b70a160265efec63143ab2a7d70527 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 13 Nov 2025 20:56:08 +0800 Subject: [PATCH 01/37] fix: correct documentation output path and add Doxygen installation Fix documentation workflow that was failing because: 1. Doxygen wasn't installed (assumed to be present) 2. Upload path was wrong (build/docs/html vs docs/html) Root cause: The CMakeLists.txt docs target runs Doxygen with: WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} The Doxyfile specifies: OUTPUT_DIRECTORY = docs HTML_OUTPUT = html So documentation is generated at: /docs/html (not build/docs/html) Changes: 1. Add step to install doxygen and graphviz 2. Remove '|| true' from docs build (fail fast if docs fail) 3. Add verification step to check docs/html exists 4. Fix upload path from 'build/docs/html' to 'docs/html' This ensures the workflow fails clearly if documentation generation fails, rather than silently continuing and failing at tar with a confusing error message. Fixes: tar: build/docs/html: Cannot open: No such file or directory --- .github/workflows/documentation.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index a4ab433..61e2348 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -57,10 +57,25 @@ jobs: cmake -S .. -B . -DCMAKE_BUILD_TYPE=Release -DNOVA_LLM_BUILD_TESTS=OFF -DNOVA_LLM_ENABLE_LOGGING=OFF fi + - name: Install Doxygen + run: | + sudo apt-get update + sudo apt-get install -y doxygen graphviz + - name: Build documentation run: | cd build - cmake --build . --target docs || true + cmake --build . --target docs + + - name: Check documentation output + run: | + if [ ! -d "docs/html" ]; then + echo "Error: Documentation not generated at docs/html" + ls -la docs/ || echo "docs/ directory not found" + exit 1 + fi + echo "Documentation generated successfully" + ls -la docs/html/ - name: Setup Pages uses: actions/configure-pages@v4 @@ -68,7 +83,7 @@ jobs: - name: Upload artifact uses: actions/upload-pages-artifact@v3 with: - path: build/docs/html + path: docs/html - name: Deploy to GitHub Pages id: deployment From d4e7cec411f4472749ca60d56c7411ccbe944589 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 13 Nov 2025 21:02:47 +0800 Subject: [PATCH 02/37] feat: add code coverage generation and Codecov integration Add proper code coverage tracking with Codecov. Changes to codecov.yaml: 1. Add coverage status configuration (project & patch targets) 2. Configure precision, range, and thresholds 3. Expand ignore patterns (test/*, standalone/*, build*/*, cmake/*) 4. Configure PR comment layout and behavior Changes to ubuntu.yml workflow: 1. Add new 'run_coverage' job that: - Installs lcov for coverage report generation - Builds tests with ENABLE_TEST_COVERAGE=ON - Runs tests with ctest - Generates coverage.info using lcov - Filters out system and test files - Uploads to Codecov using codecov-action@v4 Coverage workflow: 1. Build with -fprofile-arcs -ftest-coverage flags (CMake option) 2. Run tests to generate .gcda files 3. Use lcov to capture coverage data 4. Filter out unwanted paths (/usr/*, test/*, conan/*) 5. Upload to Codecov with CODECOV_TOKEN secret Requirements: - Add CODECOV_TOKEN to repository secrets - Badge will be available at: https://codecov.io/gh// This enables: - Coverage tracking on every PR - Coverage badges in README - Coverage trends over time - Line-by-line coverage visualization Fixes the issue where codecov.yaml existed but no coverage was generated. --- .github/workflows/ubuntu.yml | 42 ++++++++++++++++++++++++++++++++++++ codecov.yaml | 35 ++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 2158326..e0ccf65 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -132,6 +132,48 @@ jobs: cmake --build . --config Debug # 可选:ctest --output-on-failure + run_coverage: + name: Build & Run Tests with Coverage + runs-on: ubuntu-latest + needs: deps_build_debug + steps: + - *step_checkout + - *step_setup_python + - *step_install_conan + - *step_detect_profile + - name: Install lcov + run: sudo apt-get update && sudo apt-get install -y lcov + - name: Restore Conan cache + uses: actions/cache@v4 + with: + path: ~/.conan2 + key: ${{ runner.os }}-conan-debug-${{ hashFiles('**/conanfile.py', '**/standalone/conanfile.txt', 'source/**', 'include/**') }} + - name: Build and run tests with coverage + run: | + mkdir -p build-coverage + cd build-coverage + conan install ../test --output-folder=conan --build=missing -s build_type=Debug + TOOLCHAIN_FILE=$(find $(pwd) -name "conan_toolchain.cmake" -type f | head -1) + cmake -S ../test -B . \ + -DCMAKE_BUILD_TYPE=Debug \ + -DENABLE_TEST_COVERAGE=ON \ + -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN_FILE" + cmake --build . --config Debug + ctest --output-on-failure || true + - name: Generate coverage report + run: | + cd build-coverage + lcov --directory . --capture --output-file coverage.info + lcov --remove coverage.info '/usr/*' '*/test/*' '*/conan/*' --output-file coverage.info + lcov --list coverage.info + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: ./build-coverage/coverage.info + fail_ci_if_error: false + verbose: true + token: ${{ secrets.CODECOV_TOKEN }} + run_standalone: name: Build & Run Standalone runs-on: ubuntu-latest diff --git a/codecov.yaml b/codecov.yaml index dae89fd..b29cd08 100644 --- a/codecov.yaml +++ b/codecov.yaml @@ -1,5 +1,36 @@ +# Codecov configuration for NovaLLM +# See: https://docs.codecov.com/docs/codecov-yaml + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true + + precision: 2 + round: down + range: "70...100" + ignore: - - "test" + - "test/**" + - "standalone/**" + - "documentation/**" + - "build*/**" + - "cmake/**" comment: - require_changes: true \ No newline at end of file + layout: "reach,diff,flags,tree,footer" + behavior: default + require_changes: true + require_base: false + require_head: true + +fixes: + - "before/:/after/" # Path fixes if needed From e484fb88c119113187c58c32e1ec5f7ff25f79ee Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 13 Nov 2025 21:08:52 +0800 Subject: [PATCH 03/37] other(readme): add macos badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f10919d..c0f4364 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ [![Ubuntu](https://github.com/peterlau123/NovaLLM/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/ubuntu.yml) [![Windows](https://github.com/peterlau123/NovaLLM/actions/workflows/windows.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/windows.yml) +[![MacOS](https://github.com/peterlau123/NovaLLM/actions/workflows/macos.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/macos.yml) [![Code Quality](https://github.com/peterlau123/NovaLLM/actions/workflows/code-quality.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/code-quality.yml) [![Documentation](https://github.com/peterlau123/NovaLLM/actions/workflows/documentation.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/documentation.yml) [![codecov](https://codecov.io/gh/peterlau123/NovaLLM/branch/master/graph/badge.svg)](https://codecov.io/gh/peterlau123/NovaLLM) From 6a12f5130627bd15bc24e76d3a249cd72ef26b39 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 13 Nov 2025 21:12:34 +0800 Subject: [PATCH 04/37] feat(tools): add pre-commit hooks with conventional commit validation Add pre-commit configuration to enforce code quality and commit message standards. Features: 1. Commit Message Validation (commit-msg hook): - Enforces Conventional Commits format: type(scope): subject - Allowed types: feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert - Commits with invalid format will be rejected 2. Code Quality Checks (pre-commit hook): - File checks: trailing whitespace, EOF newline, large files, merge conflicts, YAML/JSON syntax, private key detection - C++ formatting: clang-format (auto-format on commit) - CMake formatting: cmake-format and cmake-lint - YAML/JSON formatting: prettier - Shell scripts: shellcheck validation - Python scripts: black formatting 3. Documentation: - Added .pre-commit-setup.md with: - Installation instructions - Commit message format guide with examples - List of allowed types and recommended scopes - Troubleshooting guide - CI integration notes Setup Instructions: pip install pre-commit pre-commit install --hook-type commit-msg --hook-type pre-commit Example valid commits: feat(memory): add buffer pooling for better performance fix(build): correct Windows DLL export declarations docs(readme): update build instructions ci(workflows): add coverage reporting Excluded directories: build*, install*, .git, .github/workflows, docs/ This ensures consistent code quality and commit history across all contributors. Refs: https://www.conventionalcommits.org/ --- .pre-commit-config.yaml | 91 ++++++++++++++++++++++ .pre-commit-setup.md | 162 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 .pre-commit-config.yaml create mode 100644 .pre-commit-setup.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7033072 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,91 @@ +# Pre-commit hooks for NovaLLM +# Install: pip install pre-commit +# Setup: pre-commit install +# Run manually: pre-commit run --all-files + +repos: + # Conventional Commits - enforce commit message format + - repo: https://github.com/compilerla/conventional-pre-commit + rev: v3.0.0 + hooks: + - id: conventional-pre-commit + stages: [commit-msg] + args: + - feat # New feature + - fix # Bug fix + - docs # Documentation only changes + - style # Code style changes (formatting, no logic change) + - refactor # Code refactoring (no feature/fix) + - perf # Performance improvements + - test # Adding/updating tests + - build # Build system or dependencies + - ci # CI/CD changes + - chore # Other changes (tooling, maintenance) + - revert # Revert previous commit + + # Basic file checks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: end-of-file-fixer + - id: check-yaml + args: [--allow-multiple-documents] + - id: check-added-large-files + args: ['--maxkb=1000'] + - id: check-case-conflict + - id: check-merge-conflict + - id: check-json + - id: check-toml + - id: mixed-line-ending + args: ['--fix=lf'] + - id: detect-private-key + + # C++ formatting with clang-format + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v18.1.8 + hooks: + - id: clang-format + types_or: [c++, c] + args: ['-i'] # In-place formatting + + # CMake formatting + - repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + - id: cmake-lint + args: [--disabled-codes=C0103] + + # YAML/JSON formatting + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.1.0 + hooks: + - id: prettier + types_or: [yaml, json, markdown] + args: [--write, --prose-wrap=always] + + # Shell script checks + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.9.0.6 + hooks: + - id: shellcheck + + # Python formatting (for scripts) + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + language_version: python3 + +# Exclude patterns +exclude: | + (?x)^( + build.*| + install.*| + \.git| + \.github/workflows| + documentation/.*| + docs/.* + )$ diff --git a/.pre-commit-setup.md b/.pre-commit-setup.md new file mode 100644 index 0000000..405173f --- /dev/null +++ b/.pre-commit-setup.md @@ -0,0 +1,162 @@ +# Pre-commit Setup Guide + +This repository uses [pre-commit](https://pre-commit.com/) to ensure code quality and enforce commit message conventions. + +## Quick Setup + +```bash +# 1. Install pre-commit (one-time) +pip install pre-commit + +# 2. Install the git hooks (one-time per clone) +pre-commit install --hook-type commit-msg --hook-type pre-commit + +# 3. (Optional) Run checks on all files +pre-commit run --all-files +``` + +## Commit Message Format + +All commit messages **must** follow the [Conventional Commits](https://www.conventionalcommits.org/) format: + +``` +(): + +[optional body] + +[optional footer] +``` + +### Allowed Types + +- `feat`: New feature +- `fix`: Bug fix +- `docs`: Documentation only changes +- `style`: Code style changes (formatting, no logic change) +- `refactor`: Code refactoring (neither fixes a bug nor adds a feature) +- `perf`: Performance improvements +- `test`: Adding or updating tests +- `build`: Build system or dependency changes +- `ci`: CI/CD configuration changes +- `chore`: Other changes (tooling, maintenance) +- `revert`: Revert a previous commit + +### Examples + +✅ **Good commit messages:** +``` +feat(memory): add buffer pooling for better performance +fix(build): correct Windows DLL export declarations +docs(readme): update build instructions for macOS +refactor(tensor): simplify memory allocation logic +ci(workflows): add coverage reporting to ubuntu workflow +style: apply clang-format to all source files +test(buffer): add unit tests for buffer manager +``` + +❌ **Bad commit messages:** +``` +update code +Fixed bug +Add feature +WIP +asdf +``` + +### Scope (Optional but Recommended) + +The scope should indicate what part of the codebase is affected: +- `memory` - Memory management (buffers, allocators) +- `tensor` - Tensor operations +- `model` - Model implementation +- `build` - Build system (CMake, Conan) +- `ci` - CI/CD workflows +- `test` - Testing infrastructure +- `docs` - Documentation + +## What Gets Checked + +### On Every Commit + +1. **File checks:** + - Remove trailing whitespace + - Ensure files end with newline + - Check for large files (>1MB) + - Detect merge conflicts + - Check YAML/JSON syntax + - Detect private keys + +2. **Code formatting:** + - C/C++ files: `clang-format` + - CMake files: `cmake-format` + - YAML/JSON: `prettier` + - Shell scripts: `shellcheck` + - Python scripts: `black` + +3. **Commit message:** + - Must follow conventional commit format + - Type must be from allowed list + - Subject should be concise + +## Bypassing Hooks (Not Recommended) + +If you absolutely need to skip the hooks (e.g., for a WIP commit): + +```bash +# Skip pre-commit hooks (file checks) +git commit --no-verify + +# Skip commit-msg hook +git commit --no-verify +``` + +**⚠️ Warning:** Bypassed commits will still be checked by CI and may fail! + +## Updating Hooks + +```bash +# Update to latest versions +pre-commit autoupdate + +# Reinstall hooks after config changes +pre-commit install --hook-type commit-msg --hook-type pre-commit +``` + +## CI Integration + +Pre-commit checks also run in CI (GitHub Actions). If your local hooks pass, CI should pass too. + +## Troubleshooting + +### "command not found: pre-commit" +```bash +pip install pre-commit +``` + +### "hook failed" errors +```bash +# Run to see detailed error +pre-commit run --all-files + +# Reinstall hooks +pre-commit uninstall +pre-commit install --hook-type commit-msg --hook-type pre-commit +``` + +### Formatting conflicts +```bash +# Let pre-commit auto-fix formatting +pre-commit run --all-files + +# Stage the auto-fixed files +git add . + +# Commit again +git commit -m "style: apply automated formatting" +``` + +## More Information + +- [Pre-commit documentation](https://pre-commit.com/) +- [Conventional Commits specification](https://www.conventionalcommits.org/) +- [NovaLLM Contributing Guide](CONTRIBUTING.md) From 2cba35535528f5bba2eb79201a3ce69ecdeecf03 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 13 Nov 2025 21:20:06 +0800 Subject: [PATCH 05/37] feat(tools): add branch name validation with post-checkout hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add automatic branch name validation to enforce naming conventions. New Features: 1. Branch Name Validation (post-checkout hook): - Enforces format: - or / - Valid types: feat, fix, docs, style, refactor, perf, test, build, ci, chore - Examples: feat-buffer-pooling, fix-memory-leak - Validates on git checkout -b or git switch -c - Shows helpful error message with examples if invalid 2. Protected Branches (skip validation): - main, master, develop - release/* (e.g., release/v1.0.0) - hotfix/* (e.g., hotfix/critical-bug) 3. Install Script (.githooks/install.sh): - One-command setup for all hooks - Configures git to use custom hooks directory - Installs pre-commit hooks - Makes hook scripts executable - Shows helpful summary 4. Updated Documentation: - Added branch naming section to .pre-commit-setup.md - Examples of valid/invalid branch names - Installation instructions updated Setup: ./.githooks/install.sh Or manually: git config core.hooksPath .githooks chmod +x .githooks/* Branch Validation Examples: ✅ git checkout -b feat-add-buffer-pooling ✅ git checkout -b fix/memory-leak ❌ git checkout -b my-feature (rejected - no type prefix) ❌ git checkout -b Feature-test (rejected - capital letter) This ensures consistent branch naming across all contributors and makes it easier to understand what each branch is for. Related to commit message validation from previous commit. --- .githooks/install.sh | 71 +++++++++++++++++++++++++++++++++++ .githooks/post-checkout | 82 +++++++++++++++++++++++++++++++++++++++++ .pre-commit-setup.md | 43 +++++++++++++++++++-- 3 files changed, 193 insertions(+), 3 deletions(-) create mode 100755 .githooks/install.sh create mode 100755 .githooks/post-checkout diff --git a/.githooks/install.sh b/.githooks/install.sh new file mode 100755 index 0000000..44b44d4 --- /dev/null +++ b/.githooks/install.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Install git hooks for NovaLLM +# This script sets up both pre-commit hooks and custom git hooks + +set -euo pipefail + +echo "🔧 Installing NovaLLM Git Hooks..." +echo "" + +# Get the repository root +REPO_ROOT=$(git rev-parse --show-toplevel) +HOOKS_DIR="$REPO_ROOT/.githooks" + +# 1. Configure git to use custom hooks directory +echo "📁 Configuring git to use custom hooks directory..." +git config core.hooksPath "$HOOKS_DIR" +echo " ✅ Git hooks path set to: $HOOKS_DIR" +echo "" + +# 2. Install pre-commit hooks +if command -v pre-commit &> /dev/null; then + echo "📦 Installing pre-commit hooks..." + pre-commit install --hook-type commit-msg --hook-type pre-commit + echo " ✅ Pre-commit hooks installed" +else + echo "⚠️ pre-commit not found. Install it with:" + echo " pip install pre-commit" + echo " Then run: pre-commit install --hook-type commit-msg --hook-type pre-commit" +fi +echo "" + +# 3. Make all hook scripts executable +echo "🔐 Making hook scripts executable..." +chmod +x "$HOOKS_DIR"/* 2>/dev/null || true +echo " ✅ Hook scripts are executable" +echo "" + +# 4. Test branch name validation (if on a feature branch) +CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) +echo "📋 Current branch: $CURRENT_BRANCH" + +# Summary +cat <- or / + Example: feat-buffer-pooling, fix-memory-leak + +Commit message format: + (): + Example: feat(memory): add buffer pooling + +Valid types: + feat, fix, docs, style, refactor, perf, test, build, ci, chore + +Try it out: + git checkout -b feat-test-branch + git commit -m "feat(test): try the hooks" + +For more info, see: .pre-commit-setup.md + +EOF diff --git a/.githooks/post-checkout b/.githooks/post-checkout new file mode 100755 index 0000000..e36439d --- /dev/null +++ b/.githooks/post-checkout @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# Post-checkout hook to validate branch names +# This hook runs after 'git checkout' or 'git switch' + +set -euo pipefail + +# Get the previous HEAD, new HEAD, and checkout type +PREV_HEAD=$1 +NEW_HEAD=$2 +CHECKOUT_TYPE=$3 # 1 = branch checkout, 0 = file checkout + +# Only validate on branch checkouts +if [ "$CHECKOUT_TYPE" != "1" ]; then + exit 0 +fi + +# Get the current branch name +BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) + +# Skip validation for special branches +if [[ "$BRANCH_NAME" =~ ^(main|master|develop|release/.+|hotfix/.+)$ ]]; then + exit 0 +fi + +# Skip validation if we're in detached HEAD state +if [ "$BRANCH_NAME" = "HEAD" ]; then + exit 0 +fi + +# Define the valid branch name pattern +# Format: - or / +# Where type is: feat, fix, docs, style, refactor, perf, test, build, ci, chore +VALID_PATTERN="^(feat|fix|docs|style|refactor|perf|test|build|ci|chore)([-/])[a-z0-9_-]+$" + +# Check if branch name matches the pattern +if [[ ! "$BRANCH_NAME" =~ $VALID_PATTERN ]]; then + cat <- or / + +Valid types: + • feat - New feature + • fix - Bug fix + • docs - Documentation changes + • style - Code style changes + • refactor - Code refactoring + • perf - Performance improvements + • test - Test changes + • build - Build system changes + • ci - CI/CD changes + • chore - Other changes + +Examples: + ✅ feat-buffer-pooling + ✅ fix-windows-dll-exports + ✅ docs-update-readme + ✅ refactor/simplify-tensor-allocation + ✅ ci-add-coverage-reporting + +Current branch: ❌ $BRANCH_NAME + +To fix this, rename your branch: + git branch -m $BRANCH_NAME - + +Or delete and recreate: + git checkout main + git branch -D $BRANCH_NAME + git checkout -b - + +EOF + exit 1 +fi + +# Branch name is valid +echo "✅ Branch name '$BRANCH_NAME' is valid" +exit 0 diff --git a/.pre-commit-setup.md b/.pre-commit-setup.md index 405173f..73a68c3 100644 --- a/.pre-commit-setup.md +++ b/.pre-commit-setup.md @@ -5,16 +5,53 @@ This repository uses [pre-commit](https://pre-commit.com/) to ensure code qualit ## Quick Setup ```bash -# 1. Install pre-commit (one-time) -pip install pre-commit +# Option 1: Use the install script (recommended) +./.githooks/install.sh -# 2. Install the git hooks (one-time per clone) +# Option 2: Manual installation +pip install pre-commit pre-commit install --hook-type commit-msg --hook-type pre-commit +git config core.hooksPath .githooks # 3. (Optional) Run checks on all files pre-commit run --all-files ``` +## Branch Naming Convention + +All branch names **must** follow this format: + +``` +- or / +``` + +### Examples + +✅ **Valid branch names:** +``` +feat-buffer-pooling +fix-windows-dll-exports +docs-update-readme +refactor/simplify-tensor-allocation +ci-add-coverage-reporting +test-buffer-manager +``` + +❌ **Invalid branch names:** +``` +buffer_pooling (no type prefix) +Feat-something (capital letter) +feature-test (wrong type, use 'feat') +my-branch (no type prefix) +``` + +### Protected Branches (No Validation) + +These branches don't require the naming convention: +- `main`, `master`, `develop` +- `release/*` (e.g., `release/v1.0.0`) +- `hotfix/*` (e.g., `hotfix/critical-bug`) + ## Commit Message Format All commit messages **must** follow the [Conventional Commits](https://www.conventionalcommits.org/) format: From 20d79151f68842c500c56a0bceb705615300c1d6 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 13 Nov 2025 21:25:01 +0800 Subject: [PATCH 06/37] refactor(tools): simplify hook setup to single command for all platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor git hooks to work out-of-the-box with minimal user intervention across macOS, Linux, and Windows. Key Improvements: 1. Single Command Setup: - Before: Multiple manual steps + bash scripts - After: ONE command works on all platforms: pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout 2. Cross-Platform Branch Validation: - Replaced bash-only post-checkout hook with Python script - Works on Windows, macOS, Linux (any platform with Python 3) - Uses subprocess to call git (works everywhere) - No platform-specific shell syntax 3. Integrated with pre-commit: - Branch validation added as local pre-commit hook - Automatically installed with pre-commit install - No need for manual git config or custom hooks path 4. Better Documentation: - Added SETUP.md with simple quick-start guide - One command setup for new contributors - Clear cross-platform support matrix - Troubleshooting section Files Changed: - .githooks/check_branch_name.py (NEW) - Cross-platform Python validator - .pre-commit-config.yaml - Added local branch name check - SETUP.md (NEW) - Simple setup guide for contributors - .pre-commit-setup.md - Updated with simplified instructions - .pre-commit-hooks.yaml (NEW) - Hook metadata Benefits: ✅ Works on Windows (Git Bash, PowerShell, WSL) ✅ Works on macOS (zsh, bash) ✅ Works on Linux (bash, zsh, sh) ✅ Only requires Python 3 + pip (already needed for development) ✅ Single command setup after git clone ✅ No platform-specific scripts ✅ Automatic validation on commit and branch checkout User Experience: 1. Clone repo 2. Run: pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout 3. Done! All validations work automatically No need to: - Run separate install scripts - Configure git hooks path manually - Install platform-specific tools - Deal with Windows compatibility issues This makes contributing easier and ensures consistent code quality across all platforms. --- .githooks/check_branch_name.py | 129 +++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 11 +++ .pre-commit-hooks.yaml | 11 +++ .pre-commit-setup.md | 19 +++-- SETUP.md | 101 ++++++++++++++++++++++++++ 5 files changed, 264 insertions(+), 7 deletions(-) create mode 100755 .githooks/check_branch_name.py create mode 100644 .pre-commit-hooks.yaml create mode 100644 SETUP.md diff --git a/.githooks/check_branch_name.py b/.githooks/check_branch_name.py new file mode 100755 index 0000000..6ed5aa8 --- /dev/null +++ b/.githooks/check_branch_name.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +""" +Branch name validator for NovaLLM +Works on all platforms (Windows, macOS, Linux) +""" +import re +import subprocess +import sys + + +def get_current_branch(): + """Get the current git branch name.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError: + return None + + +def is_protected_branch(branch_name): + """Check if branch is protected (doesn't need validation).""" + protected_patterns = [ + r"^main$", + r"^master$", + r"^develop$", + r"^release/.+", + r"^hotfix/.+", + r"^HEAD$", # Detached HEAD + ] + return any(re.match(pattern, branch_name) for pattern in protected_patterns) + + +def validate_branch_name(branch_name): + """ + Validate branch name against the required format. + Format: - or / + """ + # Pattern: type followed by - or / followed by lowercase alphanumeric + valid_types = [ + "feat", + "fix", + "docs", + "style", + "refactor", + "perf", + "test", + "build", + "ci", + "chore", + ] + pattern = rf"^({'|'.join(valid_types)})([-/])[a-z0-9_-]+$" + return re.match(pattern, branch_name) is not None + + +def print_error_message(branch_name): + """Print helpful error message for invalid branch names.""" + error_msg = f""" +{'='*70} + ❌ INVALID BRANCH NAME +{'='*70} + +Branch: {branch_name} + +Branch names must follow this format: + - or / + +Valid types: + • feat - New feature + • fix - Bug fix + • docs - Documentation changes + • style - Code style changes + • refactor - Code refactoring + • perf - Performance improvements + • test - Test changes + • build - Build system changes + • ci - CI/CD changes + • chore - Other changes + +✅ Valid examples: + feat-buffer-pooling + fix-windows-dll-exports + docs-update-readme + refactor/simplify-tensor-allocation + ci-add-coverage-reporting + +❌ Current branch: {branch_name} + +To fix this, rename your branch: + git branch -m {branch_name} - + +Or delete and recreate: + git checkout main + git branch -D {branch_name} + git checkout -b - + +{'='*70} +""" + print(error_msg, file=sys.stderr) + + +def main(): + """Main entry point for branch name validation.""" + branch_name = get_current_branch() + + if not branch_name: + # Can't determine branch, skip validation + sys.exit(0) + + # Skip protected branches + if is_protected_branch(branch_name): + sys.exit(0) + + # Validate branch name + if not validate_branch_name(branch_name): + print_error_message(branch_name) + sys.exit(1) + + # Branch name is valid + print(f"✅ Branch name '{branch_name}' is valid") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7033072..68ffd76 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,17 @@ # Run manually: pre-commit run --all-files repos: + # Branch name validation - local hook (works out of the box) + - repo: local + hooks: + - id: check-branch-name + name: Check branch name format + entry: python3 .githooks/check_branch_name.py + language: system + pass_filenames: false + always_run: true + stages: [post-checkout, manual] + # Conventional Commits - enforce commit message format - repo: https://github.com/compilerla/conventional-pre-commit rev: v3.0.0 diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 0000000..2f688a9 --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,11 @@ +# Pre-commit hooks metadata for NovaLLM +# This allows the hooks to be used as pre-commit hooks + +- id: check-branch-name + name: Check branch name format + description: Validates that branch names follow - format + entry: python .githooks/check_branch_name.py + language: system + pass_filenames: false + always_run: true + stages: [post-checkout] diff --git a/.pre-commit-setup.md b/.pre-commit-setup.md index 73a68c3..fed4327 100644 --- a/.pre-commit-setup.md +++ b/.pre-commit-setup.md @@ -4,17 +4,22 @@ This repository uses [pre-commit](https://pre-commit.com/) to ensure code qualit ## Quick Setup +**After cloning the repository, run ONE command:** + ```bash -# Option 1: Use the install script (recommended) -./.githooks/install.sh +pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout +``` -# Option 2: Manual installation -pip install pre-commit -pre-commit install --hook-type commit-msg --hook-type pre-commit -git config core.hooksPath .githooks +That's it! The hooks will now run automatically on every commit and branch checkout. -# 3. (Optional) Run checks on all files +### Optional: Test the setup + +```bash +# Run all checks manually pre-commit run --all-files + +# Test branch name validation +python3 .githooks/check_branch_name.py ``` ## Branch Naming Convention diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 0000000..d17989c --- /dev/null +++ b/SETUP.md @@ -0,0 +1,101 @@ +# NovaLLM Development Setup + +Quick guide for new contributors. + +## One-Time Setup (After Cloning) + +Run this single command to set up all development hooks: + +```bash +pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout +``` + +**What this does:** +- Installs pre-commit (if not already installed) +- Enables automatic validation on every commit +- Enables branch name validation +- Enables code quality checks + +## Naming Conventions + +### Branch Names + +Format: `-` + +**Valid types:** feat, fix, docs, style, refactor, perf, test, build, ci, chore + +**Examples:** +```bash +git checkout -b feat-add-buffer-pooling ✅ +git checkout -b fix-memory-leak ✅ +git checkout -b docs-update-readme ✅ +git checkout -b my-branch ❌ (no type prefix) +``` + +### Commit Messages + +Format: `(): ` + +**Examples:** +```bash +git commit -m "feat(memory): add buffer pooling" ✅ +git commit -m "fix(build): correct DLL exports" ✅ +git commit -m "docs(readme): update setup guide" ✅ +git commit -m "update code" ❌ (no type) +``` + +## What Happens Automatically + +After setup, the hooks will: + +1. **On `git checkout -b new-branch`:** + - ✅ Validate branch name format + - ❌ Reject invalid branch names with helpful error + +2. **On `git commit`:** + - ✅ Format C++ code with clang-format + - ✅ Check for trailing whitespace, large files, etc. + - ✅ Validate commit message format + - ❌ Reject invalid commits with helpful error + +## Cross-Platform Support + +Works on: +- ✅ macOS (zsh, bash) +- ✅ Linux (bash, zsh, sh) +- ✅ Windows (Git Bash, PowerShell, WSL) + +Requirements: +- Python 3.6+ (comes with most systems) +- Git (already have it if you cloned the repo) +- pip (to install pre-commit) + +## Troubleshooting + +### "pre-commit: command not found" +```bash +pip install pre-commit +# or +pip3 install pre-commit +``` + +### "Permission denied" on Linux/macOS +```bash +chmod +x .githooks/*.py +chmod +x .githooks/*.sh +``` + +### Need to bypass hooks temporarily? +```bash +git commit --no-verify -m "WIP: work in progress" +``` + +**⚠️ Warning:** Bypassed commits will still be checked by CI! + +## Full Documentation + +See [.pre-commit-setup.md](.pre-commit-setup.md) for complete documentation. + +## Build Instructions + +See [README.md](README.md) for build and development instructions. From dd7d864a2297c7d7a6e60e0eb22dad0d1ac47183 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Sat, 15 Nov 2025 21:48:59 +0800 Subject: [PATCH 07/37] docs(readme): add emoji to Developer-friendly feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 👨‍💻 emoji in front of Developer-friendly feature for visual consistency with other features. --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c0f4364..61d1a79 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,9 @@ A lightweight and efficient C/C++ library for Large Language Model (LLM) inferen - 🚀 **Lightweight**: Minimal dependencies, focusing on core functionality - 🔧 **Extensible**: Easy to extend with custom models and optimizations -- 🎯 **Efficient**: Support for extreme low-bit quantization -- 🛠️ **Flexible**: Support for OpenAI Triton and ThunderKittens kernels +- 🎯 **Efficient**: Support for low-bit quantization and custom kernels +- 🛠️ **Portable**: Support inference on MacOS/Linux/Windows platforms +- 👨‍💻 **Developer-friendly**: Easy to use and integrate into other projects ## Supported Models From 6399afe529a2bfadbcb25313c464b8d17dce1fc8 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Sun, 16 Nov 2025 23:37:13 +0800 Subject: [PATCH 08/37] fix(buffer_hub): fix refill logic --- .clang-format | 2 +- include/NovaLLM/memory/buffer_hub.h | 1 - source/memory/buffer_hub.cpp | 24 ++++++++++++++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/.clang-format b/.clang-format index 4f09886..5ce9ead 100644 --- a/.clang-format +++ b/.clang-format @@ -10,7 +10,7 @@ SeparateDefinitionBlocks: Always # 基础缩进配置 IndentWidth: 2 ContinuationIndentWidth: 4 -ColumnLimit: 100 +ColumnLimit: 200 # 函数参数和初始化列表格式 BinPackArguments: false diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 13f6fe2..537b569 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -160,7 +160,6 @@ class NOVA_LLM_API BufferHub { uint32_t index = -1; Size level_size {static_cast(0)}; // each block size at this level - //using BlockPtr = Block*; std::list block_list; using BlockIterator = std::list::iterator; std::unordered_map free_map; diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 463e5cb..c9002de 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -53,6 +53,8 @@ std::vector DefaultSizeLevelStrategy::gigaByteSizes() { BlockPtr BufferHub::Level::fetchOneFreeBlock() { BlockPtr ret_block {nullptr}; if (!free_map.empty()) { + LOG_INFO("Found free block at level %d", index); + auto it = free_map.begin(); auto block_it = it->second; (*block_it)->ref_cnt++; @@ -60,7 +62,9 @@ BlockPtr BufferHub::Level::fetchOneFreeBlock() { free_map.erase(it); ret_block = *block_it; } else { - auto level_bytes = level_size.totalBytes(); + LOG_INFO("No free block at level %d,refilling...", index); + + auto level_bytes = this->level_size.totalBytes(); auto kb_level = Size(0, 1, 0, 0); auto mb_level = Size(0, 0, 1, 0); @@ -68,13 +72,22 @@ BlockPtr BufferHub::Level::fetchOneFreeBlock() { auto kb_bytes = kb_level.totalBytes(); auto mb_bytes = mb_level.totalBytes(); auto gb_bytes = gb_level.totalBytes(); - - if (level_bytes < kb_bytes) { + /** + * @brief: refill strategy + * if level size < 1KB, refill 1KB blocks + * else if level size < 1MB, refill 1MB blocks + * else if level size < 1GB, refill 1GB blocks + * else refill 4GB blocks + * + */ + if (level_bytes <= kb_bytes) { refill(kb_level); - } else if (level_bytes < mb_bytes) { + } else if (kb_bytessecond); } @@ -159,7 +172,6 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) { void BufferHub::initConfig(const Config& config) { device_type_ = config.device_type; size_levels_ = config.size_levels; - // sort size levels in ascending order std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { return a.totalBytes() < b.totalBytes(); }); From 3cf44a9bd0006bc3621d5fb849f408c7a91545ed Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Sun, 16 Nov 2025 23:38:56 +0800 Subject: [PATCH 09/37] chore: add file ignore --- .gitignore | 7 ++++++- scripts/build.sh | 0 scripts/build_macos.sh | 0 scripts/build_ubuntu.sh | 0 4 files changed, 6 insertions(+), 1 deletion(-) mode change 100644 => 100755 scripts/build.sh mode change 100644 => 100755 scripts/build_macos.sh mode change 100644 => 100755 scripts/build_ubuntu.sh diff --git a/.gitignore b/.gitignore index c320280..98d5c81 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,9 @@ latex/ *.gcov coverage/ CMakeUserPresets.json -build-test/ \ No newline at end of file +build-test/ +build-debug/ +build-release/ +build-test-debug/ +install-debug/ +install-release/ \ No newline at end of file diff --git a/scripts/build.sh b/scripts/build.sh old mode 100644 new mode 100755 diff --git a/scripts/build_macos.sh b/scripts/build_macos.sh old mode 100644 new mode 100755 diff --git a/scripts/build_ubuntu.sh b/scripts/build_ubuntu.sh old mode 100644 new mode 100755 From e80c2a2be0f6a5f8b15980f7479c1927dc871b49 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Sun, 16 Nov 2025 23:54:43 +0800 Subject: [PATCH 10/37] fix: update README and fix script --- README.md | 38 ++++++++++++++++++++++++++++++++++++++ build.sh | 46 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 7 deletions(-) mode change 100644 => 100755 build.sh diff --git a/README.md b/README.md index 61d1a79..f8e593d 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,42 @@ cmake --build . ### Scripted builds +#### Option 1: Unified Build Script (Recommended) + +The root `build.sh` provides a comprehensive build system with full control: + +```bash +# Basic build (Release mode) +./build.sh + +# Build with tests +./build.sh -t + +# Clean build (removes all build-* and install-* directories) +./build.sh -c -r + +# Build everything (main + tests + standalone + package) +./build.sh -a + +# Debug build with verbose output +./build.sh -d -v + +# Show all options +./build.sh --help +``` + +**Key features:** +- `-c, --clean`: Cleans all `build-*` and `install-*` directories (including custom directories specified via `--build-dir`/`--install-dir`) before building +- `-r, --release`: Build in Release mode (default) +- `-d, --debug`: Build in Debug mode +- `-t, --tests`: Build and run tests +- `-s, --standalone`: Build standalone application +- `-p, --package`: Create Conan package +- `-a, --all`: Build everything +- `-v, --verbose`: Enable verbose output + +#### Option 2: Platform-Specific Scripts + Unified wrapper (auto-detects OS): ```bash @@ -86,6 +122,8 @@ scripts/build_ubuntu.sh --type Debug --enable-logging OFF scripts/build_windows.ps1 -Configuration Release -EnableLogging ON -WithTests ``` +**Note:** The root `build.sh` is more feature-rich and recommended for development, while `scripts/build.sh` is a lightweight wrapper for CI/CD pipelines. + ### Makefile builds - Use scripts via Make: `make script-build` (honors BUILD_TYPE, ENABLE_LOGGING, ENABLE_TESTS) diff --git a/build.sh b/build.sh old mode 100644 new mode 100755 index 6fe541e..7b16b88 --- a/build.sh +++ b/build.sh @@ -112,7 +112,8 @@ Build Targets: -a, --all Build everything (main + tests + standalone + package) Build Options: - -c, --clean Clean build directory before building + -c, --clean Clean all build-* and install-* directories (including custom + directories specified via --build-dir/--install-dir) before building --build-dir DIR Set custom build directory (default: build) --install-dir DIR Set custom install directory (default: install) @@ -304,14 +305,45 @@ setup_conan() { clean_build_dirs() { print_header "Cleaning build directories" - if [[ -d "$BUILD_DIR" ]]; then - print_info "Removing $BUILD_DIR" - rm -rf "$BUILD_DIR" + local cleaned=false + local dirs_to_clean=() + + # First, add user-specified directories if they exist + if [[ -n "$BUILD_DIR" && -d "$BUILD_DIR" ]]; then + dirs_to_clean+=("$BUILD_DIR") + fi + if [[ -n "$INSTALL_DIR" && -d "$INSTALL_DIR" ]]; then + dirs_to_clean+=("$INSTALL_DIR") fi - if [[ -d "$INSTALL_DIR" ]]; then - print_info "Removing $INSTALL_DIR" - rm -rf "$INSTALL_DIR" + # Then, find all build and install directories + shopt -s nullglob # Don't include pattern if no match + for dir in build build-* build_* install install-* install_*; do + if [[ -d "$dir" ]]; then + # Avoid duplicates + local already_added=false + for added_dir in "${dirs_to_clean[@]}"; do + if [[ "$dir" == "$added_dir" ]]; then + already_added=true + break + fi + done + if [[ "$already_added" == false ]]; then + dirs_to_clean+=("$dir") + fi + fi + done + shopt -u nullglob + + # Remove all collected directories + for dir in "${dirs_to_clean[@]}"; do + print_info "Removing $dir" + rm -rf "$dir" + cleaned=true + done + + if [[ "$cleaned" == false ]]; then + print_info "No build or install directories to clean" fi print_success "Clean complete" From 9e84da0c51df27eae56e0f82550182e4b529b197 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Mon, 17 Nov 2025 00:11:49 +0800 Subject: [PATCH 11/37] chore(build.sh): automate build.sh --- build.sh | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-) diff --git a/build.sh b/build.sh index 7b16b88..d8db20f 100755 --- a/build.sh +++ b/build.sh @@ -139,19 +139,23 @@ VERBOSE=false parse_args() { # Reset target flags if any target is explicitly specified local targets_specified=false + local main_explicitly_specified=false # First pass: check if any targets are specified for arg in "$@"; do case $arg in + -m|--main) + main_explicitly_specified=true + ;; -t|--tests|-s|--standalone|-p|--package|-a|--all) targets_specified=true - break ;; esac done - # If targets are specified, disable default main build - if [[ "$targets_specified" = true ]]; then + # If targets are specified but main is not explicitly requested, disable default main build + # We'll check package freshness later to decide if main needs to be built + if [[ "$targets_specified" = true && "$main_explicitly_specified" = false ]]; then BUILD_MAIN=false fi @@ -298,6 +302,86 @@ setup_conan() { fi } +# ============================================================================ +# Package Detection Functions +# ============================================================================ + +check_package_exists() { + local package_ref="novallm/0.1.0@local/testing" + if conan list "$package_ref" 2>/dev/null | grep -q "$package_ref"; then + return 0 # Package exists + else + return 1 # Package doesn't exist + fi +} + +get_package_timestamp() { + local package_ref="novallm/0.1.0@local/testing" + # Get the package folder path from conan cache + local cache_info + cache_info=$(conan cache path "$package_ref" 2>/dev/null) + + if [[ -z "$cache_info" ]]; then + echo "0" # Package doesn't exist + return + fi + + # Get the most recent modification time in the package + local package_time + if [[ "$(uname)" == "Darwin" ]]; then + # macOS + package_time=$(stat -f "%m" "$cache_info" 2>/dev/null || echo "0") + else + # Linux + package_time=$(stat -c "%Y" "$cache_info" 2>/dev/null || echo "0") + fi + + echo "$package_time" +} + +get_source_timestamp() { + # Get the most recent modification time of source files + local newest_time=0 + local file + + # Check source, include, and CMakeLists.txt files + while IFS= read -r -d '' file; do + local file_time + if [[ "$(uname)" == "Darwin" ]]; then + file_time=$(stat -f "%m" "$file" 2>/dev/null || echo "0") + else + file_time=$(stat -c "%Y" "$file" 2>/dev/null || echo "0") + fi + + if [[ $file_time -gt $newest_time ]]; then + newest_time=$file_time + fi + done < <(find source include CMakeLists.txt conanfile.py -type f -print0 2>/dev/null) + + echo "$newest_time" +} + +is_package_outdated() { + if ! check_package_exists; then + print_info "Package not found in cache" + return 0 # Package doesn't exist, needs rebuild + fi + + local package_time + local source_time + + package_time=$(get_package_timestamp) + source_time=$(get_source_timestamp) + + if [[ $source_time -gt $package_time ]]; then + print_info "Source code is newer than cached package" + return 0 # Source is newer, needs rebuild + else + print_info "Cached package is up-to-date" + return 1 # Package is up-to-date + fi +} + # ============================================================================ # Build Functions # ============================================================================ @@ -563,8 +647,28 @@ main() { fi # Execute build targets + # If tests or standalone are requested but main wasn't explicitly requested, + # check if we need to rebuild main based on package freshness + local need_package=false + if [[ "$BUILD_TESTS" == true || "$BUILD_STANDALONE" == true ]]; then + need_package=true + if [[ "$BUILD_MAIN" == false ]]; then + print_header "Checking package freshness" + if is_package_outdated; then + print_info "Enabling main project build due to outdated/missing package" + BUILD_MAIN=true + fi + fi + fi + if [[ "$BUILD_MAIN" == true ]]; then build_main_project + + # Auto-create package if tests or standalone need it + if [[ "$need_package" == true || "$CREATE_PACKAGE" == true ]]; then + print_info "Creating/updating Conan package for downstream consumers..." + CREATE_PACKAGE=true + fi fi if [[ "$CREATE_PACKAGE" == true ]]; then From 9b031a1bd858d3e9280d035b8bbd9c4ba2a8fc0a Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Mon, 17 Nov 2025 22:42:28 +0800 Subject: [PATCH 12/37] refactor(buffer_hub): refactor the refill logic --- include/NovaLLM/memory/buffer_hub.h | 47 ++++++++++++++------ source/memory/buffer_hub.cpp | 68 ++++++++++------------------- test/source/buffer_hub_test.cpp | 4 +- 3 files changed, 59 insertions(+), 60 deletions(-) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 537b569..6635653 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -18,19 +18,22 @@ struct Size { uint64_t mb_ = 0; uint64_t gb_ = 0; uint64_t total_bytes_ = 0; - const uint64_t ratio_ = 1024; + const uint64_t ratio_ = 1<<10; void convert_in_units(uint64_t bytes) { - auto down_ratio = std::pow(ratio_, 3); + auto down_ratio = ratio_*ratio_*ratio_;//std::pow(ratio_, 3); + //number of gb units gb_ = bytes / down_ratio; bytes -= gb_ * down_ratio; down_ratio /= ratio_; - + + //number of mb units mb_ = bytes / down_ratio; bytes -= mb_ * down_ratio; down_ratio /= ratio_; + //number of kb units kb_ = bytes / down_ratio; bytes -= kb_ * down_ratio; @@ -77,6 +80,22 @@ struct Size { convert_in_units(total_bytes_); } + uint64_t gb() const { + return this->gb_; + } + + uint64_t mb() const { + return this->mb_; + } + + uint64_t kb() const { + return this->kb_; + } + + uint64_t b() const { + return this->b_; + } + Size& operator=(const Size& rhs) { total_bytes_ = rhs.totalBytes(); convert_in_units(total_bytes_); @@ -102,13 +121,11 @@ struct SizeEqual { struct Block { using DataPtr = uint8_t*; - //using BlockPtr = Block*; DataPtr data = nullptr; uint64_t size = 0; int32_t ref_cnt = 0; bool isValid() const { - // return data != nullptr && (prev != nullptr || next != nullptr) && 0 != size; return data != nullptr && 0 != size; } }; @@ -147,19 +164,19 @@ class NOVA_LLM_API BufferHub { IAllocatorSharedPtr allocator; }; + /** + * @brief Buffers at the specified size level + * + */ struct Level { public: BlockPtr fetchOneFreeBlock(); - void putOneBlock(const BlockPtr& block_ptr); - void refill(const Size& sz); - ~Level(); - - uint32_t index = -1; - Size level_size {static_cast(0)}; // each block size at this level - + uint32_t index = -1;//level index in buffer hub + Size block_size {static_cast(0)}; // each block size at this level + uint32_t expand_factor=2; std::list block_list; using BlockIterator = std::list::iterator; std::unordered_map free_map; @@ -202,12 +219,14 @@ class NOVA_LLM_API BufferHub { BufferHub() = default; std::unordered_map buffers_; + DeviceType device_type_; + std::vector size_levels_; // ensure that levels are in ascending order + Size size_limit_ {0, 0, 0, 4}; // Memory in buffer hub cannot exceed this limit - // Be cautious when memory in buffer hub exceeds size_limit*warning_level - float warning_level_ = 0.95; + float warning_level_ = 0.95;// Be cautious when memory in buffer hub exceeds size_limit*warning_level IAllocatorSharedPtr allocator_; }; diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index c9002de..1d2ef00 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -63,47 +63,23 @@ BlockPtr BufferHub::Level::fetchOneFreeBlock() { ret_block = *block_it; } else { LOG_INFO("No free block at level %d,refilling...", index); - - auto level_bytes = this->level_size.totalBytes(); - - auto kb_level = Size(0, 1, 0, 0); - auto mb_level = Size(0, 0, 1, 0); - auto gb_level = Size(0, 0, 0, 1); - auto kb_bytes = kb_level.totalBytes(); - auto mb_bytes = mb_level.totalBytes(); - auto gb_bytes = gb_level.totalBytes(); - /** - * @brief: refill strategy - * if level size < 1KB, refill 1KB blocks - * else if level size < 1MB, refill 1MB blocks - * else if level size < 1GB, refill 1GB blocks - * else refill 4GB blocks - * - */ - if (level_bytes <= kb_bytes) { - refill(kb_level); - } else if (kb_bytesblock_size.totalBytes(); + refill(Size(expand_factor*block_bytes));//每次分配expand_factor倍的内存块 ret_block = *(free_map.begin()->second); } return ret_block; } -void BufferHub::Level::refill(const nova_llm::Size& sz) { - auto dst_size = sz.totalBytes(); - auto level_bytes = this->level_size.totalBytes(); - uint64_t cnt = dst_size / level_bytes; +void BufferHub::Level::refill(const nova_llm::Size& dst_sz) { + auto dst_total_bytes = dst_sz.totalBytes(); + auto block_bytes = this->block_size.totalBytes(); + uint64_t cnt = dst_total_bytes / block_bytes; - auto* data = this->hub->allocData(dst_size); + auto* data = this->hub->allocData(dst_total_bytes); for (uint64_t i = 0; i < cnt; i++) { auto* one_block = hub->allocBlock(); - one_block->data = data + i * level_bytes; - one_block->size = level_bytes; + one_block->data = data + i * block_bytes; + one_block->size = block_bytes; one_block->ref_cnt = 1;// set ref_cnt to 1 when allocated auto it = this->block_list.insert(this->block_list.end(), one_block); this->free_map[one_block->data] = it; @@ -164,6 +140,9 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) { if (hub && *hub) { // Deleting the BufferHub will call destructors of its members (including Level), // which will in turn call tearDownBlock to free internal allocations. + (*hub)->size_levels_.clear(); + (*hub)->buffers_.clear(); + delete *hub; *hub = nullptr; } @@ -171,13 +150,13 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) { void BufferHub::initConfig(const Config& config) { device_type_ = config.device_type; - size_levels_ = config.size_levels; + this->size_levels_ = config.size_levels; std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { return a.totalBytes() < b.totalBytes(); }); - size_limit_ = config.size_limit; - warning_level_ = config.warning_level; - allocator_ = config.allocator; + this->size_limit_ = config.size_limit; + this->warning_level_ = config.warning_level; + this->allocator_ = config.allocator; } Block::DataPtr BufferHub::allocData(uint64_t sz) { @@ -219,9 +198,9 @@ void BufferHub::tearDownBlock(BlockPtr& block) { } } -void BufferHub::addSizeLevel(uint32_t index, const Size& level_sz) { - auto& level = buffers_[level_sz]; - level.level_size = level_sz; +void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) { + auto& level = buffers_[level_block_sz]; + level.block_size = level_block_sz; level.index = index; level.hub = this; } @@ -269,7 +248,7 @@ void BufferHub::putBlock(const BlockPtr& block_ptr) { auto& level = buffers_[level_size]; level.putOneBlock(block_ptr); } else { - LOG_ERROR("Level with size %d is not found!", level_size.totalBytes()); + LOG_ERROR("Level size %d is not found in buffers!", level_size.totalBytes()); } } @@ -290,17 +269,18 @@ void BufferHub::putBlockFromBuffer(const Buffer& buffer) { } } +//TODO: optim the level selection algorithm Size BufferHub::gradeLevel(const Size& sz) const { Size ret; uint32_t level_index = 0; size_t i = 0; - for (; i < size_levels_.size(); i++) { - if (sz.totalBytes() < size_levels_[i].totalBytes()) { + for (; i < this->size_levels_.size(); i++) { + if (sz.totalBytes() <= this->size_levels_[i].totalBytes()) { level_index = i; break; } } - if (size_levels_.size() == i) { + if (this->size_levels_.size() == i) { LOG_ERROR("Cannot grade to current levels for size %d", sz.totalBytes()); return Size {}; } diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index b78c82f..388e751 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -15,7 +15,7 @@ class CPUBufferHubTest : public ::testing::Test { BufferHub::Config config; //set config config.device_type = DeviceType::CPU; - config.size_levels=std::vector{Size(0, 0, 0, 1)}; + config.size_levels=std::vector{Size(8, 4, 2, 1)}; config.allocator = std::make_shared(); buffer_hub_ = BufferHub::Builder::build(config); @@ -35,7 +35,7 @@ TEST_F(CPUBufferHubTest, GetBlock) { auto *block = getBufferHub()->getBlock(Size(1024)); EXPECT_NE(block, nullptr); - EXPECT_EQ(block->data, nullptr); + EXPECT_NE(block->data, nullptr); EXPECT_EQ(block->size, 1024); EXPECT_EQ(block->ref_cnt, 1); From 15539ad354e42ae9b4fcaa9c4c62bfa9a0629271 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Mon, 17 Nov 2025 22:46:56 +0800 Subject: [PATCH 13/37] feat: add buffer design doc --- documentation/memory/buffer_hub_design.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 documentation/memory/buffer_hub_design.md diff --git a/documentation/memory/buffer_hub_design.md b/documentation/memory/buffer_hub_design.md new file mode 100644 index 0000000..d6c6257 --- /dev/null +++ b/documentation/memory/buffer_hub_design.md @@ -0,0 +1,9 @@ +# Buffer Hub Overview + +## Design + + + + + +## Usage From a87613a4d8087ba8b7fbe028d3549c13457d0daa Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 18 Nov 2025 22:56:07 +0800 Subject: [PATCH 14/37] style: format code --- format.sh | 2 +- include/NovaLLM/data/tensor.h | 13 ++-- include/NovaLLM/utils/log.h | 15 ++-- include/NovaLLM/utils/macros.h | 28 ++++---- source/data/tensor.cpp | 22 ++---- source/device/device.cpp | 4 +- source/memory/cpu_allocator.cpp | 4 +- source/memory/gpu_allocator.cpp | 6 +- source/utils/log.cpp | 7 +- standalone/source/main.cpp | 11 +-- test/source/buffer_hub_test.cpp | 102 +++++++++++++--------------- test/source/buffer_manager_test.cpp | 60 ++++++++-------- test/source/tensor_test.cpp | 3 +- 13 files changed, 120 insertions(+), 157 deletions(-) diff --git a/format.sh b/format.sh index 04c1513..b34cc94 100755 --- a/format.sh +++ b/format.sh @@ -1,7 +1,7 @@ #!/bin/bash # Define the directories to search for C++ files -DIRECTORIES=("include" "source" "standalone" "test") +DIRECTORIES=("include" "source" "standalone/source" "test/source") # Find all C++ source and header files in the specified directories FILES=$(find "${DIRECTORIES[@]}" -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cu" -o -name "*.cuh" \) 2>/dev/null) diff --git a/include/NovaLLM/data/tensor.h b/include/NovaLLM/data/tensor.h index d917109..4a02d5d 100644 --- a/include/NovaLLM/data/tensor.h +++ b/include/NovaLLM/data/tensor.h @@ -1,8 +1,9 @@ #pragma once +#include #include #include #include -#include + #include "../common/device.h" #include "../common/dtype.h" #include "NovaLLM/utils/macros.h" @@ -58,11 +59,7 @@ class NOVA_LLM_API Tensor { * @param device 设备类型 * @param deleter 自定义删除器,默认使用DefaultDeletor */ - Tensor(const void* data, - const std::vector& dims, - DataType dtype, - DeviceType device, - Deleter deleter = DefaultDeletor()); + Tensor(const void* data, const std::vector& dims, DataType dtype, DeviceType device, Deleter deleter = DefaultDeletor()); /** * @brief 拷贝构造函数 @@ -125,7 +122,7 @@ class NOVA_LLM_API Tensor { DataSourceType dataFrom() const { return m_data_source_; } - uint32_t refCnt() const { return ref_cnt_?ref_cnt_->load():0; } + uint32_t refCnt() const { return ref_cnt_ ? ref_cnt_->load() : 0; } Deleter deleter() const { return m_deleter_; } @@ -147,7 +144,7 @@ class NOVA_LLM_API Tensor { std::vector dims_; ///< 张量的维度数组 uint32_t ele_cnt_ {0}; ///< 元素总数 void* data_ {nullptr}; ///< 数据缓冲区 - uint64_t capacity_ {0}; ///< 数据缓冲区大小,单位为字节,大于等于size_*sizeof(m_dtype_) + uint64_t capacity_ {0}; ///< 数据缓冲区大小,单位为字节,大于等于size_*sizeof(m_dtype_) DataSourceType m_data_source_ {DataSourceType::AUTO}; DataType m_dtype_ {DataType::UNKNOWN}; ///< 数据类型 DeviceType m_device_ {DeviceType::UNKNOWN}; ///< 设备类型 diff --git a/include/NovaLLM/utils/log.h b/include/NovaLLM/utils/log.h index ab7563e..ee8058a 100644 --- a/include/NovaLLM/utils/log.h +++ b/include/NovaLLM/utils/log.h @@ -4,18 +4,19 @@ // - If NOVA_LLM_ENABLE_LOGGING is defined and spdlog is available, use spdlog. // - Otherwise provide a no-op Logger and a minimal spdlog::level::level_enum so callers compile. -#include #include +#include // Prefer spdlog when logging is enabled and available #if defined(NOVA_LLM_ENABLE_LOGGING) && NOVA_LLM_ENABLE_LOGGING && __has_include() #if __has_include() -# include +#include #elif __has_include() -# include +#include #endif #include + #include namespace nova_llm { @@ -27,9 +28,7 @@ class Logger { return instance; } - void init(const std::string& name = "NovaLLM", - const std::string& logFile = "NovaLLM.log", - spdlog::level::level_enum level = spdlog::level::info); + void init(const std::string& name = "NovaLLM", const std::string& logFile = "NovaLLM.log", spdlog::level::level_enum level = spdlog::level::info); void setLevel(spdlog::level::level_enum level) { if (logger_) logger_->set_level(level); @@ -100,9 +99,7 @@ class Logger { return instance; } - void init(const std::string& /*name*/ = "NovaLLM", - const std::string& /*logFile*/ = "NovaLLM.log", - spdlog::level::level_enum /*level*/ = spdlog::level::info); + void init(const std::string& /*name*/ = "NovaLLM", const std::string& /*logFile*/ = "NovaLLM.log", spdlog::level::level_enum /*level*/ = spdlog::level::info); void setLevel(spdlog::level::level_enum /*level*/) {} diff --git a/include/NovaLLM/utils/macros.h b/include/NovaLLM/utils/macros.h index 25f5ad1..5d5c30d 100644 --- a/include/NovaLLM/utils/macros.h +++ b/include/NovaLLM/utils/macros.h @@ -8,31 +8,29 @@ #define NOVA_LLM_VERSION_MINOR 1 #define NOVA_LLM_VERSION_PATCH 0 #define NOVA_LLM_VERSION_STRING "0.1.0" -#define NOVA_LLM_VERSION \ - (NOVA_LLM_VERSION_MAJOR * 10000 + NOVA_LLM_VERSION_MINOR * 100 + NOVA_LLM_VERSION_PATCH) +#define NOVA_LLM_VERSION (NOVA_LLM_VERSION_MAJOR * 10000 + NOVA_LLM_VERSION_MINOR * 100 + NOVA_LLM_VERSION_PATCH) // For API export and import #if defined(_WIN32) - // When building the library define NOVA_LLM_EXPORTS (set by CMake) - #if defined(NOVA_LLM_EXPORTS) - #define NOVA_LLM_API __declspec(dllexport) - #else - #define NOVA_LLM_API __declspec(dllimport) - #endif +// When building the library define NOVA_LLM_EXPORTS (set by CMake) +#if defined(NOVA_LLM_EXPORTS) +#define NOVA_LLM_API __declspec(dllexport) #else - #define NOVA_LLM_API __attribute__((visibility("default"))) +#define NOVA_LLM_API __declspec(dllimport) +#endif +#else +#define NOVA_LLM_API __attribute__((visibility("default"))) #endif // For debugging and runtime check #ifdef NDEBUG #define ASSERT(condition, message) ((void)0) #else -#define ASSERT(condition, message) \ - do { \ - if (!(condition)) { \ - throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + " " + \ - message); \ - } \ +#define ASSERT(condition, message) \ + do { \ + if (!(condition)) { \ + throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + " " + message); \ + } \ } while (0) #endif diff --git a/source/data/tensor.cpp b/source/data/tensor.cpp index 5f8f5a0..b2f9a31 100644 --- a/source/data/tensor.cpp +++ b/source/data/tensor.cpp @@ -57,15 +57,7 @@ Tensor::Tensor() , m_deleter_(DefaultDeletor()) {} Tensor::Tensor(const std::vector& dims, DataType dtype, DeviceType device) - : dims_(dims) - , ele_cnt_(0) - , data_(nullptr) - , capacity_(0) - , m_data_source_(DataSourceType::AUTO) - , m_dtype_(dtype) - , m_device_(device) - , ref_cnt_{nullptr} - , m_deleter_(DefaultDeletor()) { + : dims_(dims), ele_cnt_(0), data_(nullptr), capacity_(0), m_data_source_(DataSourceType::AUTO), m_dtype_(dtype), m_device_(device), ref_cnt_ {nullptr}, m_deleter_(DefaultDeletor()) { // Check if the data type is valid ASSERT(dtype >= DataType::INT8 && dtype < DataType::TOTAL, "Invalid data type"); // Check if the device type is valid @@ -77,9 +69,7 @@ Tensor::Tensor(const std::vector& dims, DataType dtype, DeviceType dev this->data_ = buffer.data; this->capacity_ = buffer.size; m_deleter_ = [&](void** data) { - Buffer buffer {static_cast().data)>(*data), - static_cast().size)>(capacity_), - m_device_}; + Buffer buffer {static_cast().data)>(*data), static_cast().size)>(capacity_), m_device_}; buffer_manager.put(buffer); *data = nullptr; }; @@ -87,11 +77,7 @@ Tensor::Tensor(const std::vector& dims, DataType dtype, DeviceType dev *ref_cnt_ = 1; } -Tensor::Tensor(const void* data, - const std::vector& dims, - DataType dtype, - DeviceType device, - Deleter deleter) { +Tensor::Tensor(const void* data, const std::vector& dims, DataType dtype, DeviceType device, Deleter deleter) { ASSERT(nullptr != data, "data cannot be null!"); ASSERT(DataType::UNKNOWN < dtype, "data type must be specified!"); ASSERT(DeviceType::UNKNOWN < device, "device type must be specified!"); @@ -133,7 +119,7 @@ Tensor& Tensor::operator=(const Tensor& other) { m_data_source_ = other.dataFrom(); m_dtype_ = other.dtype(); m_device_ = other.device(); - ref_cnt_ = other.ref_cnt_;//TODO:notice here + ref_cnt_ = other.ref_cnt_; // TODO:notice here m_deleter_ = other.deleter(); } return *this; diff --git a/source/device/device.cpp b/source/device/device.cpp index 7755ea3..749037d 100644 --- a/source/device/device.cpp +++ b/source/device/device.cpp @@ -2,9 +2,7 @@ namespace nova_llm { -bool DeviceTypeFlags::has(DeviceType type) const { - return (flags_ & static_cast(type)) != 0; -} +bool DeviceTypeFlags::has(DeviceType type) const { return (flags_ & static_cast(type)) != 0; } // 添加设备 void DeviceTypeFlags::set(DeviceType type) { flags_ |= static_cast(type); } diff --git a/source/memory/cpu_allocator.cpp b/source/memory/cpu_allocator.cpp index d60e9ff..7a3eff7 100644 --- a/source/memory/cpu_allocator.cpp +++ b/source/memory/cpu_allocator.cpp @@ -1,7 +1,7 @@ -#include "NovaLLM/memory/allocator.h" - #include +#include "NovaLLM/memory/allocator.h" + namespace nova_llm { diff --git a/source/memory/gpu_allocator.cpp b/source/memory/gpu_allocator.cpp index 6422ec3..6249c0f 100644 --- a/source/memory/gpu_allocator.cpp +++ b/source/memory/gpu_allocator.cpp @@ -16,9 +16,7 @@ void* CUDAAllocator::do_allocate(size_t size) { return ptr; } -void CUDAAllocator::do_deallocate(void* ptr) { - cudaFree(ptr); -} +void CUDAAllocator::do_deallocate(void* ptr) { cudaFree(ptr); } -} +} // namespace nova_llm #endif \ No newline at end of file diff --git a/source/utils/log.cpp b/source/utils/log.cpp index f07efc3..e82610b 100644 --- a/source/utils/log.cpp +++ b/source/utils/log.cpp @@ -10,9 +10,7 @@ namespace nova_llm { -void Logger::init(const std::string& name, - const std::string& logFile, - spdlog::level::level_enum level) { +void Logger::init(const std::string& name, const std::string& logFile, spdlog::level::level_enum level) { try { // Refer to // https://github.com/gabime/spdlog?tab=readme-ov-file#logger-with-multi-sinks---each-with-a-different-format-and-log-level @@ -25,8 +23,7 @@ void Logger::init(const std::string& name, console_sink->set_pattern(pattern_str); // Use set_pattern // Create file sink - auto file_sink = - std::make_shared(logFile, 1024 * 1024 * 5, 3); + auto file_sink = std::make_shared(logFile, 1024 * 1024 * 5, 3); file_sink->set_level(level); file_sink->set_pattern(pattern_str); // Use set_pattern diff --git a/standalone/source/main.cpp b/standalone/source/main.cpp index 3f83589..8eda34f 100644 --- a/standalone/source/main.cpp +++ b/standalone/source/main.cpp @@ -1,14 +1,15 @@ #include + #include #include #include #include auto main(int argc, char** argv) -> int { - int arg_num=argc; - std::cout<<"arg num:"<{Size(8, 4, 2, 1)}; - config.allocator = std::make_shared(); - - buffer_hub_ = BufferHub::Builder::build(config); - } - - void TearDown() override { - BufferHub::Builder::destroy(&buffer_hub_); - } - BufferHub* buffer_hub_; + public: + BufferHub* getBufferHub() { return buffer_hub_; } + + protected: + void SetUp() override { + BufferHubConfig config; + // set config + config.device_type = DeviceType::CPU; + config.size_levels = std::vector {Size(8, 4, 2, 1)}; + config.allocator = std::make_shared(); + + buffer_hub_ = BufferHub::Builder::build(config); + } + + void TearDown() override { BufferHub::Builder::destroy(&buffer_hub_); } + + BufferHub* buffer_hub_; }; -TEST_F(CPUBufferHubTest, Init) { - EXPECT_NE(getBufferHub(), nullptr); -} +TEST_F(CPUBufferHubTest, Init) { EXPECT_NE(getBufferHub(), nullptr); } TEST_F(CPUBufferHubTest, GetBlock) { - auto *block = getBufferHub()->getBlock(Size(1024)); + auto* block = getBufferHub()->getBlock(Size(1024)); - EXPECT_NE(block, nullptr); - EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); - EXPECT_EQ(block->ref_cnt, 1); + EXPECT_NE(block, nullptr); + EXPECT_NE(block->data, nullptr); + EXPECT_EQ(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 1); - getBufferHub()->putBlock(block); + getBufferHub()->putBlock(block); } TEST_F(CPUBufferHubTest, PutBlock) { - auto* block = getBufferHub()->getBlock(Size(1024)); + auto* block = getBufferHub()->getBlock(Size(1024)); - EXPECT_NE(block, nullptr); - EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); - EXPECT_EQ(block->ref_cnt, 1); + EXPECT_NE(block, nullptr); + EXPECT_NE(block->data, nullptr); + EXPECT_EQ(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 1); - getBufferHub()->putBlock(block); + getBufferHub()->putBlock(block); - EXPECT_EQ(block->data, nullptr); - EXPECT_EQ(block->size, 0); - EXPECT_EQ(block->ref_cnt, 0); + EXPECT_EQ(block->data, nullptr); + EXPECT_EQ(block->size, 0); + EXPECT_EQ(block->ref_cnt, 0); } - TEST_F(CPUBufferHubTest, PutBlockFromBuffer) { - auto* block = getBufferHub()->getBlock(Size(1024)); + auto* block = getBufferHub()->getBlock(Size(1024)); - EXPECT_NE(block, nullptr); - EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); - EXPECT_EQ(block->ref_cnt, 1); + EXPECT_NE(block, nullptr); + EXPECT_NE(block->data, nullptr); + EXPECT_EQ(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 1); - Buffer buffer; - buffer.data = block->data; - buffer.size = block->size; - buffer.device_type = DeviceType::CPU; - getBufferHub()->putBlockFromBuffer(buffer); + Buffer buffer; + buffer.data = block->data; + buffer.size = block->size; + buffer.device_type = DeviceType::CPU; + getBufferHub()->putBlockFromBuffer(buffer); - EXPECT_EQ(block->data, nullptr); - EXPECT_EQ(block->size, 0); - EXPECT_EQ(block->ref_cnt, 0); + EXPECT_EQ(block->data, nullptr); + EXPECT_EQ(block->size, 0); + EXPECT_EQ(block->ref_cnt, 0); - EXPECT_EQ(buffer.data, nullptr); - EXPECT_EQ(buffer.size, 0); + EXPECT_EQ(buffer.data, nullptr); + EXPECT_EQ(buffer.size, 0); } \ No newline at end of file diff --git a/test/source/buffer_manager_test.cpp b/test/source/buffer_manager_test.cpp index 280606f..be3761f 100644 --- a/test/source/buffer_manager_test.cpp +++ b/test/source/buffer_manager_test.cpp @@ -5,49 +5,47 @@ using namespace nova_llm; class BufferManagerTest : public ::testing::Test { -protected: - void SetUp() override { - BufferManager::Config config; - //set config - config.device_flags.set(DeviceType::CPU); - config.cpu.alloc = std::make_shared(); - #if defined(NOVA_LLM_CUDA_ON) && NOVA_LLM_CUDA_ON - config.device_flags.set(DeviceType::CUDA); - config.gpu.alloc = std::make_shared(); - #endif - - BufferManager::Builder::build(config); - } - - void TearDown() override { - BufferManager::Builder::getInstance().~BufferManager(); - } + protected: + void SetUp() override { + BufferManager::Config config; + // set config + config.device_flags.set(DeviceType::CPU); + config.cpu.alloc = std::make_shared(); +#if defined(NOVA_LLM_CUDA_ON) && NOVA_LLM_CUDA_ON + config.device_flags.set(DeviceType::CUDA); + config.gpu.alloc = std::make_shared(); +#endif + + BufferManager::Builder::build(config); + } + + void TearDown() override { BufferManager::Builder::getInstance().~BufferManager(); } }; TEST(BufferManagerTest, Init) { - auto& buffer_manager = BufferManager::Builder::getInstance(); - EXPECT_TRUE(buffer_manager.isInited()); + auto& buffer_manager = BufferManager::Builder::getInstance(); + EXPECT_TRUE(buffer_manager.isInited()); } TEST(BufferManagerTest, FetchCpu) { - auto& buffer_manager = BufferManager::Builder::getInstance(); + auto& buffer_manager = BufferManager::Builder::getInstance(); - auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); + auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); - EXPECT_NE(buffer.data, nullptr); - EXPECT_EQ(buffer.size, 1024); - EXPECT_EQ(buffer.device_type, DeviceType::CPU); + EXPECT_NE(buffer.data, nullptr); + EXPECT_EQ(buffer.size, 1024); + EXPECT_EQ(buffer.device_type, DeviceType::CPU); - buffer_manager.put(buffer); + buffer_manager.put(buffer); } TEST(BufferManagerTest, PutCpu) { - auto& buffer_manager = BufferManager::Builder::getInstance(); + auto& buffer_manager = BufferManager::Builder::getInstance(); - auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); + auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); - buffer_manager.put(buffer); - EXPECT_EQ(buffer.data, nullptr); - EXPECT_EQ(buffer.size, 0); - EXPECT_EQ(buffer.device_type, DeviceType::CPU); + buffer_manager.put(buffer); + EXPECT_EQ(buffer.data, nullptr); + EXPECT_EQ(buffer.size, 0); + EXPECT_EQ(buffer.device_type, DeviceType::CPU); } \ No newline at end of file diff --git a/test/source/tensor_test.cpp b/test/source/tensor_test.cpp index c03e51a..7362825 100644 --- a/test/source/tensor_test.cpp +++ b/test/source/tensor_test.cpp @@ -38,8 +38,7 @@ TEST_F(TensorTest, ConstructWithDims) { // 测试非法维度 TEST_F(TensorTest, InvalidDimensions) { std::vector empty_dims; - EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU), - std::invalid_argument); + EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU), std::invalid_argument); std::vector zero_dims = {2, 0, 4}; EXPECT_THROW(Tensor tensor(zero_dims, DataType::FLOAT32, DeviceType::CPU), std::invalid_argument); From a57cb19c6169297bf7616bf196f9b28a1b220d97 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 18 Nov 2025 22:56:50 +0800 Subject: [PATCH 15/37] refactor: refactor the bufferhub class --- documentation/memory/buffer_hub_design.md | 52 ++++++++++ include/NovaLLM/memory/buffer_hub.h | 121 +++++++++++----------- include/NovaLLM/memory/buffer_manager.h | 4 +- source/memory/buffer_hub.cpp | 41 +++----- 4 files changed, 128 insertions(+), 90 deletions(-) diff --git a/documentation/memory/buffer_hub_design.md b/documentation/memory/buffer_hub_design.md index d6c6257..ba165d2 100644 --- a/documentation/memory/buffer_hub_design.md +++ b/documentation/memory/buffer_hub_design.md @@ -2,7 +2,59 @@ ## Design +We divide memory into the following four major levels: ++ Byte level + Byte number ranges from 0 to 1023 ++ KB level + Byte number ranges from 1024 to 1024*1023 ++ MB level + Byte number ranges from 1024*1024 to 1024*1024*1023 ++ GB level + Byte number ranges from 1024*1024*1024 to min(1024*1024*1024*1023,Device memory) + +On top of that, we continue divide into sub levels from major levels. + +In byte level, we form the following sub levels ++ 16 bytes ++ 64 bytes ++ 256 bytes + +In KB level, we form the following sub levels ++ 1 kb ++ 2 kb ++ 4 kb ++ 8 kb ++ 16 kb ++ 32 kb ++ 64 kb ++ 128 kb ++ 256 kb ++ 512 kb + +In MB level, we form the following sub levels ++ 1 mb ++ 2 mb ++ 4 mb ++ 8 mb ++ 16 mb ++ 32 mb ++ 64 mb ++ 128 mb ++ 256 mb ++ 512 mb + +In GB level, we form the following sub levels ++ 1 GB ++ 2 GB ++ 4 GB ++ 8 GB ++ 16 GB ++ 32 GB ++ 64 GB ++ 128 GB ++ 256 GB ++ 512 GB diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 6635653..e19b363 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -1,13 +1,14 @@ #pragma once +#include #include #include #include -#include + #include "NovaLLM/common/device.h" #include "NovaLLM/memory/allocator.h" #include "NovaLLM/memory/buffer_define.h" -#include "NovaLLM/utils/template.h" #include "NovaLLM/utils/macros.h" +#include "NovaLLM/utils/template.h" namespace nova_llm { @@ -18,22 +19,22 @@ struct Size { uint64_t mb_ = 0; uint64_t gb_ = 0; uint64_t total_bytes_ = 0; - const uint64_t ratio_ = 1<<10; + const uint64_t ratio_ = 1 << 10; void convert_in_units(uint64_t bytes) { - auto down_ratio = ratio_*ratio_*ratio_;//std::pow(ratio_, 3); + auto down_ratio = ratio_ * ratio_ * ratio_; // std::pow(ratio_, 3); - //number of gb units + // number of gb units gb_ = bytes / down_ratio; bytes -= gb_ * down_ratio; down_ratio /= ratio_; - - //number of mb units + + // number of mb units mb_ = bytes / down_ratio; bytes -= mb_ * down_ratio; down_ratio /= ratio_; - //number of kb units + // number of kb units kb_ = bytes / down_ratio; bytes -= kb_ * down_ratio; @@ -80,21 +81,13 @@ struct Size { convert_in_units(total_bytes_); } - uint64_t gb() const { - return this->gb_; - } + uint64_t gb() const { return this->gb_; } - uint64_t mb() const { - return this->mb_; - } + uint64_t mb() const { return this->mb_; } - uint64_t kb() const { - return this->kb_; - } + uint64_t kb() const { return this->kb_; } - uint64_t b() const { - return this->b_; - } + uint64_t b() const { return this->b_; } Size& operator=(const Size& rhs) { total_bytes_ = rhs.totalBytes(); @@ -114,9 +107,7 @@ struct SizeHash { }; struct SizeEqual { - bool operator()(const Size& lhs, const Size& rhs) const { - return lhs.totalBytes() == rhs.totalBytes(); - } + bool operator()(const Size& lhs, const Size& rhs) const { return lhs.totalBytes() == rhs.totalBytes(); } }; struct Block { @@ -125,22 +116,52 @@ struct Block { uint64_t size = 0; int32_t ref_cnt = 0; - bool isValid() const { - return data != nullptr && 0 != size; - } + bool isValid() const { return data != nullptr && 0 != size; } }; using BlockPtr = Block*; class DefaultSizeLevelStrategy { public: - NOVA_LLM_API static std::vector byteSizes() ; + NOVA_LLM_API static std::vector byteSizes(); + + NOVA_LLM_API static std::vector kiloByteSizes(); - NOVA_LLM_API static std::vector kiloByteSizes() ; + NOVA_LLM_API static std::vector megaByteSizes(); - NOVA_LLM_API static std::vector megaByteSizes() ; + NOVA_LLM_API static std::vector gigaByteSizes(); +}; + +struct BufferHubConfig { + DeviceType device_type; + std::vector size_levels {{ + , + , + , + }}; // ensure that levels are in ascending order + Size size_limit {0, 0, 0, 8}; // Memory in buffer hub cannot exceed this limit + float warning_level = 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level + IAllocatorSharedPtr allocator; +}; - NOVA_LLM_API static std::vector gigaByteSizes() ; +/** + * @brief Buffers at the specified size level + * + */ +struct BufferHubLevel { + public: + BlockPtr fetchOneFreeBlock(); + void putOneBlock(const BlockPtr& block_ptr); + void refill(const Size& sz); + ~BufferHubLevel(); + uint32_t index = -1; // level index in buffer hub + Size block_size {static_cast(0)}; // each block size at this level + uint32_t expand_factor = 2; + std::list block_list; + using BlockIterator = std::list::iterator; + std::unordered_map free_map; + std::unordered_map busy_map; + BufferHub* hub; }; /* @@ -155,43 +176,17 @@ class DefaultSizeLevelStrategy { * */ class NOVA_LLM_API BufferHub { public: - struct Config { - DeviceType device_type; - std::vector size_levels; // ensure that levels are in ascending order - Size size_limit {0, 0, 0, 8}; // Memory in buffer hub cannot exceed this limit - float warning_level = - 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level - IAllocatorSharedPtr allocator; - }; - - /** - * @brief Buffers at the specified size level - * - */ - struct Level { - public: - BlockPtr fetchOneFreeBlock(); - void putOneBlock(const BlockPtr& block_ptr); - void refill(const Size& sz); - ~Level(); - uint32_t index = -1;//level index in buffer hub - Size block_size {static_cast(0)}; // each block size at this level - uint32_t expand_factor=2; - std::list block_list; - using BlockIterator = std::list::iterator; - std::unordered_map free_map; - std::unordered_map busy_map; - BufferHub* hub; - }; + friend class BufferHubConfig; + friend class BufferHubLevel; class Builder { public: - NOVA_LLM_API static BufferHub* build(const Config& config); + NOVA_LLM_API static BufferHub* build(const BufferHubConfig& config); NOVA_LLM_API static void destroy(BufferHub** hub); }; - void initConfig(const Config& config); + void initConfig(const BufferHubConfig& config); BlockPtr getBlock(const Size& sz); @@ -218,15 +213,15 @@ class NOVA_LLM_API BufferHub { BufferHub() = default; - std::unordered_map buffers_; + std::unordered_map buffers_; DeviceType device_type_; std::vector size_levels_; // ensure that levels are in ascending order - Size size_limit_ {0, 0, 0, 4}; // Memory in buffer hub cannot exceed this limit + Size size_limit_ {0, 0, 0, 4}; // Memory in buffer hub cannot exceed this limit - float warning_level_ = 0.95;// Be cautious when memory in buffer hub exceeds size_limit*warning_level + float warning_level_ = 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level IAllocatorSharedPtr allocator_; }; diff --git a/include/NovaLLM/memory/buffer_manager.h b/include/NovaLLM/memory/buffer_manager.h index 4333e71..1b63e66 100644 --- a/include/NovaLLM/memory/buffer_manager.h +++ b/include/NovaLLM/memory/buffer_manager.h @@ -1,8 +1,9 @@ #pragma once #include #include -#include #include +#include + #include "NovaLLM/common/device.h" #include "NovaLLM/memory/allocator.h" #include "NovaLLM/memory/buffer_define.h" @@ -63,7 +64,6 @@ class NOVA_LLM_API BufferManager { ~BufferManager(); private: - void destroy(); BufferManager() = default; diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 1d2ef00..c8e2194 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -50,7 +50,7 @@ std::vector DefaultSizeLevelStrategy::gigaByteSizes() { return ret; } -BlockPtr BufferHub::Level::fetchOneFreeBlock() { +BlockPtr BufferHubLevel::fetchOneFreeBlock() { BlockPtr ret_block {nullptr}; if (!free_map.empty()) { LOG_INFO("Found free block at level %d", index); @@ -64,13 +64,13 @@ BlockPtr BufferHub::Level::fetchOneFreeBlock() { } else { LOG_INFO("No free block at level %d,refilling...", index); auto block_bytes = this->block_size.totalBytes(); - refill(Size(expand_factor*block_bytes));//每次分配expand_factor倍的内存块 + refill(Size(expand_factor * block_bytes)); // 每次分配expand_factor倍的内存块 ret_block = *(free_map.begin()->second); } return ret_block; } -void BufferHub::Level::refill(const nova_llm::Size& dst_sz) { +void BufferHubLevel::refill(const nova_llm::Size& dst_sz) { auto dst_total_bytes = dst_sz.totalBytes(); auto block_bytes = this->block_size.totalBytes(); uint64_t cnt = dst_total_bytes / block_bytes; @@ -80,13 +80,13 @@ void BufferHub::Level::refill(const nova_llm::Size& dst_sz) { auto* one_block = hub->allocBlock(); one_block->data = data + i * block_bytes; one_block->size = block_bytes; - one_block->ref_cnt = 1;// set ref_cnt to 1 when allocated + one_block->ref_cnt = 1; // set ref_cnt to 1 when allocated auto it = this->block_list.insert(this->block_list.end(), one_block); this->free_map[one_block->data] = it; } } -void BufferHub::Level::putOneBlock(const BlockPtr& block_ptr) { +void BufferHubLevel::putOneBlock(const BlockPtr& block_ptr) { BlockPtr dst_block(block_ptr); if (block_list.empty()) { auto ret_it = block_list.insert(block_list.end(), dst_block); @@ -100,9 +100,7 @@ void BufferHub::Level::putOneBlock(const BlockPtr& block_ptr) { (*it)->ref_cnt = 0; free_map.insert({(*it)->data, it}); } else if (in_free_m) { - LOG_WARN("Block %p already in block list at level %d", - static_cast(dst_block->data), - index); + LOG_WARN("Block %p already in block list at level %d", static_cast(dst_block->data), index); } else { // in_busy_m is true auto& it = busy_map[dst_block->data]; auto& busy_block = *it; @@ -117,7 +115,7 @@ void BufferHub::Level::putOneBlock(const BlockPtr& block_ptr) { } } -BufferHub::Level::~Level() { +BufferHubLevel::~BufferHubLevel() { free_map.clear(); busy_map.clear(); for (auto& block_ptr : block_list) { @@ -125,7 +123,7 @@ BufferHub::Level::~Level() { } } -BufferHub* BufferHub::Builder::build(const Config& config) { +BufferHub* BufferHub::Builder::build(const BufferHubConfig& config) { auto* hub = new BufferHub; hub->initConfig(config); int index = 0; @@ -148,20 +146,16 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) { } } -void BufferHub::initConfig(const Config& config) { +void BufferHub::initConfig(const BufferHubConfig& config) { device_type_ = config.device_type; this->size_levels_ = config.size_levels; - std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { - return a.totalBytes() < b.totalBytes(); - }); + std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { return a.totalBytes() < b.totalBytes(); }); this->size_limit_ = config.size_limit; this->warning_level_ = config.warning_level; this->allocator_ = config.allocator; } -Block::DataPtr BufferHub::allocData(uint64_t sz) { - return static_cast(this->allocator_->allocate(sz)); -} +Block::DataPtr BufferHub::allocData(uint64_t sz) { return static_cast(this->allocator_->allocate(sz)); } void BufferHub::deallocData(Block::DataPtr& data_ptr) { if (data_ptr) { @@ -170,9 +164,7 @@ void BufferHub::deallocData(Block::DataPtr& data_ptr) { } } -BlockPtr BufferHub::allocBlock() { - return static_cast(this->allocator_->allocate(sizeof(Block))); -} +BlockPtr BufferHub::allocBlock() { return static_cast(this->allocator_->allocate(sizeof(Block))); } void BufferHub::deallocateBlock(BlockPtr& block_ptr) { if (block_ptr) { @@ -210,17 +202,16 @@ void BufferHub::eraseSizeLevel(const Size& level_sz) { if (buffers_.count(level_sz)) { if (buffers_[level_sz].busy_map.empty()) { auto& level = buffers_[level_sz]; - level.~Level(); + level.~BufferHubLevel(); } else { - LOG_WARN("Level with size %d is in use,cannot erase now,please try some time later", - level_sz.totalBytes()); + LOG_WARN("Level with size %d is in use,cannot erase now,please try some time later", level_sz.totalBytes()); } } else { LOG_WARN("Level with size %d is not found!", level_sz.totalBytes()); // TODO:optimize } } -BlockPtr BufferHub::getBlock(const Size& sz){ +BlockPtr BufferHub::getBlock(const Size& sz) { // round it to ceil level auto level_sz = gradeLevel(sz); if (!level_sz.isValid()) { @@ -269,7 +260,7 @@ void BufferHub::putBlockFromBuffer(const Buffer& buffer) { } } -//TODO: optim the level selection algorithm +// TODO: optim the level selection algorithm Size BufferHub::gradeLevel(const Size& sz) const { Size ret; uint32_t level_index = 0; From 016a5eed2a5914b428d29e297b263c081e8eacf1 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 18 Nov 2025 23:38:18 +0800 Subject: [PATCH 16/37] refactor: refactor Size --- include/NovaLLM/memory/buffer_hub.h | 93 ++++++++++------------------- source/memory/buffer_hub.cpp | 81 +++++++++++++++++++++++-- source/memory/buffer_manager.cpp | 25 +------- test/source/buffer_hub_test.cpp | 13 ++-- 4 files changed, 113 insertions(+), 99 deletions(-) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index e19b363..cb96565 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -21,25 +21,7 @@ struct Size { uint64_t total_bytes_ = 0; const uint64_t ratio_ = 1 << 10; - void convert_in_units(uint64_t bytes) { - auto down_ratio = ratio_ * ratio_ * ratio_; // std::pow(ratio_, 3); - - // number of gb units - gb_ = bytes / down_ratio; - bytes -= gb_ * down_ratio; - down_ratio /= ratio_; - - // number of mb units - mb_ = bytes / down_ratio; - bytes -= mb_ * down_ratio; - down_ratio /= ratio_; - - // number of kb units - kb_ = bytes / down_ratio; - bytes -= kb_ * down_ratio; - - b_ = bytes; - } + void convert_in_units(uint64_t bytes); public: Size() = default; @@ -49,32 +31,7 @@ struct Size { convert_in_units(total_bytes_); } - Size(uint64_t b, uint64_t kb, uint64_t mb, uint64_t gb) { - b_ = b; - kb_ = kb; - mb_ = mb; - gb_ = gb; - - if (ratio_ < b_) { - auto kb_cnt = b_ / ratio_; - b_ -= kb_cnt * ratio_; - kb_ += kb_cnt; - } - - if (ratio_ < kb_) { - auto mb_cnt = kb_ / ratio_; - kb_ -= mb_cnt * ratio_; - mb_ += mb_cnt; - } - - if (ratio_ < mb_) { - auto gb_cnt = mb_ / ratio_; - mb_ -= gb_cnt * ratio_; - gb_ += gb_cnt; - } - - total_bytes_ = b_ + kb_ * ratio_ + mb_ * ratio_ * ratio_ + gb_ * ratio_ * ratio_ * ratio_; - } + Size(uint64_t b, uint64_t kb, uint64_t mb, uint64_t gb); Size(const Size& rhs) { total_bytes_ = rhs.totalBytes(); @@ -121,34 +78,46 @@ struct Block { using BlockPtr = Block*; -class DefaultSizeLevelStrategy { +class LevelAssignStrategy { public: - NOVA_LLM_API static std::vector byteSizes(); + virtual std::vector assignLevels(); +}; - NOVA_LLM_API static std::vector kiloByteSizes(); +class BufferHubConfig { + public: + BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit, LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95) + : device_type_(device_type), allocator_(allocator), size_limit_(size_limit), warning_level_(warning_level), level_assign_strategy_(strategy) { + size_levels_ = strategy.assignLevels(); + }; - NOVA_LLM_API static std::vector megaByteSizes(); + void setLevelAssignStrategy(LevelAssignStrategy strategy) { size_levels_ = strategy.assignLevels(); } - NOVA_LLM_API static std::vector gigaByteSizes(); -}; + void setWarningLevel(float warning_level) { warning_level_ = warning_level; } -struct BufferHubConfig { - DeviceType device_type; - std::vector size_levels {{ - , - , - , - }}; // ensure that levels are in ascending order - Size size_limit {0, 0, 0, 8}; // Memory in buffer hub cannot exceed this limit - float warning_level = 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level - IAllocatorSharedPtr allocator; + DeviceType deviceType() const { return device_type_; } + + const std::vector& sizeLevels() const { return size_levels_; } + + Size sizeLimit() const { return size_limit_; } + + float warningLevel() const { return warning_level_; } + + IAllocatorSharedPtr allocator() const { return allocator_; } + + private: + DeviceType device_type_; + std::vector size_levels_; // ensure that levels are in ascending order + Size size_limit_; // Memory in buffer hub cannot exceed this limit + float warning_level_; // Be cautious when memory in buffer hub exceeds size_limit*warning_level + IAllocatorSharedPtr allocator_; + LevelAssignStrategy level_assign_strategy_; }; /** * @brief Buffers at the specified size level * */ -struct BufferHubLevel { +class BufferHubLevel { public: BlockPtr fetchOneFreeBlock(); void putOneBlock(const BlockPtr& block_ptr); diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index c8e2194..dd855f5 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -6,6 +6,65 @@ namespace nova_llm { +void Size::convert_in_units(uint64_t bytes) { + auto down_ratio = ratio_ * ratio_ * ratio_; // std::pow(ratio_, 3); + + // number of gb units + gb_ = bytes / down_ratio; + bytes -= gb_ * down_ratio; + down_ratio /= ratio_; + + // number of mb units + mb_ = bytes / down_ratio; + bytes -= mb_ * down_ratio; + down_ratio /= ratio_; + + // number of kb units + kb_ = bytes / down_ratio; + bytes -= kb_ * down_ratio; + + b_ = bytes; +} + +Size::Size(uint64_t b, uint64_t kb, uint64_t mb, uint64_t gb) { + b_ = b; + kb_ = kb; + mb_ = mb; + gb_ = gb; + + if (ratio_ < b_) { + auto kb_cnt = b_ / ratio_; + b_ -= kb_cnt * ratio_; + kb_ += kb_cnt; + } + + if (ratio_ < kb_) { + auto mb_cnt = kb_ / ratio_; + kb_ -= mb_cnt * ratio_; + mb_ += mb_cnt; + } + + if (ratio_ < mb_) { + auto gb_cnt = mb_ / ratio_; + mb_ -= gb_cnt * ratio_; + gb_ += gb_cnt; + } + + total_bytes_ = b_ + kb_ * ratio_ + mb_ * ratio_ * ratio_ + gb_ * ratio_ * ratio_ * ratio_; +} + +namespace { +class DefaultSizeLevelStrategy { + public: + static std::vector byteSizes(); + + static std::vector kiloByteSizes(); + + static std::vector megaByteSizes(); + + static std::vector gigaByteSizes(); +}; + std::vector DefaultSizeLevelStrategy::byteSizes() { std::vector ret; uint32_t base = 64; @@ -49,6 +108,16 @@ std::vector DefaultSizeLevelStrategy::gigaByteSizes() { } return ret; } +} // namespace + +std::vector LevelAssignStrategy::assignLevels() { + std::vector ret; + ret.insert(ret.end(), DefaultSizeLevelStrategy::byteSizes().begin(), DefaultSizeLevelStrategy::byteSizes().end()); + ret.insert(ret.end(), DefaultSizeLevelStrategy::kiloByteSizes().begin(), DefaultSizeLevelStrategy::kiloByteSizes().end()); + ret.insert(ret.end(), DefaultSizeLevelStrategy::megaByteSizes().begin(), DefaultSizeLevelStrategy::megaByteSizes().end()); + ret.insert(ret.end(), DefaultSizeLevelStrategy::gigaByteSizes().begin(), DefaultSizeLevelStrategy::gigaByteSizes().end()); + return ret; +} BlockPtr BufferHubLevel::fetchOneFreeBlock() { BlockPtr ret_block {nullptr}; @@ -127,7 +196,7 @@ BufferHub* BufferHub::Builder::build(const BufferHubConfig& config) { auto* hub = new BufferHub; hub->initConfig(config); int index = 0; - for (auto v : config.size_levels) { + for (auto v : config.sizeLevels()) { hub->addSizeLevel(index, v); ++index; } @@ -147,12 +216,12 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) { } void BufferHub::initConfig(const BufferHubConfig& config) { - device_type_ = config.device_type; - this->size_levels_ = config.size_levels; + device_type_ = config.deviceType(); + this->size_levels_ = config.sizeLevels(); std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { return a.totalBytes() < b.totalBytes(); }); - this->size_limit_ = config.size_limit; - this->warning_level_ = config.warning_level; - this->allocator_ = config.allocator; + this->size_limit_ = config.sizeLimit(); + this->warning_level_ = config.warningLevel(); + this->allocator_ = config.allocator(); } Block::DataPtr BufferHub::allocData(uint64_t sz) { return static_cast(this->allocator_->allocate(sz)); } diff --git a/source/memory/buffer_manager.cpp b/source/memory/buffer_manager.cpp index 2d18350..fc74ddf 100644 --- a/source/memory/buffer_manager.cpp +++ b/source/memory/buffer_manager.cpp @@ -1,9 +1,9 @@ #include "NovaLLM/memory/buffer_manager.h" #include "NovaLLM/memory/allocator.h" +#include "NovaLLM/memory/buffer_hub.h" #include "NovaLLM/utils/log.h" #include "NovaLLM/utils/macros.h" -#include "NovaLLM/memory/buffer_hub.h" namespace nova_llm { @@ -28,24 +28,7 @@ bool BufferManager::init(const nova_llm::BufferManager::Config &config) { } bool ret = false; if (config.device_flags.has(DeviceType::CPU)) { - BufferHub::Config cfg; - cfg.allocator = config.cpu.alloc; - - auto byte_sizes = DefaultSizeLevelStrategy::byteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), byte_sizes.begin(), byte_sizes.end()); - // for size below 1kb - auto kilobyte_sizes = DefaultSizeLevelStrategy::kiloByteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), kilobyte_sizes.begin(), kilobyte_sizes.end()); - // for size below 1mb - auto mb_sizes = DefaultSizeLevelStrategy::megaByteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), mb_sizes.begin(), mb_sizes.end()); - // for size below 1gb - auto gb_sizes = DefaultSizeLevelStrategy::gigaByteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), gb_sizes.begin(), gb_sizes.end()); - - cfg.size_limit = Size(0, 0, 0, 4); - cfg.warning_level = 0.95; - + BufferHubConfig cfg(DeviceType::CPU, config.cpu.alloc, Size(0, 0, 0, 4)); buffer_hubs_[DeviceType::CPU] = BufferHub::Builder::build(cfg); ret |= true; } @@ -74,9 +57,7 @@ Buffer BufferManager::fetch(size_t size, DeviceType device_type) { return buffer; } -BufferManager::~BufferManager() { - destroy(); -} +BufferManager::~BufferManager() { destroy(); } void BufferManager::destroy() { for (auto p : buffer_hubs_) { diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index a34f95d..84203eb 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -10,12 +10,7 @@ class CPUBufferHubTest : public ::testing::Test { protected: void SetUp() override { - BufferHubConfig config; - // set config - config.device_type = DeviceType::CPU; - config.size_levels = std::vector {Size(8, 4, 2, 1)}; - config.allocator = std::make_shared(); - + BufferHubConfig config(DeviceType::CPU, std::make_shared(), Size(0, 0, 0, 4)); buffer_hub_ = BufferHub::Builder::build(config); } @@ -31,7 +26,7 @@ TEST_F(CPUBufferHubTest, GetBlock) { EXPECT_NE(block, nullptr); EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); + EXPECT_GE(block->size, 1024); EXPECT_EQ(block->ref_cnt, 1); getBufferHub()->putBlock(block); @@ -42,7 +37,7 @@ TEST_F(CPUBufferHubTest, PutBlock) { EXPECT_NE(block, nullptr); EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); + EXPECT_GE(block->size, 1024); EXPECT_EQ(block->ref_cnt, 1); getBufferHub()->putBlock(block); @@ -57,7 +52,7 @@ TEST_F(CPUBufferHubTest, PutBlockFromBuffer) { EXPECT_NE(block, nullptr); EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); + EXPECT_GE(block->size, 1024); EXPECT_EQ(block->ref_cnt, 1); Buffer buffer; From 819309a6eabd2285e2902bd5d51467a83bfa0b36 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Wed, 19 Nov 2025 23:29:18 +0800 Subject: [PATCH 17/37] Fix BufferHub pool memory management and update tests --- include/NovaLLM/memory/buffer_hub.h | 8 ++- include/NovaLLM/memory/buffer_manager.h | 3 +- source/memory/buffer_hub.cpp | 70 ++++++++++++++++--------- source/memory/buffer_manager.cpp | 2 +- test/source/buffer_hub_test.cpp | 22 +++++--- 5 files changed, 69 insertions(+), 36 deletions(-) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index cb96565..d754655 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -113,6 +113,7 @@ class BufferHubConfig { LevelAssignStrategy level_assign_strategy_; }; +class BufferHub; /** * @brief Buffers at the specified size level * @@ -161,7 +162,8 @@ class NOVA_LLM_API BufferHub { void putBlock(const BlockPtr& block); - void putBlockFromBuffer(const Buffer& buffer); + // Return a buffer to the pool and clear the Buffer to avoid dangling pointers. + void putBlockFromBuffer(Buffer& buffer); private: Block::DataPtr allocData(uint64_t sz); @@ -180,7 +182,9 @@ class NOVA_LLM_API BufferHub { [[nodiscard]] Size gradeLevel(const Size& sz) const; - BufferHub() = default; + BufferHub(); + + ~BufferHub(); std::unordered_map buffers_; diff --git a/include/NovaLLM/memory/buffer_manager.h b/include/NovaLLM/memory/buffer_manager.h index 1b63e66..2e1fad6 100644 --- a/include/NovaLLM/memory/buffer_manager.h +++ b/include/NovaLLM/memory/buffer_manager.h @@ -59,7 +59,8 @@ class NOVA_LLM_API BufferManager { Buffer fetch(size_t size, DeviceType device_type); - void put(const Buffer& buffer); + // Return a buffer obtained from fetch back to the pool and clear it. + void put(Buffer& buffer); ~BufferManager(); diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index dd855f5..f6da4fb 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -121,21 +121,26 @@ std::vector LevelAssignStrategy::assignLevels() { BlockPtr BufferHubLevel::fetchOneFreeBlock() { BlockPtr ret_block {nullptr}; + + if (free_map.empty()) { + LOG_INFO("No free block at level %d,refilling...", index); + auto block_bytes = this->block_size.totalBytes(); + refill(Size(expand_factor * block_bytes)); // allocate expand_factor blocks + } + if (!free_map.empty()) { LOG_INFO("Found free block at level %d", index); - auto it = free_map.begin(); auto block_it = it->second; + // Transition from free to busy: increment ref_cnt from 0 to 1 (*block_it)->ref_cnt++; busy_map.insert({it->first, it->second}); free_map.erase(it); ret_block = *block_it; } else { - LOG_INFO("No free block at level %d,refilling...", index); - auto block_bytes = this->block_size.totalBytes(); - refill(Size(expand_factor * block_bytes)); // 每次分配expand_factor倍的内存块 - ret_block = *(free_map.begin()->second); + LOG_WARN("Unable to fetch free block at level %d even after refill", index); } + return ret_block; } @@ -144,12 +149,13 @@ void BufferHubLevel::refill(const nova_llm::Size& dst_sz) { auto block_bytes = this->block_size.totalBytes(); uint64_t cnt = dst_total_bytes / block_bytes; - auto* data = this->hub->allocData(dst_total_bytes); + // Allocate data per block so that each pointer we free was directly allocated + // Blocks start in the free list with ref_cnt == 0. for (uint64_t i = 0; i < cnt; i++) { auto* one_block = hub->allocBlock(); - one_block->data = data + i * block_bytes; + one_block->data = hub->allocData(block_bytes); one_block->size = block_bytes; - one_block->ref_cnt = 1; // set ref_cnt to 1 when allocated + one_block->ref_cnt = 0; // free blocks have ref_cnt == 0 auto it = this->block_list.insert(this->block_list.end(), one_block); this->free_map[one_block->data] = it; } @@ -173,12 +179,13 @@ void BufferHubLevel::putOneBlock(const BlockPtr& block_ptr) { } else { // in_busy_m is true auto& it = busy_map[dst_block->data]; auto& busy_block = *it; - if (0 == --busy_block->ref_cnt) { + // Decrease ref count once; when it reaches zero, move block back to free_map + if (busy_block->ref_cnt > 0) { + busy_block->ref_cnt--; + } + if (busy_block->ref_cnt == 0) { busy_map.erase(busy_block->data); - busy_block->ref_cnt = 0; free_map[dst_block->data] = it; - } else { - busy_block->ref_cnt--; } } } @@ -192,6 +199,15 @@ BufferHubLevel::~BufferHubLevel() { } } +BufferHub::BufferHub() {} + +BufferHub::~BufferHub() { + // Let the map manage BufferHubLevel destruction + buffers_.clear(); + // Clear configuration metadata + size_levels_.clear(); +} + BufferHub* BufferHub::Builder::build(const BufferHubConfig& config) { auto* hub = new BufferHub; hub->initConfig(config); @@ -207,8 +223,7 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) { if (hub && *hub) { // Deleting the BufferHub will call destructors of its members (including Level), // which will in turn call tearDownBlock to free internal allocations. - (*hub)->size_levels_.clear(); - (*hub)->buffers_.clear(); + //(*hub)->~BufferHub(); delete *hub; *hub = nullptr; @@ -267,17 +282,20 @@ void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) { } void BufferHub::eraseSizeLevel(const Size& level_sz) { - // NOTE:cautious,make sure the size level is not in use - if (buffers_.count(level_sz)) { - if (buffers_[level_sz].busy_map.empty()) { - auto& level = buffers_[level_sz]; - level.~BufferHubLevel(); - } else { - LOG_WARN("Level with size %d is in use,cannot erase now,please try some time later", level_sz.totalBytes()); - } - } else { + auto it = buffers_.find(level_sz); + if (it == buffers_.end()) { LOG_WARN("Level with size %d is not found!", level_sz.totalBytes()); // TODO:optimize + return; + } + + auto& level = it->second; + if (!level.busy_map.empty()) { + LOG_WARN("Level with size %d is in use,cannot erase now,please try some time later", level_sz.totalBytes()); + return; } + + // Erasing from the map will automatically destroy the BufferHubLevel + buffers_.erase(it); } BlockPtr BufferHub::getBlock(const Size& sz) { @@ -312,7 +330,7 @@ void BufferHub::putBlock(const BlockPtr& block_ptr) { } } -void BufferHub::putBlockFromBuffer(const Buffer& buffer) { +void BufferHub::putBlockFromBuffer(Buffer& buffer) { if (0 == buffer.size || nullptr == buffer.data) { return; } @@ -327,6 +345,10 @@ void BufferHub::putBlockFromBuffer(const Buffer& buffer) { } else { LOG_ERROR("Level with size %d cannot be found in this memory hub", level_sz.totalBytes()); } + + // Clear the Buffer to avoid dangling pointers for callers. + buffer.data = nullptr; + buffer.size = 0; } // TODO: optim the level selection algorithm diff --git a/source/memory/buffer_manager.cpp b/source/memory/buffer_manager.cpp index fc74ddf..5216648 100644 --- a/source/memory/buffer_manager.cpp +++ b/source/memory/buffer_manager.cpp @@ -37,7 +37,7 @@ bool BufferManager::init(const nova_llm::BufferManager::Config &config) { return ret; } -void BufferManager::put(const Buffer &buffer) { +void BufferManager::put(Buffer &buffer) { if (nullptr == buffer.data || 0 == buffer.size) { return; } diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index 84203eb..cee3482 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -40,11 +40,19 @@ TEST_F(CPUBufferHubTest, PutBlock) { EXPECT_GE(block->size, 1024); EXPECT_EQ(block->ref_cnt, 1); + // Return the block to the pool; block remains valid but is marked free getBufferHub()->putBlock(block); - EXPECT_EQ(block->data, nullptr); - EXPECT_EQ(block->size, 0); - EXPECT_EQ(block->ref_cnt, 0); + EXPECT_NE(block->data, nullptr); + EXPECT_GE(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 0); // ref count reset when returned to pool + + // Fetch another block of the same size and ensure we get a (possibly reused) block + auto* block2 = getBufferHub()->getBlock(Size(1024)); + EXPECT_NE(block2, nullptr); + EXPECT_NE(block2->data, nullptr); + EXPECT_GE(block2->size, 1024); + EXPECT_EQ(block2->ref_cnt, 1); } TEST_F(CPUBufferHubTest, PutBlockFromBuffer) { @@ -61,10 +69,8 @@ TEST_F(CPUBufferHubTest, PutBlockFromBuffer) { buffer.device_type = DeviceType::CPU; getBufferHub()->putBlockFromBuffer(buffer); - EXPECT_EQ(block->data, nullptr); - EXPECT_EQ(block->size, 0); - EXPECT_EQ(block->ref_cnt, 0); - + // After returning via Buffer, the underlying block should be returned to the pool. + // The Buffer should be cleared to avoid dangling pointers. EXPECT_EQ(buffer.data, nullptr); EXPECT_EQ(buffer.size, 0); -} \ No newline at end of file +} From 93eb9f5197b2f55421e5c42f8414cbffbb370023 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Fri, 21 Nov 2025 00:11:07 +0800 Subject: [PATCH 18/37] fix: fix free_map error --- source/memory/buffer_hub.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index f6da4fb..02dcb7a 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -184,8 +184,8 @@ void BufferHubLevel::putOneBlock(const BlockPtr& block_ptr) { busy_block->ref_cnt--; } if (busy_block->ref_cnt == 0) { + free_map[dst_block->data] = it;//NOTE: Be cautious about the order of operations here busy_map.erase(busy_block->data); - free_map[dst_block->data] = it; } } } From c8edc765dd3ec68aedf037334193fc389943b5dc Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Mon, 24 Nov 2025 21:46:14 +0800 Subject: [PATCH 19/37] fix: forward declaration of BufferHUb --- include/NovaLLM/memory/buffer_hub.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index d754655..10a3490 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -12,6 +12,9 @@ namespace nova_llm { +// Forward declaration +class BufferHub; + struct Size { private: uint64_t b_ = 0; From 1b214e2ad2cca10f4b49430081734da6385a552d Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Mon, 24 Nov 2025 21:54:25 +0800 Subject: [PATCH 20/37] fix: add thread safty --- include/NovaLLM/memory/buffer_hub.h | 6 +++++- source/memory/buffer_hub.cpp | 23 ++++++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 10a3490..a239159 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -1,6 +1,8 @@ #pragma once #include #include +#include +#include #include #include @@ -189,6 +191,9 @@ class NOVA_LLM_API BufferHub { ~BufferHub(); + // Thread safety: protects all mutable state + mutable std::shared_mutex mutex_; + std::unordered_map buffers_; DeviceType device_type_; @@ -200,6 +205,5 @@ class NOVA_LLM_API BufferHub { float warning_level_ = 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level IAllocatorSharedPtr allocator_; -}; } // namespace nova_llm diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 02dcb7a..2550484 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -275,6 +275,8 @@ void BufferHub::tearDownBlock(BlockPtr& block) { } void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) { + std::unique_lock lock(mutex_); + auto& level = buffers_[level_block_sz]; level.block_size = level_block_sz; level.index = index; @@ -282,23 +284,34 @@ void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) { } void BufferHub::eraseSizeLevel(const Size& level_sz) { + std::unique_lock lock(mutex_); + auto it = buffers_.find(level_sz); if (it == buffers_.end()) { - LOG_WARN("Level with size %d is not found!", level_sz.totalBytes()); // TODO:optimize + LOG_WARN("Level with size %llu is not found!", level_sz.totalBytes()); return; } auto& level = it->second; if (!level.busy_map.empty()) { - LOG_WARN("Level with size %d is in use,cannot erase now,please try some time later", level_sz.totalBytes()); + LOG_ERROR("Level with size %llu has %zu busy blocks, cannot erase now", + level_sz.totalBytes(), level.busy_map.size()); return; } - // Erasing from the map will automatically destroy the BufferHubLevel + // Free all blocks in the block_list before erasing + // The destructor will be called, but let's be explicit about cleanup + LOG_INFO("Erasing level with size %llu, freeing %zu blocks", + level_sz.totalBytes(), level.block_list.size()); + + // Erasing from the map will call BufferHubLevel destructor, + // which properly frees all blocks via tearDownBlock buffers_.erase(it); } BlockPtr BufferHub::getBlock(const Size& sz) { + std::unique_lock lock(mutex_); + // round it to ceil level auto level_sz = gradeLevel(sz); if (!level_sz.isValid()) { @@ -320,6 +333,8 @@ BlockPtr BufferHub::getBlock(const Size& sz) { } void BufferHub::putBlock(const BlockPtr& block_ptr) { + std::unique_lock lock(mutex_); + auto size = block_ptr->size; Size level_size(size); if (buffers_.count(level_size)) { @@ -331,6 +346,8 @@ void BufferHub::putBlock(const BlockPtr& block_ptr) { } void BufferHub::putBlockFromBuffer(Buffer& buffer) { + std::unique_lock lock(mutex_); + if (0 == buffer.size || nullptr == buffer.data) { return; } From 564f8aef799617450b5160d9f669118b4fb74c23 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Mon, 24 Nov 2025 23:07:07 +0800 Subject: [PATCH 21/37] refactor: add unit test for multi-thread and replace raw pointer by smart pointer --- include/NovaLLM/data/tensor.h | 2 +- include/NovaLLM/memory/buffer_hub.h | 45 +++-- include/NovaLLM/memory/buffer_manager.h | 2 +- source/memory/buffer_hub.cpp | 105 +++++----- test/CMakeLists.txt | 1 + test/source/buffer_hub_test.cpp | 256 ++++++++++++++++++++++++ 6 files changed, 349 insertions(+), 62 deletions(-) diff --git a/include/NovaLLM/data/tensor.h b/include/NovaLLM/data/tensor.h index 4a02d5d..a31539c 100644 --- a/include/NovaLLM/data/tensor.h +++ b/include/NovaLLM/data/tensor.h @@ -31,7 +31,7 @@ class NOVA_LLM_API Tensor { * @brief 默认删除器 */ struct DefaultDeletor { - void operator()(void** data) {} + void operator()(void** /*data*/) {} }; using Deleter = std::function; diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index a239159..744566a 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include #include @@ -81,7 +82,10 @@ struct Block { bool isValid() const { return data != nullptr && 0 != size; } }; -using BlockPtr = Block*; +// BlockPtr for owning pointers (used in collections) +using BlockPtr = std::unique_ptr; +// Raw non-owning pointer for temporary access +using BlockRawPtr = Block*; class LevelAssignStrategy { public: @@ -91,7 +95,11 @@ class LevelAssignStrategy { class BufferHubConfig { public: BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit, LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95) - : device_type_(device_type), allocator_(allocator), size_limit_(size_limit), warning_level_(warning_level), level_assign_strategy_(strategy) { + : device_type_(device_type), + size_limit_(size_limit), + warning_level_(warning_level), + allocator_(allocator), + level_assign_strategy_(strategy) { size_levels_ = strategy.assignLevels(); }; @@ -125,14 +133,16 @@ class BufferHub; */ class BufferHubLevel { public: - BlockPtr fetchOneFreeBlock(); - void putOneBlock(const BlockPtr& block_ptr); + // Returns non-owning pointer since pool retains ownership + BlockRawPtr fetchOneFreeBlock(); + // Accepts non-owning pointer for blocks already in the pool + void putOneBlock(BlockRawPtr block_ptr); void refill(const Size& sz); ~BufferHubLevel(); uint32_t index = -1; // level index in buffer hub Size block_size {static_cast(0)}; // each block size at this level uint32_t expand_factor = 2; - std::list block_list; + std::list block_list; // Owns the blocks using BlockIterator = std::list::iterator; std::unordered_map free_map; std::unordered_map busy_map; @@ -163,27 +173,32 @@ class NOVA_LLM_API BufferHub { void initConfig(const BufferHubConfig& config); - BlockPtr getBlock(const Size& sz); + // Returns non-owning pointer to block managed by pool + BlockRawPtr getBlock(const Size& sz); - void putBlock(const BlockPtr& block); + // Accepts non-owning pointer to block managed by pool + void putBlock(BlockRawPtr block); // Return a buffer to the pool and clear the Buffer to avoid dangling pointers. void putBlockFromBuffer(Buffer& buffer); + void addSizeLevel(uint32_t index, const Size& level_sz); + + void eraseSizeLevel(const Size& level_sz); + private: Block::DataPtr allocData(uint64_t sz); void deallocData(Block::DataPtr& data_ptr); + // Creates a new block with ownership BlockPtr allocBlock(); - void deallocateBlock(BlockPtr& block_ptr); - - BlockPtr setUpBlock(const Size& sz); // alloc and init block + void deallocateBlock(BlockPtr block); - void tearDownBlock(BlockPtr& block); + // Creates and initializes a new block + BlockPtr setUpBlock(const Size& sz); - void addSizeLevel(uint32_t index, const Size& level_sz); - - void eraseSizeLevel(const Size& level_sz); + // Cleans up and destroys a block + void tearDownBlock(BlockPtr block); [[nodiscard]] Size gradeLevel(const Size& sz) const; @@ -206,4 +221,6 @@ class NOVA_LLM_API BufferHub { IAllocatorSharedPtr allocator_; +}; + } // namespace nova_llm diff --git a/include/NovaLLM/memory/buffer_manager.h b/include/NovaLLM/memory/buffer_manager.h index 2e1fad6..73991c6 100644 --- a/include/NovaLLM/memory/buffer_manager.h +++ b/include/NovaLLM/memory/buffer_manager.h @@ -55,7 +55,7 @@ class NOVA_LLM_API BufferManager { BufferManager& operator=(BufferManager&&) = delete; // Disable move assignment - [[nodiscard("Do not drop isInit return value")]] bool isInited() const { return is_init_; } + [[nodiscard]] bool isInited() const { return is_init_; } Buffer fetch(size_t size, DeviceType device_type); diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 2550484..918cd94 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -119,8 +119,8 @@ std::vector LevelAssignStrategy::assignLevels() { return ret; } -BlockPtr BufferHubLevel::fetchOneFreeBlock() { - BlockPtr ret_block {nullptr}; +BlockRawPtr BufferHubLevel::fetchOneFreeBlock() { + BlockRawPtr ret_block {nullptr}; if (free_map.empty()) { LOG_INFO("No free block at level %d,refilling...", index); @@ -136,7 +136,7 @@ BlockPtr BufferHubLevel::fetchOneFreeBlock() { (*block_it)->ref_cnt++; busy_map.insert({it->first, it->second}); free_map.erase(it); - ret_block = *block_it; + ret_block = block_it->get(); // Return non-owning pointer } else { LOG_WARN("Unable to fetch free block at level %d even after refill", index); } @@ -152,41 +152,42 @@ void BufferHubLevel::refill(const nova_llm::Size& dst_sz) { // Allocate data per block so that each pointer we free was directly allocated // Blocks start in the free list with ref_cnt == 0. for (uint64_t i = 0; i < cnt; i++) { - auto* one_block = hub->allocBlock(); - one_block->data = hub->allocData(block_bytes); - one_block->size = block_bytes; + auto one_block = hub->setUpBlock(Size(block_bytes)); one_block->ref_cnt = 0; // free blocks have ref_cnt == 0 - auto it = this->block_list.insert(this->block_list.end(), one_block); - this->free_map[one_block->data] = it; + auto* block_ptr = one_block.get(); + auto it = this->block_list.insert(this->block_list.end(), std::move(one_block)); + this->free_map[block_ptr->data] = it; } } -void BufferHubLevel::putOneBlock(const BlockPtr& block_ptr) { - BlockPtr dst_block(block_ptr); +void BufferHubLevel::putOneBlock(BlockRawPtr block_ptr) { + if (block_ptr == nullptr) { + return; + } + if (block_list.empty()) { - auto ret_it = block_list.insert(block_list.end(), dst_block); - (*ret_it)->ref_cnt = 0; - free_map.insert({dst_block->data, ret_it}); - } else { - bool in_free_m = free_map.count(dst_block->data); - bool in_busy_m = busy_map.count(dst_block->data); - if (!in_free_m && !in_busy_m) { - auto it = block_list.insert(block_list.end(), dst_block); - (*it)->ref_cnt = 0; - free_map.insert({(*it)->data, it}); - } else if (in_free_m) { - LOG_WARN("Block %p already in block list at level %d", static_cast(dst_block->data), index); - } else { // in_busy_m is true - auto& it = busy_map[dst_block->data]; - auto& busy_block = *it; - // Decrease ref count once; when it reaches zero, move block back to free_map - if (busy_block->ref_cnt > 0) { - busy_block->ref_cnt--; - } - if (busy_block->ref_cnt == 0) { - free_map[dst_block->data] = it;//NOTE: Be cautious about the order of operations here - busy_map.erase(busy_block->data); - } + LOG_WARN("putOneBlock called on empty block_list at level %d", index); + return; + } + + bool in_free_m = free_map.count(block_ptr->data); + bool in_busy_m = busy_map.count(block_ptr->data); + + if (!in_free_m && !in_busy_m) { + LOG_WARN("Block %p not found in level %d", static_cast(block_ptr->data), index); + return; + } else if (in_free_m) { + LOG_WARN("Block %p already in free list at level %d", static_cast(block_ptr->data), index); + } else { // in_busy_m is true + auto it = busy_map[block_ptr->data]; + auto& busy_block = *it; + // Decrease ref count once; when it reaches zero, move block back to free_map + if (busy_block->ref_cnt > 0) { + busy_block->ref_cnt--; + } + if (busy_block->ref_cnt == 0) { + free_map[block_ptr->data] = it; // NOTE: Be cautious about the order of operations here + busy_map.erase(busy_block->data); } } } @@ -194,9 +195,14 @@ void BufferHubLevel::putOneBlock(const BlockPtr& block_ptr) { BufferHubLevel::~BufferHubLevel() { free_map.clear(); busy_map.clear(); + // Blocks are automatically cleaned up when unique_ptrs are destroyed + // but we need to manually free the data for (auto& block_ptr : block_list) { - hub->tearDownBlock(block_ptr); + if (block_ptr && block_ptr->data) { + hub->deallocData(block_ptr->data); + } } + block_list.clear(); // unique_ptrs will deallocate Block structs } BufferHub::BufferHub() {} @@ -248,12 +254,15 @@ void BufferHub::deallocData(Block::DataPtr& data_ptr) { } } -BlockPtr BufferHub::allocBlock() { return static_cast(this->allocator_->allocate(sizeof(Block))); } +BlockPtr BufferHub::allocBlock() { + auto* raw_ptr = static_cast(this->allocator_->allocate(sizeof(Block))); + return BlockPtr(raw_ptr); +} -void BufferHub::deallocateBlock(BlockPtr& block_ptr) { - if (block_ptr) { - this->allocator_->deallocate(block_ptr); - block_ptr = nullptr; +void BufferHub::deallocateBlock(BlockPtr block) { + if (block) { + Block* raw = block.release(); + this->allocator_->deallocate(raw); } } @@ -265,12 +274,12 @@ BlockPtr BufferHub::setUpBlock(const Size& sz) { return block; } -void BufferHub::tearDownBlock(BlockPtr& block) { +void BufferHub::tearDownBlock(BlockPtr block) { if (block) { deallocData(block->data); block->size = 0; block->ref_cnt = 0; - deallocateBlock(block); + deallocateBlock(std::move(block)); } } @@ -309,7 +318,7 @@ void BufferHub::eraseSizeLevel(const Size& level_sz) { buffers_.erase(it); } -BlockPtr BufferHub::getBlock(const Size& sz) { +BlockRawPtr BufferHub::getBlock(const Size& sz) { std::unique_lock lock(mutex_); // round it to ceil level @@ -318,11 +327,11 @@ BlockPtr BufferHub::getBlock(const Size& sz) { return nullptr; } // search the block list - BlockPtr ret_block {nullptr}; + BlockRawPtr ret_block {nullptr}; if (buffers_.count(level_sz)) { auto& level = buffers_[level_sz]; auto block = level.fetchOneFreeBlock(); - if (block->isValid()) { + if (block && block->isValid()) { ret_block = block; } } @@ -332,7 +341,11 @@ BlockPtr BufferHub::getBlock(const Size& sz) { return ret_block; } -void BufferHub::putBlock(const BlockPtr& block_ptr) { +void BufferHub::putBlock(BlockRawPtr block_ptr) { + if (!block_ptr) { + return; + } + std::unique_lock lock(mutex_); auto size = block_ptr->size; @@ -357,7 +370,7 @@ void BufferHub::putBlockFromBuffer(Buffer& buffer) { auto* data = static_cast(buffer.data); if (level.busy_map.count(data)) { auto block_it = level.busy_map[data]; - level.putOneBlock(*block_it); + level.putOneBlock(block_it->get()); // Get raw pointer from unique_ptr } } else { LOG_ERROR("Level with size %d cannot be found in this memory hub", level_sz.totalBytes()); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 732cb7d..b1472ec 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -28,6 +28,7 @@ add_executable(${PROJECT_NAME} ${sources}) # Add include directories target_include_directories(${PROJECT_NAME} PRIVATE + ${PROJECT_ROOT}/include ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_BINARY_DIR}/conan/include ) diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index cee3482..e184d6a 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -1,6 +1,10 @@ #include "NovaLLM/memory/buffer_hub.h" #include +#include +#include +#include +#include using namespace nova_llm; @@ -74,3 +78,255 @@ TEST_F(CPUBufferHubTest, PutBlockFromBuffer) { EXPECT_EQ(buffer.data, nullptr); EXPECT_EQ(buffer.size, 0); } + +// Concurrent access tests +TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { + const int num_threads = 10; + const int num_levels_per_thread = 5; + std::vector threads; + std::atomic success_count{0}; + + // Each thread adds multiple size levels + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &success_count]() { + for (int i = 0; i < num_levels_per_thread; ++i) { + // Create unique sizes for each thread to avoid conflicts + uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100); // 100MB+ + Size level_size(size_bytes); + uint32_t index = t * num_levels_per_thread + i + 1000; + + getBufferHub()->addSizeLevel(index, level_size); + success_count++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + // Verify all additions succeeded + EXPECT_EQ(success_count.load(), num_threads * num_levels_per_thread); +} + +TEST_F(CPUBufferHubTest, ConcurrentEraseSizeLevel) { + const int num_threads = 8; + std::vector threads; + std::vector sizes_to_add; + + // Pre-populate with size levels + for (int i = 0; i < num_threads * 2; ++i) { + uint64_t size_bytes = (1 << 20) * (i + 200); // 200MB+ + Size level_size(size_bytes); + sizes_to_add.push_back(level_size); + getBufferHub()->addSizeLevel(2000 + i, level_size); + } + + std::atomic erase_attempts{0}; + + // Each thread attempts to erase different size levels concurrently + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &sizes_to_add, &erase_attempts]() { + // Each thread erases 2 levels + for (int i = 0; i < 2; ++i) { + int idx = t * 2 + i; + getBufferHub()->eraseSizeLevel(sizes_to_add[idx]); + erase_attempts++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + EXPECT_EQ(erase_attempts.load(), num_threads * 2); +} + +TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { + const int num_threads = 20; + const int blocks_per_thread = 5; + std::vector threads; + std::vector> thread_blocks(num_threads); + std::atomic successful_gets{0}; + + // Multiple threads requesting blocks of the same size concurrently + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &thread_blocks, &successful_gets]() { + for (int i = 0; i < blocks_per_thread; ++i) { + auto* block = getBufferHub()->getBlock(Size(4096)); // 4KB blocks + if (block != nullptr && block->data != nullptr) { + thread_blocks[t].push_back(block); + successful_gets++; + + // Verify block properties + EXPECT_NE(block->data, nullptr); + EXPECT_GE(block->size, 4096); + EXPECT_EQ(block->ref_cnt, 1); + } + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + // Verify we got the expected number of blocks + EXPECT_EQ(successful_gets.load(), num_threads * blocks_per_thread); + + // Verify all blocks have unique data pointers (no double allocation) + std::vector all_data_ptrs; + for (const auto& blocks : thread_blocks) { + for (const auto& block : blocks) { + all_data_ptrs.push_back(block->data); + } + } + std::sort(all_data_ptrs.begin(), all_data_ptrs.end()); + auto last = std::unique(all_data_ptrs.begin(), all_data_ptrs.end()); + EXPECT_EQ(last - all_data_ptrs.begin(), num_threads * blocks_per_thread); + + // Clean up - return all blocks + for (auto& blocks : thread_blocks) { + for (auto* block : blocks) { + getBufferHub()->putBlock(block); + } + } +} + +TEST_F(CPUBufferHubTest, ConcurrentPutBlock) { + const int num_threads = 15; + const int blocks_per_thread = 4; + std::vector threads; + std::vector> thread_blocks(num_threads); + + // First, get blocks in a single-threaded manner + for (int t = 0; t < num_threads; ++t) { + for (int i = 0; i < blocks_per_thread; ++i) { + auto* block = getBufferHub()->getBlock(Size(2048)); // 2KB blocks + ASSERT_NE(block, nullptr); + thread_blocks[t].push_back(block); + } + } + + std::atomic successful_puts{0}; + + // Now return blocks concurrently from multiple threads + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &thread_blocks, &successful_puts]() { + for (auto* block : thread_blocks[t]) { + EXPECT_EQ(block->ref_cnt, 1); + getBufferHub()->putBlock(block); + successful_puts++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread); + + // Verify blocks are returned properly by checking ref_cnt + for (const auto& blocks : thread_blocks) { + for (const auto* block : blocks) { + EXPECT_EQ(block->ref_cnt, 0); + } + } +} + +TEST_F(CPUBufferHubTest, ConcurrentPutBlockFromBuffer) { + const int num_threads = 12; + const int blocks_per_thread = 3; + std::vector threads; + std::vector> thread_buffers(num_threads); + + // First, get blocks and create buffers in a single-threaded manner + for (int t = 0; t < num_threads; ++t) { + for (int i = 0; i < blocks_per_thread; ++i) { + auto* block = getBufferHub()->getBlock(Size(8192)); // 8KB blocks + ASSERT_NE(block, nullptr); + + Buffer buffer; + buffer.data = block->data; + buffer.size = block->size; + buffer.device_type = DeviceType::CPU; + thread_buffers[t].push_back(buffer); + } + } + + std::atomic successful_puts{0}; + + // Now return buffers concurrently from multiple threads + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &thread_buffers, &successful_puts]() { + for (auto& buffer : thread_buffers[t]) { + EXPECT_NE(buffer.data, nullptr); + EXPECT_NE(buffer.size, 0); + + getBufferHub()->putBlockFromBuffer(buffer); + + // Verify buffer was cleared + EXPECT_EQ(buffer.data, nullptr); + EXPECT_EQ(buffer.size, 0); + + successful_puts++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread); +} + +// Mixed concurrent operations test +TEST_F(CPUBufferHubTest, ConcurrentMixedOperations) { + const int num_threads = 16; + std::vector threads; + std::atomic total_operations{0}; + + // Mix of get and put operations happening concurrently + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &total_operations]() { + std::vector blocks; + + // Perform alternating get and put operations + for (int i = 0; i < 10; ++i) { + // Get a block + auto* block = getBufferHub()->getBlock(Size(1024 * (t % 4 + 1))); // Varying sizes + if (block != nullptr) { + EXPECT_NE(block->data, nullptr); + EXPECT_EQ(block->ref_cnt, 1); + blocks.push_back(block); + total_operations++; + } + + // Return a previously acquired block if we have any + if (!blocks.empty() && i % 3 == 0) { + auto* return_block = blocks.back(); + blocks.pop_back(); + getBufferHub()->putBlock(return_block); + // Note: Don't check ref_cnt here as it's being modified concurrently + total_operations++; + } + } + + // Clean up remaining blocks + for (auto* block : blocks) { + getBufferHub()->putBlock(block); + total_operations++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + // Verify operations completed + EXPECT_GT(total_operations.load(), 0); +} From 9b4698a1daab22c37f2ff1f4830283279e61f324 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 25 Nov 2025 20:23:15 +0800 Subject: [PATCH 22/37] refactor: only use uint64_t in Size --- include/NovaLLM/memory/buffer_hub.h | 45 +++++------------------ source/memory/buffer_hub.cpp | 55 +++-------------------------- source/memory/buffer_manager.cpp | 2 +- test/source/buffer_hub_test.cpp | 2 +- 4 files changed, 16 insertions(+), 88 deletions(-) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 744566a..7824411 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -20,49 +20,22 @@ class BufferHub; struct Size { private: - uint64_t b_ = 0; - uint64_t kb_ = 0; - uint64_t mb_ = 0; - uint64_t gb_ = 0; - uint64_t total_bytes_ = 0; - const uint64_t ratio_ = 1 << 10; - - void convert_in_units(uint64_t bytes); + uint64_t bytes_ = 0; public: Size() = default; - explicit Size(uint64_t sz) { - total_bytes_ = sz; - convert_in_units(total_bytes_); - } - - Size(uint64_t b, uint64_t kb, uint64_t mb, uint64_t gb); - - Size(const Size& rhs) { - total_bytes_ = rhs.totalBytes(); - convert_in_units(total_bytes_); - } - - uint64_t gb() const { return this->gb_; } - - uint64_t mb() const { return this->mb_; } - - uint64_t kb() const { return this->kb_; } + explicit Size(uint64_t bytes) : bytes_(bytes) {} - uint64_t b() const { return this->b_; } + Size(const Size& rhs) = default; - Size& operator=(const Size& rhs) { - total_bytes_ = rhs.totalBytes(); - convert_in_units(total_bytes_); - return *this; - } + Size& operator=(const Size& rhs) = default; - [[nodiscard]] uint64_t totalBytes() const { return total_bytes_; } + [[nodiscard]] uint64_t totalBytes() const { return bytes_; } - bool operator==(const Size& rhs) const { return totalBytes() == rhs.totalBytes(); } + bool operator==(const Size& rhs) const { return bytes_ == rhs.bytes_; } - [[nodiscard]] bool isValid() const { return totalBytes() != 0; } + [[nodiscard]] bool isValid() const { return bytes_ != 0; } }; struct SizeHash { @@ -94,7 +67,7 @@ class LevelAssignStrategy { class BufferHubConfig { public: - BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit, LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95) + BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit=Size(4UL*1024*1024*1024), LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95) : device_type_(device_type), size_limit_(size_limit), warning_level_(warning_level), @@ -215,7 +188,7 @@ class NOVA_LLM_API BufferHub { std::vector size_levels_; // ensure that levels are in ascending order - Size size_limit_ {0, 0, 0, 4}; // Memory in buffer hub cannot exceed this limit + Size size_limit_; // Memory in buffer hub cannot exceed this limit float warning_level_ = 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 918cd94..5188972 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -6,52 +6,7 @@ namespace nova_llm { -void Size::convert_in_units(uint64_t bytes) { - auto down_ratio = ratio_ * ratio_ * ratio_; // std::pow(ratio_, 3); - - // number of gb units - gb_ = bytes / down_ratio; - bytes -= gb_ * down_ratio; - down_ratio /= ratio_; - - // number of mb units - mb_ = bytes / down_ratio; - bytes -= mb_ * down_ratio; - down_ratio /= ratio_; - - // number of kb units - kb_ = bytes / down_ratio; - bytes -= kb_ * down_ratio; - - b_ = bytes; -} - -Size::Size(uint64_t b, uint64_t kb, uint64_t mb, uint64_t gb) { - b_ = b; - kb_ = kb; - mb_ = mb; - gb_ = gb; - - if (ratio_ < b_) { - auto kb_cnt = b_ / ratio_; - b_ -= kb_cnt * ratio_; - kb_ += kb_cnt; - } - - if (ratio_ < kb_) { - auto mb_cnt = kb_ / ratio_; - kb_ -= mb_cnt * ratio_; - mb_ += mb_cnt; - } - - if (ratio_ < mb_) { - auto gb_cnt = mb_ / ratio_; - mb_ -= gb_cnt * ratio_; - gb_ += gb_cnt; - } - - total_bytes_ = b_ + kb_ * ratio_ + mb_ * ratio_ * ratio_ + gb_ * ratio_ * ratio_ * ratio_; -} +// Size class is now header-only with simplified implementation namespace { class DefaultSizeLevelStrategy { @@ -70,7 +25,7 @@ std::vector DefaultSizeLevelStrategy::byteSizes() { uint32_t base = 64; uint32_t ratio = 2; for (uint64_t i = base; i < 1024;) { - ret.push_back(Size(i, 0, 0, 0)); + ret.push_back(Size(i)); // bytes i *= ratio; } return ret; @@ -81,7 +36,7 @@ std::vector DefaultSizeLevelStrategy::kiloByteSizes() { uint32_t base = 4; uint32_t ratio = 2; for (uint64_t i = base; i < 1024;) { - ret.push_back(Size(0, i, 0, 0)); + ret.push_back(Size(i * 1024)); // kilobytes to bytes i *= ratio; } return ret; @@ -92,7 +47,7 @@ std::vector DefaultSizeLevelStrategy::megaByteSizes() { uint32_t base = 2; uint32_t ratio = 2; for (uint64_t i = base; i < 1024;) { - ret.push_back(Size(0, 0, i, 0)); + ret.push_back(Size(i * 1024 * 1024)); // megabytes to bytes i *= ratio; } return ret; @@ -103,7 +58,7 @@ std::vector DefaultSizeLevelStrategy::gigaByteSizes() { uint32_t base = 1; uint32_t ratio = 2; for (uint64_t i = base; i < 10;) { - ret.push_back(Size(0, 0, 0, i)); + ret.push_back(Size(i * 1024ULL * 1024 * 1024)); // gigabytes to bytes i *= ratio; } return ret; diff --git a/source/memory/buffer_manager.cpp b/source/memory/buffer_manager.cpp index 5216648..f55ad24 100644 --- a/source/memory/buffer_manager.cpp +++ b/source/memory/buffer_manager.cpp @@ -28,7 +28,7 @@ bool BufferManager::init(const nova_llm::BufferManager::Config &config) { } bool ret = false; if (config.device_flags.has(DeviceType::CPU)) { - BufferHubConfig cfg(DeviceType::CPU, config.cpu.alloc, Size(0, 0, 0, 4)); + BufferHubConfig cfg(DeviceType::CPU, config.cpu.alloc, Size(4UL*1024*1024*1024)); buffer_hubs_[DeviceType::CPU] = BufferHub::Builder::build(cfg); ret |= true; } diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index e184d6a..daf270d 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -14,7 +14,7 @@ class CPUBufferHubTest : public ::testing::Test { protected: void SetUp() override { - BufferHubConfig config(DeviceType::CPU, std::make_shared(), Size(0, 0, 0, 4)); + BufferHubConfig config(DeviceType::CPU, std::make_shared(), Size(4ULL * 1024 * 1024 * 1024)); buffer_hub_ = BufferHub::Builder::build(config); } From 92447ac4b2e37c2df9dee4cde4987963bbe7866a Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 25 Nov 2025 20:47:03 +0800 Subject: [PATCH 23/37] refactor: encapsulate BufferHubLevel --- include/NovaLLM/memory/buffer_hub.h | 39 +++++++--- source/memory/buffer_hub.cpp | 113 +++++++++++++++++----------- 2 files changed, 98 insertions(+), 54 deletions(-) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 7824411..2be8820 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -106,20 +105,40 @@ class BufferHub; */ class BufferHubLevel { public: + // Default constructor required for unordered_map + BufferHubLevel() = default; + + void initialize(uint32_t index, const Size& block_size, BufferHub* hub); + // Returns non-owning pointer since pool retains ownership BlockRawPtr fetchOneFreeBlock(); + // Accepts non-owning pointer for blocks already in the pool void putOneBlock(BlockRawPtr block_ptr); - void refill(const Size& sz); + + // Attempts to put a block back by its data pointer. Returns true if successful. + bool tryPutBlock(Block::DataPtr data); + + size_t busyBlockCount() const; + + size_t totalBlocks() const; + ~BufferHubLevel(); - uint32_t index = -1; // level index in buffer hub - Size block_size {static_cast(0)}; // each block size at this level - uint32_t expand_factor = 2; - std::list block_list; // Owns the blocks + + private: + void refill(const Size& sz); + + uint32_t index_ = -1; // level index in buffer hub + Size block_size_ {static_cast(0)}; // each block size at this level + uint32_t expand_factor_ = 2; + + std::list block_list_; // Owns the blocks using BlockIterator = std::list::iterator; - std::unordered_map free_map; - std::unordered_map busy_map; - BufferHub* hub; + + std::unordered_map free_map_; + std::unordered_map busy_map_; + + BufferHub* hub_ = nullptr; }; /* @@ -180,7 +199,7 @@ class NOVA_LLM_API BufferHub { ~BufferHub(); // Thread safety: protects all mutable state - mutable std::shared_mutex mutex_; + mutable std::mutex mutex_; std::unordered_map buffers_; diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 5188972..a412052 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -74,44 +74,59 @@ std::vector LevelAssignStrategy::assignLevels() { return ret; } +void BufferHubLevel::initialize(uint32_t index, const Size& block_size, BufferHub* hub) { + index_ = index; + block_size_ = block_size; + hub_ = hub; +} + +size_t BufferHubLevel::busyBlockCount() const { + return busy_map_.size(); +} + +size_t BufferHubLevel::totalBlocks() const { + return block_list_.size(); +} + BlockRawPtr BufferHubLevel::fetchOneFreeBlock() { BlockRawPtr ret_block {nullptr}; - if (free_map.empty()) { - LOG_INFO("No free block at level %d,refilling...", index); - auto block_bytes = this->block_size.totalBytes(); - refill(Size(expand_factor * block_bytes)); // allocate expand_factor blocks + if (free_map_.empty()) { + LOG_INFO("No free block at level %d,refilling...", index_); + auto block_bytes = this->block_size_.totalBytes(); + refill(Size(expand_factor_ * block_bytes)); // allocate expand_factor blocks } - if (!free_map.empty()) { - LOG_INFO("Found free block at level %d", index); - auto it = free_map.begin(); + if (!free_map_.empty()) { + LOG_INFO("Found free block at level %d", index_); + auto it = free_map_.begin(); auto block_it = it->second; // Transition from free to busy: increment ref_cnt from 0 to 1 (*block_it)->ref_cnt++; - busy_map.insert({it->first, it->second}); - free_map.erase(it); + busy_map_.insert({it->first, it->second}); + free_map_.erase(it); ret_block = block_it->get(); // Return non-owning pointer } else { - LOG_WARN("Unable to fetch free block at level %d even after refill", index); + LOG_WARN("Unable to fetch free block at level %d even after refill", index_); } return ret_block; } void BufferHubLevel::refill(const nova_llm::Size& dst_sz) { + if (!hub_) return; auto dst_total_bytes = dst_sz.totalBytes(); - auto block_bytes = this->block_size.totalBytes(); + auto block_bytes = this->block_size_.totalBytes(); uint64_t cnt = dst_total_bytes / block_bytes; // Allocate data per block so that each pointer we free was directly allocated // Blocks start in the free list with ref_cnt == 0. for (uint64_t i = 0; i < cnt; i++) { - auto one_block = hub->setUpBlock(Size(block_bytes)); + auto one_block = hub_->setUpBlock(Size(block_bytes)); one_block->ref_cnt = 0; // free blocks have ref_cnt == 0 auto* block_ptr = one_block.get(); - auto it = this->block_list.insert(this->block_list.end(), std::move(one_block)); - this->free_map[block_ptr->data] = it; + auto it = this->block_list_.insert(this->block_list_.end(), std::move(one_block)); + this->free_map_[block_ptr->data] = it; } } @@ -120,44 +135,53 @@ void BufferHubLevel::putOneBlock(BlockRawPtr block_ptr) { return; } - if (block_list.empty()) { - LOG_WARN("putOneBlock called on empty block_list at level %d", index); + if (block_list_.empty()) { + LOG_WARN("putOneBlock called on empty block_list at level %d", index_); return; } - bool in_free_m = free_map.count(block_ptr->data); - bool in_busy_m = busy_map.count(block_ptr->data); + bool in_free_m = free_map_.count(block_ptr->data); + bool in_busy_m = busy_map_.count(block_ptr->data); if (!in_free_m && !in_busy_m) { - LOG_WARN("Block %p not found in level %d", static_cast(block_ptr->data), index); + LOG_WARN("Block %p not found in level %d", static_cast(block_ptr->data), index_); return; } else if (in_free_m) { - LOG_WARN("Block %p already in free list at level %d", static_cast(block_ptr->data), index); + LOG_WARN("Block %p already in free list at level %d", static_cast(block_ptr->data), index_); } else { // in_busy_m is true - auto it = busy_map[block_ptr->data]; + auto it = busy_map_[block_ptr->data]; auto& busy_block = *it; // Decrease ref count once; when it reaches zero, move block back to free_map if (busy_block->ref_cnt > 0) { busy_block->ref_cnt--; } if (busy_block->ref_cnt == 0) { - free_map[block_ptr->data] = it; // NOTE: Be cautious about the order of operations here - busy_map.erase(busy_block->data); + free_map_[block_ptr->data] = it; // NOTE: Be cautious about the order of operations here + busy_map_.erase(busy_block->data); } } } +bool BufferHubLevel::tryPutBlock(Block::DataPtr data) { + if (busy_map_.count(data)) { + auto block_it = busy_map_[data]; + putOneBlock(block_it->get()); + return true; + } + return false; +} + BufferHubLevel::~BufferHubLevel() { - free_map.clear(); - busy_map.clear(); + free_map_.clear(); + busy_map_.clear(); // Blocks are automatically cleaned up when unique_ptrs are destroyed // but we need to manually free the data - for (auto& block_ptr : block_list) { - if (block_ptr && block_ptr->data) { - hub->deallocData(block_ptr->data); + for (auto& block_ptr : block_list_) { + if (block_ptr && block_ptr->data && hub_) { + hub_->deallocData(block_ptr->data); } } - block_list.clear(); // unique_ptrs will deallocate Block structs + block_list_.clear(); // unique_ptrs will deallocate Block structs } BufferHub::BufferHub() {} @@ -239,16 +263,14 @@ void BufferHub::tearDownBlock(BlockPtr block) { } void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); auto& level = buffers_[level_block_sz]; - level.block_size = level_block_sz; - level.index = index; - level.hub = this; + level.initialize(index, level_block_sz, this); } void BufferHub::eraseSizeLevel(const Size& level_sz) { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); auto it = buffers_.find(level_sz); if (it == buffers_.end()) { @@ -257,16 +279,16 @@ void BufferHub::eraseSizeLevel(const Size& level_sz) { } auto& level = it->second; - if (!level.busy_map.empty()) { + if (level.busyBlockCount() > 0) { LOG_ERROR("Level with size %llu has %zu busy blocks, cannot erase now", - level_sz.totalBytes(), level.busy_map.size()); + level_sz.totalBytes(), level.busyBlockCount()); return; } // Free all blocks in the block_list before erasing // The destructor will be called, but let's be explicit about cleanup LOG_INFO("Erasing level with size %llu, freeing %zu blocks", - level_sz.totalBytes(), level.block_list.size()); + level_sz.totalBytes(), level.totalBlocks()); // Erasing from the map will call BufferHubLevel destructor, // which properly frees all blocks via tearDownBlock @@ -274,7 +296,7 @@ void BufferHub::eraseSizeLevel(const Size& level_sz) { } BlockRawPtr BufferHub::getBlock(const Size& sz) { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); // round it to ceil level auto level_sz = gradeLevel(sz); @@ -301,7 +323,7 @@ void BufferHub::putBlock(BlockRawPtr block_ptr) { return; } - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); auto size = block_ptr->size; Size level_size(size); @@ -314,7 +336,7 @@ void BufferHub::putBlock(BlockRawPtr block_ptr) { } void BufferHub::putBlockFromBuffer(Buffer& buffer) { - std::unique_lock lock(mutex_); + std::lock_guard lock(mutex_); if (0 == buffer.size || nullptr == buffer.data) { return; @@ -323,10 +345,13 @@ void BufferHub::putBlockFromBuffer(Buffer& buffer) { if (buffers_.count(level_sz)) { auto& level = buffers_[level_sz]; auto* data = static_cast(buffer.data); - if (level.busy_map.count(data)) { - auto block_it = level.busy_map[data]; - level.putOneBlock(block_it->get()); // Get raw pointer from unique_ptr + + if (!level.tryPutBlock(data)) { + // Maybe log warning if data was expected to be there? + // But original code just did nothing if not found in busy_map. + // Actually original code: if (level.busy_map.count(data)) { ... } } + } else { LOG_ERROR("Level with size %d cannot be found in this memory hub", level_sz.totalBytes()); } @@ -354,4 +379,4 @@ Size BufferHub::gradeLevel(const Size& sz) const { return size_levels_[level_index]; } -} // namespace nova_llm \ No newline at end of file +} // namespace nova_llm From 73037acc785d78f4fd40b90cb9cec39c7e58d0c8 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 25 Nov 2025 22:19:49 +0800 Subject: [PATCH 24/37] style: format buffer_hub_test --- test/source/buffer_hub_test.cpp | 89 +++++++++++++++++---------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index daf270d..68de60d 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -1,10 +1,11 @@ #include "NovaLLM/memory/buffer_hub.h" #include + +#include +#include #include #include -#include -#include using namespace nova_llm; @@ -84,8 +85,8 @@ TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { const int num_threads = 10; const int num_levels_per_thread = 5; std::vector threads; - std::atomic success_count{0}; - + std::atomic success_count {0}; + // Each thread adds multiple size levels for (int t = 0; t < num_threads; ++t) { threads.emplace_back([this, t, &success_count]() { @@ -94,17 +95,17 @@ TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100); // 100MB+ Size level_size(size_bytes); uint32_t index = t * num_levels_per_thread + i + 1000; - + getBufferHub()->addSizeLevel(index, level_size); success_count++; } }); } - + for (auto& thread : threads) { thread.join(); } - + // Verify all additions succeeded EXPECT_EQ(success_count.load(), num_threads * num_levels_per_thread); } @@ -113,7 +114,7 @@ TEST_F(CPUBufferHubTest, ConcurrentEraseSizeLevel) { const int num_threads = 8; std::vector threads; std::vector sizes_to_add; - + // Pre-populate with size levels for (int i = 0; i < num_threads * 2; ++i) { uint64_t size_bytes = (1 << 20) * (i + 200); // 200MB+ @@ -121,9 +122,9 @@ TEST_F(CPUBufferHubTest, ConcurrentEraseSizeLevel) { sizes_to_add.push_back(level_size); getBufferHub()->addSizeLevel(2000 + i, level_size); } - - std::atomic erase_attempts{0}; - + + std::atomic erase_attempts {0}; + // Each thread attempts to erase different size levels concurrently for (int t = 0; t < num_threads; ++t) { threads.emplace_back([this, t, &sizes_to_add, &erase_attempts]() { @@ -135,11 +136,11 @@ TEST_F(CPUBufferHubTest, ConcurrentEraseSizeLevel) { } }); } - + for (auto& thread : threads) { thread.join(); } - + EXPECT_EQ(erase_attempts.load(), num_threads * 2); } @@ -148,8 +149,8 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { const int blocks_per_thread = 5; std::vector threads; std::vector> thread_blocks(num_threads); - std::atomic successful_gets{0}; - + std::atomic successful_gets {0}; + // Multiple threads requesting blocks of the same size concurrently for (int t = 0; t < num_threads; ++t) { threads.emplace_back([this, t, &thread_blocks, &successful_gets]() { @@ -158,7 +159,7 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { if (block != nullptr && block->data != nullptr) { thread_blocks[t].push_back(block); successful_gets++; - + // Verify block properties EXPECT_NE(block->data, nullptr); EXPECT_GE(block->size, 4096); @@ -167,14 +168,14 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { } }); } - + for (auto& thread : threads) { thread.join(); } - + // Verify we got the expected number of blocks EXPECT_EQ(successful_gets.load(), num_threads * blocks_per_thread); - + // Verify all blocks have unique data pointers (no double allocation) std::vector all_data_ptrs; for (const auto& blocks : thread_blocks) { @@ -185,7 +186,7 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { std::sort(all_data_ptrs.begin(), all_data_ptrs.end()); auto last = std::unique(all_data_ptrs.begin(), all_data_ptrs.end()); EXPECT_EQ(last - all_data_ptrs.begin(), num_threads * blocks_per_thread); - + // Clean up - return all blocks for (auto& blocks : thread_blocks) { for (auto* block : blocks) { @@ -199,7 +200,7 @@ TEST_F(CPUBufferHubTest, ConcurrentPutBlock) { const int blocks_per_thread = 4; std::vector threads; std::vector> thread_blocks(num_threads); - + // First, get blocks in a single-threaded manner for (int t = 0; t < num_threads; ++t) { for (int i = 0; i < blocks_per_thread; ++i) { @@ -208,9 +209,9 @@ TEST_F(CPUBufferHubTest, ConcurrentPutBlock) { thread_blocks[t].push_back(block); } } - - std::atomic successful_puts{0}; - + + std::atomic successful_puts {0}; + // Now return blocks concurrently from multiple threads for (int t = 0; t < num_threads; ++t) { threads.emplace_back([this, t, &thread_blocks, &successful_puts]() { @@ -221,13 +222,13 @@ TEST_F(CPUBufferHubTest, ConcurrentPutBlock) { } }); } - + for (auto& thread : threads) { thread.join(); } - + EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread); - + // Verify blocks are returned properly by checking ref_cnt for (const auto& blocks : thread_blocks) { for (const auto* block : blocks) { @@ -241,13 +242,13 @@ TEST_F(CPUBufferHubTest, ConcurrentPutBlockFromBuffer) { const int blocks_per_thread = 3; std::vector threads; std::vector> thread_buffers(num_threads); - + // First, get blocks and create buffers in a single-threaded manner for (int t = 0; t < num_threads; ++t) { for (int i = 0; i < blocks_per_thread; ++i) { auto* block = getBufferHub()->getBlock(Size(8192)); // 8KB blocks ASSERT_NE(block, nullptr); - + Buffer buffer; buffer.data = block->data; buffer.size = block->size; @@ -255,31 +256,31 @@ TEST_F(CPUBufferHubTest, ConcurrentPutBlockFromBuffer) { thread_buffers[t].push_back(buffer); } } - - std::atomic successful_puts{0}; - + + std::atomic successful_puts {0}; + // Now return buffers concurrently from multiple threads for (int t = 0; t < num_threads; ++t) { threads.emplace_back([this, t, &thread_buffers, &successful_puts]() { for (auto& buffer : thread_buffers[t]) { EXPECT_NE(buffer.data, nullptr); EXPECT_NE(buffer.size, 0); - + getBufferHub()->putBlockFromBuffer(buffer); - + // Verify buffer was cleared EXPECT_EQ(buffer.data, nullptr); EXPECT_EQ(buffer.size, 0); - + successful_puts++; } }); } - + for (auto& thread : threads) { thread.join(); } - + EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread); } @@ -287,13 +288,13 @@ TEST_F(CPUBufferHubTest, ConcurrentPutBlockFromBuffer) { TEST_F(CPUBufferHubTest, ConcurrentMixedOperations) { const int num_threads = 16; std::vector threads; - std::atomic total_operations{0}; - + std::atomic total_operations {0}; + // Mix of get and put operations happening concurrently for (int t = 0; t < num_threads; ++t) { threads.emplace_back([this, t, &total_operations]() { std::vector blocks; - + // Perform alternating get and put operations for (int i = 0; i < 10; ++i) { // Get a block @@ -304,7 +305,7 @@ TEST_F(CPUBufferHubTest, ConcurrentMixedOperations) { blocks.push_back(block); total_operations++; } - + // Return a previously acquired block if we have any if (!blocks.empty() && i % 3 == 0) { auto* return_block = blocks.back(); @@ -314,7 +315,7 @@ TEST_F(CPUBufferHubTest, ConcurrentMixedOperations) { total_operations++; } } - + // Clean up remaining blocks for (auto* block : blocks) { getBufferHub()->putBlock(block); @@ -322,11 +323,11 @@ TEST_F(CPUBufferHubTest, ConcurrentMixedOperations) { } }); } - + for (auto& thread : threads) { thread.join(); } - + // Verify operations completed EXPECT_GT(total_operations.load(), 0); } From a5066b25b73540a2679305134f1fd672ddda43f2 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 25 Nov 2025 22:45:32 +0800 Subject: [PATCH 25/37] fix: fix buffer_manager_test --- include/NovaLLM/memory/buffer_manager.h | 2 +- source/memory/buffer_manager.cpp | 5 +++-- test/source/buffer_manager_test.cpp | 10 +++++----- test/source/tensor_test.cpp | 19 ++++++++++++++----- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/include/NovaLLM/memory/buffer_manager.h b/include/NovaLLM/memory/buffer_manager.h index 73991c6..54702ff 100644 --- a/include/NovaLLM/memory/buffer_manager.h +++ b/include/NovaLLM/memory/buffer_manager.h @@ -64,9 +64,9 @@ class NOVA_LLM_API BufferManager { ~BufferManager(); - private: void destroy(); + private: BufferManager() = default; bool init(const Config& config); diff --git a/source/memory/buffer_manager.cpp b/source/memory/buffer_manager.cpp index f55ad24..b97ff25 100644 --- a/source/memory/buffer_manager.cpp +++ b/source/memory/buffer_manager.cpp @@ -60,11 +60,12 @@ Buffer BufferManager::fetch(size_t size, DeviceType device_type) { BufferManager::~BufferManager() { destroy(); } void BufferManager::destroy() { - for (auto p : buffer_hubs_) { + for (auto& p : buffer_hubs_) { BufferHub::Builder::destroy(&(p.second)); } + buffer_hubs_.clear(); is_init_ = false; } -} // namespace nova_llm \ No newline at end of file +} // namespace nova_llm diff --git a/test/source/buffer_manager_test.cpp b/test/source/buffer_manager_test.cpp index be3761f..cc1eb52 100644 --- a/test/source/buffer_manager_test.cpp +++ b/test/source/buffer_manager_test.cpp @@ -19,15 +19,15 @@ class BufferManagerTest : public ::testing::Test { BufferManager::Builder::build(config); } - void TearDown() override { BufferManager::Builder::getInstance().~BufferManager(); } + void TearDown() override { BufferManager::Builder::getInstance().destroy(); } }; -TEST(BufferManagerTest, Init) { +TEST_F(BufferManagerTest, Init) { auto& buffer_manager = BufferManager::Builder::getInstance(); EXPECT_TRUE(buffer_manager.isInited()); } -TEST(BufferManagerTest, FetchCpu) { +TEST_F(BufferManagerTest, FetchCpu) { auto& buffer_manager = BufferManager::Builder::getInstance(); auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); @@ -39,7 +39,7 @@ TEST(BufferManagerTest, FetchCpu) { buffer_manager.put(buffer); } -TEST(BufferManagerTest, PutCpu) { +TEST_F(BufferManagerTest, PutCpu) { auto& buffer_manager = BufferManager::Builder::getInstance(); auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); @@ -48,4 +48,4 @@ TEST(BufferManagerTest, PutCpu) { EXPECT_EQ(buffer.data, nullptr); EXPECT_EQ(buffer.size, 0); EXPECT_EQ(buffer.device_type, DeviceType::CPU); -} \ No newline at end of file +} diff --git a/test/source/tensor_test.cpp b/test/source/tensor_test.cpp index 7362825..189eeac 100644 --- a/test/source/tensor_test.cpp +++ b/test/source/tensor_test.cpp @@ -1,4 +1,5 @@ #include "NovaLLM/data/tensor.h" +#include "NovaLLM/memory/buffer_manager.h" #include @@ -7,11 +8,17 @@ using namespace nova_llm; class TensorTest : public ::testing::Test { protected: void SetUp() override { - // 测试前的设置 + BufferManager::Config config; + // set config + config.device_flags.set(DeviceType::CPU); + config.cpu.alloc = std::make_shared(); + // Use a smaller size for testing if needed, or default 4GB + // But since BufferManager is a singleton, we must ensure it's initialized. + BufferManager::Builder::build(config); } void TearDown() override { - // 测试后的清理 + BufferManager::Builder::getInstance().destroy(); } }; @@ -38,10 +45,12 @@ TEST_F(TensorTest, ConstructWithDims) { // 测试非法维度 TEST_F(TensorTest, InvalidDimensions) { std::vector empty_dims; - EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU), std::invalid_argument); + // ASSERT macro throws std::runtime_error + EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU), std::runtime_error); std::vector zero_dims = {2, 0, 4}; - EXPECT_THROW(Tensor tensor(zero_dims, DataType::FLOAT32, DeviceType::CPU), std::invalid_argument); + // ASSERT macro throws std::runtime_error + EXPECT_THROW(Tensor tensor(zero_dims, DataType::FLOAT32, DeviceType::CPU), std::runtime_error); } // 测试拷贝构造 @@ -76,4 +85,4 @@ TEST_F(TensorTest, MemoryAllocation) { EXPECT_NE(tensor.data(), nullptr); EXPECT_EQ(tensor.totalElements(), 6); -} \ No newline at end of file +} From 3471201e767a04a29eef299a9c238eeb28e1d892 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Tue, 25 Nov 2025 22:52:00 +0800 Subject: [PATCH 26/37] fix: fix buffer_manager_test.cpp --- test/source/buffer_manager_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/source/buffer_manager_test.cpp b/test/source/buffer_manager_test.cpp index cc1eb52..be22c98 100644 --- a/test/source/buffer_manager_test.cpp +++ b/test/source/buffer_manager_test.cpp @@ -33,7 +33,7 @@ TEST_F(BufferManagerTest, FetchCpu) { auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); EXPECT_NE(buffer.data, nullptr); - EXPECT_EQ(buffer.size, 1024); + EXPECT_GE(buffer.size, 1024); // Size should be at least requested (may be rounded up to next level) EXPECT_EQ(buffer.device_type, DeviceType::CPU); buffer_manager.put(buffer); From 2783f6190c1aadc3917e3ab021f2c2c9f2b06284 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Wed, 26 Nov 2025 20:56:22 +0800 Subject: [PATCH 27/37] fix: fix ubuntu and windows github ci issues --- .github/workflows/ubuntu.yml | 4 +++- test/source/buffer_hub_test.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index e0ccf65..f935990 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -163,7 +163,9 @@ jobs: - name: Generate coverage report run: | cd build-coverage - lcov --directory . --capture --output-file coverage.info + # lcov --directory . --capture --output-file coverage.info + # Added --ignore-errors mismatch to handle GTest/GCC 13 issues + lcov --directory . --capture --output-file coverage.info --ignore-errors mismatch lcov --remove coverage.info '/usr/*' '*/test/*' '*/conan/*' --output-file coverage.info lcov --list coverage.info - name: Upload coverage to Codecov diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index 68de60d..b094a07 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -89,7 +89,7 @@ TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { // Each thread adds multiple size levels for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &success_count]() { + threads.emplace_back([this, t, &success_count,&num_levels_per_thread]() { for (int i = 0; i < num_levels_per_thread; ++i) { // Create unique sizes for each thread to avoid conflicts uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100); // 100MB+ @@ -153,7 +153,7 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { // Multiple threads requesting blocks of the same size concurrently for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &thread_blocks, &successful_gets]() { + threads.emplace_back([this, t, &thread_blocks, &successful_gets,&blocks_per_thread]() { for (int i = 0; i < blocks_per_thread; ++i) { auto* block = getBufferHub()->getBlock(Size(4096)); // 4KB blocks if (block != nullptr && block->data != nullptr) { From 0c7de54be756edcd5615e38aff5b1bb67d1aa113 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Wed, 26 Nov 2025 21:45:51 +0800 Subject: [PATCH 28/37] fix: fix C4251 as error in windows --- include/NovaLLM/data/tensor.h | 13 ++++++++++++- include/NovaLLM/memory/buffer_hub.h | 17 ++++++++++++++--- include/NovaLLM/memory/buffer_manager.h | 8 ++++++++ source/memory/buffer_manager.cpp | 3 ++- test/source/buffer_hub_test.cpp | 4 ++-- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/NovaLLM/data/tensor.h b/include/NovaLLM/data/tensor.h index a31539c..6efdf0f 100644 --- a/include/NovaLLM/data/tensor.h +++ b/include/NovaLLM/data/tensor.h @@ -1,4 +1,11 @@ #pragma once + +// Disable C4251 warning on Windows (DLL interface for STL containers) +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4251) +#endif + #include #include #include @@ -152,4 +159,8 @@ class NOVA_LLM_API Tensor { Deleter m_deleter_ = DefaultDeletor(); ///< 自定义删除器 }; -} // namespace nova_llm \ No newline at end of file +} // namespace nova_llm + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 2be8820..2d0a113 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -1,4 +1,11 @@ #pragma once + +// Disable C4251 warning on Windows (DLL interface for STL containers) +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4251) +#endif + #include #include #include @@ -66,7 +73,7 @@ class LevelAssignStrategy { class BufferHubConfig { public: - BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit=Size(4UL*1024*1024*1024), LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95) + BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit=Size(4UL*1024*1024*1024), LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95f) : device_type_(device_type), size_limit_(size_limit), warning_level_(warning_level), @@ -128,7 +135,7 @@ class BufferHubLevel { private: void refill(const Size& sz); - uint32_t index_ = -1; // level index in buffer hub + uint32_t index_ = static_cast(-1); // level index in buffer hub Size block_size_ {static_cast(0)}; // each block size at this level uint32_t expand_factor_ = 2; @@ -209,10 +216,14 @@ class NOVA_LLM_API BufferHub { Size size_limit_; // Memory in buffer hub cannot exceed this limit - float warning_level_ = 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level + float warning_level_ = 0.95f; // Be cautious when memory in buffer hub exceeds size_limit*warning_level IAllocatorSharedPtr allocator_; }; } // namespace nova_llm + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/include/NovaLLM/memory/buffer_manager.h b/include/NovaLLM/memory/buffer_manager.h index 54702ff..8e19e41 100644 --- a/include/NovaLLM/memory/buffer_manager.h +++ b/include/NovaLLM/memory/buffer_manager.h @@ -8,6 +8,10 @@ #include "NovaLLM/memory/allocator.h" #include "NovaLLM/memory/buffer_define.h" #include "NovaLLM/memory/buffer_hub.h" +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4251) +#endif namespace nova_llm { /* @@ -77,3 +81,7 @@ class NOVA_LLM_API BufferManager { }; } // namespace nova_llm + +#ifdef _MSC_VER +#pragma warning(pop) +#endif \ No newline at end of file diff --git a/source/memory/buffer_manager.cpp b/source/memory/buffer_manager.cpp index b97ff25..7790c74 100644 --- a/source/memory/buffer_manager.cpp +++ b/source/memory/buffer_manager.cpp @@ -4,6 +4,7 @@ #include "NovaLLM/memory/buffer_hub.h" #include "NovaLLM/utils/log.h" #include "NovaLLM/utils/macros.h" +// Disable C4251 warning on Windows (DLL interface for STL containers) namespace nova_llm { @@ -68,4 +69,4 @@ void BufferManager::destroy() { } -} // namespace nova_llm +} // namespace nova_llm \ No newline at end of file diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index b094a07..68de60d 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -89,7 +89,7 @@ TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { // Each thread adds multiple size levels for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &success_count,&num_levels_per_thread]() { + threads.emplace_back([this, t, &success_count]() { for (int i = 0; i < num_levels_per_thread; ++i) { // Create unique sizes for each thread to avoid conflicts uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100); // 100MB+ @@ -153,7 +153,7 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { // Multiple threads requesting blocks of the same size concurrently for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &thread_blocks, &successful_gets,&blocks_per_thread]() { + threads.emplace_back([this, t, &thread_blocks, &successful_gets]() { for (int i = 0; i < blocks_per_thread; ++i) { auto* block = getBufferHub()->getBlock(Size(4096)); // 4KB blocks if (block != nullptr && block->data != nullptr) { From 53663fb73ea7c672c8a431c07d88255cd100c836 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Wed, 26 Nov 2025 22:04:03 +0800 Subject: [PATCH 29/37] fix : fix capture error in windows --- test/source/buffer_hub_test.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index 68de60d..e5af107 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -82,8 +82,8 @@ TEST_F(CPUBufferHubTest, PutBlockFromBuffer) { // Concurrent access tests TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { - const int num_threads = 10; - const int num_levels_per_thread = 5; + constexpr int num_threads = 10; + constexpr int num_levels_per_thread = 5; std::vector threads; std::atomic success_count {0}; @@ -145,8 +145,8 @@ TEST_F(CPUBufferHubTest, ConcurrentEraseSizeLevel) { } TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { - const int num_threads = 20; - const int blocks_per_thread = 5; + constexpr int num_threads = 20; + constexpr int blocks_per_thread = 5; std::vector threads; std::vector> thread_blocks(num_threads); std::atomic successful_gets {0}; From e9d71c6633b40067bb9857e491e9d582bec9eafc Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Wed, 26 Nov 2025 22:16:17 +0800 Subject: [PATCH 30/37] fix: fix ubuntu code coverage error --- .github/workflows/ubuntu.yml | 57 ++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index f935990..849b362 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -159,15 +159,60 @@ jobs: -DENABLE_TEST_COVERAGE=ON \ -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN_FILE" cmake --build . --config Debug - ctest --output-on-failure || true + + # Create empty coverage files to ensure they exist + find . -name "*.gcda" -exec rm {} \; 2>/dev/null || true + find . -name "*.gcno" -exec touch {} \; 2>/dev/null || true + + # Run tests one by one to ensure proper execution and output + echo "Running tests individually..." + for test_file in $(find . -name "NovaLLMTests*" -type f -executable); do + echo "Running $test_file..." + $test_file --gtest_output=xml:test_results.xml --gtest_filter="*Concurrent*" || ( + echo "Test $test_file completed with issues, checking for coverage data..." + ) + done + + # Explicitly run ctest + ctest --output-on-failure --verbose || echo "ctest failed, proceeding to coverage..." - name: Generate coverage report run: | cd build-coverage - # lcov --directory . --capture --output-file coverage.info - # Added --ignore-errors mismatch to handle GTest/GCC 13 issues - lcov --directory . --capture --output-file coverage.info --ignore-errors mismatch - lcov --remove coverage.info '/usr/*' '*/test/*' '*/conan/*' --output-file coverage.info - lcov --list coverage.info + echo "Checking for .gcda files..." + find . -name "*.gcda" -type f | head -10 + + # Capture coverage data with better error handling + echo "Capturing coverage data..." + lcov --directory . \ + --capture \ + --output-file coverage.info \ + --ignore-errors mismatch,gcov,unused \ + --no-external \ + --base-directory $GITHUB_WORKSPACE || echo "lcov capture had issues, proceeding..." + + # Check if coverage.info was created and has content + if [ -f coverage.info ]; then + echo "Coverage file created, size: $(du -h coverage.info)" + cat coverage.info | head -20 + + # Remove unwanted paths + echo "Removing unwanted paths..." + lcov --remove coverage.info \ + '/usr/*' \ + '*/test/*' \ + '*/conan/*' \ + '*/CMakeFiles/*' \ + '*/build*/*' \ + --output-file coverage_cleaned.info \ + --ignore-errors mismatch,gcov,unused + + # Check final coverage + echo "Final coverage report:" + lcov --list coverage_cleaned.info + mv coverage_cleaned.info coverage.info + else + echo "Coverage file not created, skipping removal step" + fi - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: From 2c6caaf04884707a197e4c0b38fa67ee993fe3f3 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Wed, 26 Nov 2025 23:20:42 +0800 Subject: [PATCH 31/37] fix: fix lambda capture in windows --- test/source/buffer_hub_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index e5af107..ccb5182 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -89,7 +89,7 @@ TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { // Each thread adds multiple size levels for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &success_count]() { + threads.emplace_back([this, t, &success_count, num_levels_per_thread]() { for (int i = 0; i < num_levels_per_thread; ++i) { // Create unique sizes for each thread to avoid conflicts uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100); // 100MB+ @@ -153,7 +153,7 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { // Multiple threads requesting blocks of the same size concurrently for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &thread_blocks, &successful_gets]() { + threads.emplace_back([this, t, &thread_blocks, &successful_gets, blocks_per_thread]() { for (int i = 0; i < blocks_per_thread; ++i) { auto* block = getBufferHub()->getBlock(Size(4096)); // 4KB blocks if (block != nullptr && block->data != nullptr) { From 539723b4c93cf832d7476be67163106f536550bc Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Wed, 26 Nov 2025 23:44:29 +0800 Subject: [PATCH 32/37] fix: fix the symbol export in windows --- include/NovaLLM/common/device.h | 6 +++--- test/CMakeLists.txt | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/NovaLLM/common/device.h b/include/NovaLLM/common/device.h index eee659a..39cb16c 100644 --- a/include/NovaLLM/common/device.h +++ b/include/NovaLLM/common/device.h @@ -10,9 +10,9 @@ struct DeviceTypeFlags { public: [[nodiscard]] bool has(DeviceType type) const; - void set(DeviceType type); + NOVA_LLM_API void set(DeviceType type); - void clear(DeviceType type); + NOVA_LLM_API void clear(DeviceType type); [[nodiscard]] constexpr DeviceType get() const; @@ -20,4 +20,4 @@ struct DeviceTypeFlags { uint32_t flags_ = 0; }; -} // namespace nova_llm \ No newline at end of file +} // namespace nova_llm diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b1472ec..394c538 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -33,8 +33,8 @@ target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_BINARY_DIR}/conan/include ) -target_link_libraries(${PROJECT_NAME} - PRIVATE +target_link_libraries(${PROJECT_NAME} + PRIVATE NovaLLM::NovaLLM GTest::gtest GTest::gtest_main @@ -46,6 +46,11 @@ target_link_libraries(${PROJECT_NAME} set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17) +# Define import macro for Windows (tests consume the DLL, library exports) +if(WIN32) + target_compile_definitions(${PROJECT_NAME} PRIVATE NOVA_LLM_IMPORTS) +endif() + # enable compiler warnings if(NOT TEST_INSTALLED_VERSION) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") From afd3b9b7f679438005f9b529c8b3d79dad242051 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 27 Nov 2025 00:11:11 +0800 Subject: [PATCH 33/37] fix: fix symbol export of API in windows --- source/memory/buffer_hub.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index a412052..9672d24 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -65,7 +65,7 @@ std::vector DefaultSizeLevelStrategy::gigaByteSizes() { } } // namespace -std::vector LevelAssignStrategy::assignLevels() { +NOVA_LLM_API std::vector LevelAssignStrategy::assignLevels() { std::vector ret; ret.insert(ret.end(), DefaultSizeLevelStrategy::byteSizes().begin(), DefaultSizeLevelStrategy::byteSizes().end()); ret.insert(ret.end(), DefaultSizeLevelStrategy::kiloByteSizes().begin(), DefaultSizeLevelStrategy::kiloByteSizes().end()); From 0c23533343a0b91e03c73fcef5a6fcd68a9cac13 Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 27 Nov 2025 00:33:32 +0800 Subject: [PATCH 34/37] feat: add API export in windows --- include/NovaLLM/common/device.h | 2 +- include/NovaLLM/memory/buffer_hub.h | 6 +++--- source/memory/buffer_hub.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/NovaLLM/common/device.h b/include/NovaLLM/common/device.h index 39cb16c..619ca9f 100644 --- a/include/NovaLLM/common/device.h +++ b/include/NovaLLM/common/device.h @@ -8,7 +8,7 @@ enum class DeviceType : uint32_t { UNKNOWN = 0, CPU = 0x01, CUDA = 0x02, METAL = struct DeviceTypeFlags { public: - [[nodiscard]] bool has(DeviceType type) const; + NOVA_LLM_API [[nodiscard]] bool has(DeviceType type) const; NOVA_LLM_API void set(DeviceType type); diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 2d0a113..d96124e 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -24,7 +24,7 @@ namespace nova_llm { // Forward declaration class BufferHub; -struct Size { +struct NOVA_LLM_API Size { private: uint64_t bytes_ = 0; @@ -71,7 +71,7 @@ class LevelAssignStrategy { virtual std::vector assignLevels(); }; -class BufferHubConfig { +class NOVA_LLM_API BufferHubConfig { public: BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit=Size(4UL*1024*1024*1024), LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95f) : device_type_(device_type), @@ -110,7 +110,7 @@ class BufferHub; * @brief Buffers at the specified size level * */ -class BufferHubLevel { +class NOVA_LLM_API BufferHubLevel { public: // Default constructor required for unordered_map BufferHubLevel() = default; diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 9672d24..a412052 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -65,7 +65,7 @@ std::vector DefaultSizeLevelStrategy::gigaByteSizes() { } } // namespace -NOVA_LLM_API std::vector LevelAssignStrategy::assignLevels() { +std::vector LevelAssignStrategy::assignLevels() { std::vector ret; ret.insert(ret.end(), DefaultSizeLevelStrategy::byteSizes().begin(), DefaultSizeLevelStrategy::byteSizes().end()); ret.insert(ret.end(), DefaultSizeLevelStrategy::kiloByteSizes().begin(), DefaultSizeLevelStrategy::kiloByteSizes().end()); From 53a80464b0d7272e4380414cb74f836528f962be Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 27 Nov 2025 00:41:48 +0800 Subject: [PATCH 35/37] fix: fix deleted function error --- include/NovaLLM/memory/buffer_hub.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index d96124e..8731680 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -114,7 +114,15 @@ class NOVA_LLM_API BufferHubLevel { public: // Default constructor required for unordered_map BufferHubLevel() = default; - + + // Move constructor and assignment for unique_ptr compatibility + BufferHubLevel(BufferHubLevel&&) = default; + BufferHubLevel& operator=(BufferHubLevel&&) = default; + + // Copy operations deleted to prevent unique_ptr copying + BufferHubLevel(const BufferHubLevel&) = delete; + BufferHubLevel& operator=(const BufferHubLevel&) = delete; + void initialize(uint32_t index, const Size& block_size, BufferHub* hub); // Returns non-owning pointer since pool retains ownership @@ -208,7 +216,7 @@ class NOVA_LLM_API BufferHub { // Thread safety: protects all mutable state mutable std::mutex mutex_; - std::unordered_map buffers_; + std::unordered_map, SizeHash, SizeEqual> buffers_; DeviceType device_type_; @@ -227,3 +235,4 @@ class NOVA_LLM_API BufferHub { #ifdef _MSC_VER #pragma warning(pop) #endif +#endif From d4bbf4505a2c93460508f60a1609626e9160c7ac Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 27 Nov 2025 00:47:33 +0800 Subject: [PATCH 36/37] fix: fix error of compiling --- include/NovaLLM/common/device.h | 4 ++-- include/NovaLLM/memory/buffer_hub.h | 3 +-- source/memory/buffer_hub.cpp | 14 +++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/NovaLLM/common/device.h b/include/NovaLLM/common/device.h index 619ca9f..bb248eb 100644 --- a/include/NovaLLM/common/device.h +++ b/include/NovaLLM/common/device.h @@ -8,13 +8,13 @@ enum class DeviceType : uint32_t { UNKNOWN = 0, CPU = 0x01, CUDA = 0x02, METAL = struct DeviceTypeFlags { public: - NOVA_LLM_API [[nodiscard]] bool has(DeviceType type) const; + [[nodiscard]] NOVA_LLM_API bool has(DeviceType type) const; NOVA_LLM_API void set(DeviceType type); NOVA_LLM_API void clear(DeviceType type); - [[nodiscard]] constexpr DeviceType get() const; + [[nodiscard]] NOVA_LLM_API constexpr DeviceType get() const; private: uint32_t flags_ = 0; diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 8731680..bbb2512 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -66,7 +66,7 @@ using BlockPtr = std::unique_ptr; // Raw non-owning pointer for temporary access using BlockRawPtr = Block*; -class LevelAssignStrategy { +class NOVA_LLM_API LevelAssignStrategy { public: virtual std::vector assignLevels(); }; @@ -235,4 +235,3 @@ class NOVA_LLM_API BufferHub { #ifdef _MSC_VER #pragma warning(pop) #endif -#endif diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index a412052..8836e4a 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -266,7 +266,7 @@ void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) { std::lock_guard lock(mutex_); auto& level = buffers_[level_block_sz]; - level.initialize(index, level_block_sz, this); + level->initialize(index, level_block_sz, this); } void BufferHub::eraseSizeLevel(const Size& level_sz) { @@ -279,16 +279,16 @@ void BufferHub::eraseSizeLevel(const Size& level_sz) { } auto& level = it->second; - if (level.busyBlockCount() > 0) { + if (level->busyBlockCount() > 0) { LOG_ERROR("Level with size %llu has %zu busy blocks, cannot erase now", - level_sz.totalBytes(), level.busyBlockCount()); + level_sz.totalBytes(), level->busyBlockCount()); return; } // Free all blocks in the block_list before erasing // The destructor will be called, but let's be explicit about cleanup LOG_INFO("Erasing level with size %llu, freeing %zu blocks", - level_sz.totalBytes(), level.totalBlocks()); + level_sz.totalBytes(), level->totalBlocks()); // Erasing from the map will call BufferHubLevel destructor, // which properly frees all blocks via tearDownBlock @@ -307,7 +307,7 @@ BlockRawPtr BufferHub::getBlock(const Size& sz) { BlockRawPtr ret_block {nullptr}; if (buffers_.count(level_sz)) { auto& level = buffers_[level_sz]; - auto block = level.fetchOneFreeBlock(); + auto block = level->fetchOneFreeBlock(); if (block && block->isValid()) { ret_block = block; } @@ -329,7 +329,7 @@ void BufferHub::putBlock(BlockRawPtr block_ptr) { Size level_size(size); if (buffers_.count(level_size)) { auto& level = buffers_[level_size]; - level.putOneBlock(block_ptr); + level->putOneBlock(block_ptr); } else { LOG_ERROR("Level size %d is not found in buffers!", level_size.totalBytes()); } @@ -346,7 +346,7 @@ void BufferHub::putBlockFromBuffer(Buffer& buffer) { auto& level = buffers_[level_sz]; auto* data = static_cast(buffer.data); - if (!level.tryPutBlock(data)) { + if (!level->tryPutBlock(data)) { // Maybe log warning if data was expected to be there? // But original code just did nothing if not found in busy_map. // Actually original code: if (level.busy_map.count(data)) { ... } From a2a1e1fb09d6b9089aec205f064d447d33d7b96d Mon Sep 17 00:00:00 2001 From: peterlau123 Date: Thu, 27 Nov 2025 00:55:54 +0800 Subject: [PATCH 37/37] fix: fix capture error on macos --- test/source/buffer_hub_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp index ccb5182..3356a57 100644 --- a/test/source/buffer_hub_test.cpp +++ b/test/source/buffer_hub_test.cpp @@ -89,7 +89,7 @@ TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { // Each thread adds multiple size levels for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &success_count, num_levels_per_thread]() { + threads.emplace_back([this, t, &success_count, num_levels_per_thread=num_levels_per_thread]() { for (int i = 0; i < num_levels_per_thread; ++i) { // Create unique sizes for each thread to avoid conflicts uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100); // 100MB+ @@ -153,7 +153,7 @@ TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { // Multiple threads requesting blocks of the same size concurrently for (int t = 0; t < num_threads; ++t) { - threads.emplace_back([this, t, &thread_blocks, &successful_gets, blocks_per_thread]() { + threads.emplace_back([this, t, &thread_blocks, &successful_gets, blocks_per_thread=blocks_per_thread]() { for (int i = 0; i < blocks_per_thread; ++i) { auto* block = getBufferHub()->getBlock(Size(4096)); // 4KB blocks if (block != nullptr && block->data != nullptr) {