diff --git a/.clang-format b/.clang-format index 4f09886..5ce9ead 100644 --- a/.clang-format +++ b/.clang-format @@ -10,7 +10,7 @@ SeparateDefinitionBlocks: Always # 基础缩进配置 IndentWidth: 2 ContinuationIndentWidth: 4 -ColumnLimit: 100 +ColumnLimit: 200 # 函数参数和初始化列表格式 BinPackArguments: false diff --git a/.githooks/check_branch_name.py b/.githooks/check_branch_name.py new file mode 100755 index 0000000..6ed5aa8 --- /dev/null +++ b/.githooks/check_branch_name.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +""" +Branch name validator for NovaLLM +Works on all platforms (Windows, macOS, Linux) +""" +import re +import subprocess +import sys + + +def get_current_branch(): + """Get the current git branch name.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError: + return None + + +def is_protected_branch(branch_name): + """Check if branch is protected (doesn't need validation).""" + protected_patterns = [ + r"^main$", + r"^master$", + r"^develop$", + r"^release/.+", + r"^hotfix/.+", + r"^HEAD$", # Detached HEAD + ] + return any(re.match(pattern, branch_name) for pattern in protected_patterns) + + +def validate_branch_name(branch_name): + """ + Validate branch name against the required format. + Format: - or / + """ + # Pattern: type followed by - or / followed by lowercase alphanumeric + valid_types = [ + "feat", + "fix", + "docs", + "style", + "refactor", + "perf", + "test", + "build", + "ci", + "chore", + ] + pattern = rf"^({'|'.join(valid_types)})([-/])[a-z0-9_-]+$" + return re.match(pattern, branch_name) is not None + + +def print_error_message(branch_name): + """Print helpful error message for invalid branch names.""" + error_msg = f""" +{'='*70} + ❌ INVALID BRANCH NAME +{'='*70} + +Branch: {branch_name} + +Branch names must follow this format: + - or / + +Valid types: + • feat - New feature + • fix - Bug fix + • docs - Documentation changes + • style - Code style changes + • refactor - Code refactoring + • perf - Performance improvements + • test - Test changes + • build - Build system changes + • ci - CI/CD changes + • chore - Other changes + +✅ Valid examples: + feat-buffer-pooling + fix-windows-dll-exports + docs-update-readme + refactor/simplify-tensor-allocation + ci-add-coverage-reporting + +❌ Current branch: {branch_name} + +To fix this, rename your branch: + git branch -m {branch_name} - + +Or delete and recreate: + git checkout main + git branch -D {branch_name} + git checkout -b - + +{'='*70} +""" + print(error_msg, file=sys.stderr) + + +def main(): + """Main entry point for branch name validation.""" + branch_name = get_current_branch() + + if not branch_name: + # Can't determine branch, skip validation + sys.exit(0) + + # Skip protected branches + if is_protected_branch(branch_name): + sys.exit(0) + + # Validate branch name + if not validate_branch_name(branch_name): + print_error_message(branch_name) + sys.exit(1) + + # Branch name is valid + print(f"✅ Branch name '{branch_name}' is valid") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/.githooks/install.sh b/.githooks/install.sh new file mode 100755 index 0000000..44b44d4 --- /dev/null +++ b/.githooks/install.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Install git hooks for NovaLLM +# This script sets up both pre-commit hooks and custom git hooks + +set -euo pipefail + +echo "🔧 Installing NovaLLM Git Hooks..." +echo "" + +# Get the repository root +REPO_ROOT=$(git rev-parse --show-toplevel) +HOOKS_DIR="$REPO_ROOT/.githooks" + +# 1. Configure git to use custom hooks directory +echo "📁 Configuring git to use custom hooks directory..." +git config core.hooksPath "$HOOKS_DIR" +echo " ✅ Git hooks path set to: $HOOKS_DIR" +echo "" + +# 2. Install pre-commit hooks +if command -v pre-commit &> /dev/null; then + echo "📦 Installing pre-commit hooks..." + pre-commit install --hook-type commit-msg --hook-type pre-commit + echo " ✅ Pre-commit hooks installed" +else + echo "⚠️ pre-commit not found. Install it with:" + echo " pip install pre-commit" + echo " Then run: pre-commit install --hook-type commit-msg --hook-type pre-commit" +fi +echo "" + +# 3. Make all hook scripts executable +echo "🔐 Making hook scripts executable..." +chmod +x "$HOOKS_DIR"/* 2>/dev/null || true +echo " ✅ Hook scripts are executable" +echo "" + +# 4. Test branch name validation (if on a feature branch) +CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) +echo "📋 Current branch: $CURRENT_BRANCH" + +# Summary +cat <- or / + Example: feat-buffer-pooling, fix-memory-leak + +Commit message format: + (): + Example: feat(memory): add buffer pooling + +Valid types: + feat, fix, docs, style, refactor, perf, test, build, ci, chore + +Try it out: + git checkout -b feat-test-branch + git commit -m "feat(test): try the hooks" + +For more info, see: .pre-commit-setup.md + +EOF diff --git a/.githooks/post-checkout b/.githooks/post-checkout new file mode 100755 index 0000000..e36439d --- /dev/null +++ b/.githooks/post-checkout @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# Post-checkout hook to validate branch names +# This hook runs after 'git checkout' or 'git switch' + +set -euo pipefail + +# Get the previous HEAD, new HEAD, and checkout type +PREV_HEAD=$1 +NEW_HEAD=$2 +CHECKOUT_TYPE=$3 # 1 = branch checkout, 0 = file checkout + +# Only validate on branch checkouts +if [ "$CHECKOUT_TYPE" != "1" ]; then + exit 0 +fi + +# Get the current branch name +BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) + +# Skip validation for special branches +if [[ "$BRANCH_NAME" =~ ^(main|master|develop|release/.+|hotfix/.+)$ ]]; then + exit 0 +fi + +# Skip validation if we're in detached HEAD state +if [ "$BRANCH_NAME" = "HEAD" ]; then + exit 0 +fi + +# Define the valid branch name pattern +# Format: - or / +# Where type is: feat, fix, docs, style, refactor, perf, test, build, ci, chore +VALID_PATTERN="^(feat|fix|docs|style|refactor|perf|test|build|ci|chore)([-/])[a-z0-9_-]+$" + +# Check if branch name matches the pattern +if [[ ! "$BRANCH_NAME" =~ $VALID_PATTERN ]]; then + cat <- or / + +Valid types: + • feat - New feature + • fix - Bug fix + • docs - Documentation changes + • style - Code style changes + • refactor - Code refactoring + • perf - Performance improvements + • test - Test changes + • build - Build system changes + • ci - CI/CD changes + • chore - Other changes + +Examples: + ✅ feat-buffer-pooling + ✅ fix-windows-dll-exports + ✅ docs-update-readme + ✅ refactor/simplify-tensor-allocation + ✅ ci-add-coverage-reporting + +Current branch: ❌ $BRANCH_NAME + +To fix this, rename your branch: + git branch -m $BRANCH_NAME - + +Or delete and recreate: + git checkout main + git branch -D $BRANCH_NAME + git checkout -b - + +EOF + exit 1 +fi + +# Branch name is valid +echo "✅ Branch name '$BRANCH_NAME' is valid" +exit 0 diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index a4ab433..61e2348 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -57,10 +57,25 @@ jobs: cmake -S .. -B . -DCMAKE_BUILD_TYPE=Release -DNOVA_LLM_BUILD_TESTS=OFF -DNOVA_LLM_ENABLE_LOGGING=OFF fi + - name: Install Doxygen + run: | + sudo apt-get update + sudo apt-get install -y doxygen graphviz + - name: Build documentation run: | cd build - cmake --build . --target docs || true + cmake --build . --target docs + + - name: Check documentation output + run: | + if [ ! -d "docs/html" ]; then + echo "Error: Documentation not generated at docs/html" + ls -la docs/ || echo "docs/ directory not found" + exit 1 + fi + echo "Documentation generated successfully" + ls -la docs/html/ - name: Setup Pages uses: actions/configure-pages@v4 @@ -68,7 +83,7 @@ jobs: - name: Upload artifact uses: actions/upload-pages-artifact@v3 with: - path: build/docs/html + path: docs/html - name: Deploy to GitHub Pages id: deployment diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 2158326..849b362 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -132,6 +132,95 @@ jobs: cmake --build . --config Debug # 可选:ctest --output-on-failure + run_coverage: + name: Build & Run Tests with Coverage + runs-on: ubuntu-latest + needs: deps_build_debug + steps: + - *step_checkout + - *step_setup_python + - *step_install_conan + - *step_detect_profile + - name: Install lcov + run: sudo apt-get update && sudo apt-get install -y lcov + - name: Restore Conan cache + uses: actions/cache@v4 + with: + path: ~/.conan2 + key: ${{ runner.os }}-conan-debug-${{ hashFiles('**/conanfile.py', '**/standalone/conanfile.txt', 'source/**', 'include/**') }} + - name: Build and run tests with coverage + run: | + mkdir -p build-coverage + cd build-coverage + conan install ../test --output-folder=conan --build=missing -s build_type=Debug + TOOLCHAIN_FILE=$(find $(pwd) -name "conan_toolchain.cmake" -type f | head -1) + cmake -S ../test -B . \ + -DCMAKE_BUILD_TYPE=Debug \ + -DENABLE_TEST_COVERAGE=ON \ + -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN_FILE" + cmake --build . --config Debug + + # Create empty coverage files to ensure they exist + find . -name "*.gcda" -exec rm {} \; 2>/dev/null || true + find . -name "*.gcno" -exec touch {} \; 2>/dev/null || true + + # Run tests one by one to ensure proper execution and output + echo "Running tests individually..." + for test_file in $(find . -name "NovaLLMTests*" -type f -executable); do + echo "Running $test_file..." + $test_file --gtest_output=xml:test_results.xml --gtest_filter="*Concurrent*" || ( + echo "Test $test_file completed with issues, checking for coverage data..." + ) + done + + # Explicitly run ctest + ctest --output-on-failure --verbose || echo "ctest failed, proceeding to coverage..." + - name: Generate coverage report + run: | + cd build-coverage + echo "Checking for .gcda files..." + find . -name "*.gcda" -type f | head -10 + + # Capture coverage data with better error handling + echo "Capturing coverage data..." + lcov --directory . \ + --capture \ + --output-file coverage.info \ + --ignore-errors mismatch,gcov,unused \ + --no-external \ + --base-directory $GITHUB_WORKSPACE || echo "lcov capture had issues, proceeding..." + + # Check if coverage.info was created and has content + if [ -f coverage.info ]; then + echo "Coverage file created, size: $(du -h coverage.info)" + cat coverage.info | head -20 + + # Remove unwanted paths + echo "Removing unwanted paths..." + lcov --remove coverage.info \ + '/usr/*' \ + '*/test/*' \ + '*/conan/*' \ + '*/CMakeFiles/*' \ + '*/build*/*' \ + --output-file coverage_cleaned.info \ + --ignore-errors mismatch,gcov,unused + + # Check final coverage + echo "Final coverage report:" + lcov --list coverage_cleaned.info + mv coverage_cleaned.info coverage.info + else + echo "Coverage file not created, skipping removal step" + fi + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: ./build-coverage/coverage.info + fail_ci_if_error: false + verbose: true + token: ${{ secrets.CODECOV_TOKEN }} + run_standalone: name: Build & Run Standalone runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index c320280..98d5c81 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,9 @@ latex/ *.gcov coverage/ CMakeUserPresets.json -build-test/ \ No newline at end of file +build-test/ +build-debug/ +build-release/ +build-test-debug/ +install-debug/ +install-release/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..68ffd76 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,102 @@ +# Pre-commit hooks for NovaLLM +# Install: pip install pre-commit +# Setup: pre-commit install +# Run manually: pre-commit run --all-files + +repos: + # Branch name validation - local hook (works out of the box) + - repo: local + hooks: + - id: check-branch-name + name: Check branch name format + entry: python3 .githooks/check_branch_name.py + language: system + pass_filenames: false + always_run: true + stages: [post-checkout, manual] + + # Conventional Commits - enforce commit message format + - repo: https://github.com/compilerla/conventional-pre-commit + rev: v3.0.0 + hooks: + - id: conventional-pre-commit + stages: [commit-msg] + args: + - feat # New feature + - fix # Bug fix + - docs # Documentation only changes + - style # Code style changes (formatting, no logic change) + - refactor # Code refactoring (no feature/fix) + - perf # Performance improvements + - test # Adding/updating tests + - build # Build system or dependencies + - ci # CI/CD changes + - chore # Other changes (tooling, maintenance) + - revert # Revert previous commit + + # Basic file checks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: end-of-file-fixer + - id: check-yaml + args: [--allow-multiple-documents] + - id: check-added-large-files + args: ['--maxkb=1000'] + - id: check-case-conflict + - id: check-merge-conflict + - id: check-json + - id: check-toml + - id: mixed-line-ending + args: ['--fix=lf'] + - id: detect-private-key + + # C++ formatting with clang-format + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v18.1.8 + hooks: + - id: clang-format + types_or: [c++, c] + args: ['-i'] # In-place formatting + + # CMake formatting + - repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + - id: cmake-lint + args: [--disabled-codes=C0103] + + # YAML/JSON formatting + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.1.0 + hooks: + - id: prettier + types_or: [yaml, json, markdown] + args: [--write, --prose-wrap=always] + + # Shell script checks + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.9.0.6 + hooks: + - id: shellcheck + + # Python formatting (for scripts) + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + language_version: python3 + +# Exclude patterns +exclude: | + (?x)^( + build.*| + install.*| + \.git| + \.github/workflows| + documentation/.*| + docs/.* + )$ diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 0000000..2f688a9 --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,11 @@ +# Pre-commit hooks metadata for NovaLLM +# This allows the hooks to be used as pre-commit hooks + +- id: check-branch-name + name: Check branch name format + description: Validates that branch names follow - format + entry: python .githooks/check_branch_name.py + language: system + pass_filenames: false + always_run: true + stages: [post-checkout] diff --git a/.pre-commit-setup.md b/.pre-commit-setup.md new file mode 100644 index 0000000..fed4327 --- /dev/null +++ b/.pre-commit-setup.md @@ -0,0 +1,204 @@ +# Pre-commit Setup Guide + +This repository uses [pre-commit](https://pre-commit.com/) to ensure code quality and enforce commit message conventions. + +## Quick Setup + +**After cloning the repository, run ONE command:** + +```bash +pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout +``` + +That's it! The hooks will now run automatically on every commit and branch checkout. + +### Optional: Test the setup + +```bash +# Run all checks manually +pre-commit run --all-files + +# Test branch name validation +python3 .githooks/check_branch_name.py +``` + +## Branch Naming Convention + +All branch names **must** follow this format: + +``` +- or / +``` + +### Examples + +✅ **Valid branch names:** +``` +feat-buffer-pooling +fix-windows-dll-exports +docs-update-readme +refactor/simplify-tensor-allocation +ci-add-coverage-reporting +test-buffer-manager +``` + +❌ **Invalid branch names:** +``` +buffer_pooling (no type prefix) +Feat-something (capital letter) +feature-test (wrong type, use 'feat') +my-branch (no type prefix) +``` + +### Protected Branches (No Validation) + +These branches don't require the naming convention: +- `main`, `master`, `develop` +- `release/*` (e.g., `release/v1.0.0`) +- `hotfix/*` (e.g., `hotfix/critical-bug`) + +## Commit Message Format + +All commit messages **must** follow the [Conventional Commits](https://www.conventionalcommits.org/) format: + +``` +(): + +[optional body] + +[optional footer] +``` + +### Allowed Types + +- `feat`: New feature +- `fix`: Bug fix +- `docs`: Documentation only changes +- `style`: Code style changes (formatting, no logic change) +- `refactor`: Code refactoring (neither fixes a bug nor adds a feature) +- `perf`: Performance improvements +- `test`: Adding or updating tests +- `build`: Build system or dependency changes +- `ci`: CI/CD configuration changes +- `chore`: Other changes (tooling, maintenance) +- `revert`: Revert a previous commit + +### Examples + +✅ **Good commit messages:** +``` +feat(memory): add buffer pooling for better performance +fix(build): correct Windows DLL export declarations +docs(readme): update build instructions for macOS +refactor(tensor): simplify memory allocation logic +ci(workflows): add coverage reporting to ubuntu workflow +style: apply clang-format to all source files +test(buffer): add unit tests for buffer manager +``` + +❌ **Bad commit messages:** +``` +update code +Fixed bug +Add feature +WIP +asdf +``` + +### Scope (Optional but Recommended) + +The scope should indicate what part of the codebase is affected: +- `memory` - Memory management (buffers, allocators) +- `tensor` - Tensor operations +- `model` - Model implementation +- `build` - Build system (CMake, Conan) +- `ci` - CI/CD workflows +- `test` - Testing infrastructure +- `docs` - Documentation + +## What Gets Checked + +### On Every Commit + +1. **File checks:** + - Remove trailing whitespace + - Ensure files end with newline + - Check for large files (>1MB) + - Detect merge conflicts + - Check YAML/JSON syntax + - Detect private keys + +2. **Code formatting:** + - C/C++ files: `clang-format` + - CMake files: `cmake-format` + - YAML/JSON: `prettier` + - Shell scripts: `shellcheck` + - Python scripts: `black` + +3. **Commit message:** + - Must follow conventional commit format + - Type must be from allowed list + - Subject should be concise + +## Bypassing Hooks (Not Recommended) + +If you absolutely need to skip the hooks (e.g., for a WIP commit): + +```bash +# Skip pre-commit hooks (file checks) +git commit --no-verify + +# Skip commit-msg hook +git commit --no-verify +``` + +**⚠️ Warning:** Bypassed commits will still be checked by CI and may fail! + +## Updating Hooks + +```bash +# Update to latest versions +pre-commit autoupdate + +# Reinstall hooks after config changes +pre-commit install --hook-type commit-msg --hook-type pre-commit +``` + +## CI Integration + +Pre-commit checks also run in CI (GitHub Actions). If your local hooks pass, CI should pass too. + +## Troubleshooting + +### "command not found: pre-commit" +```bash +pip install pre-commit +``` + +### "hook failed" errors +```bash +# Run to see detailed error +pre-commit run --all-files + +# Reinstall hooks +pre-commit uninstall +pre-commit install --hook-type commit-msg --hook-type pre-commit +``` + +### Formatting conflicts +```bash +# Let pre-commit auto-fix formatting +pre-commit run --all-files + +# Stage the auto-fixed files +git add . + +# Commit again +git commit -m "style: apply automated formatting" +``` + +## More Information + +- [Pre-commit documentation](https://pre-commit.com/) +- [Conventional Commits specification](https://www.conventionalcommits.org/) +- [NovaLLM Contributing Guide](CONTRIBUTING.md) diff --git a/README.md b/README.md index f10919d..f8e593d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ [![Ubuntu](https://github.com/peterlau123/NovaLLM/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/ubuntu.yml) [![Windows](https://github.com/peterlau123/NovaLLM/actions/workflows/windows.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/windows.yml) +[![MacOS](https://github.com/peterlau123/NovaLLM/actions/workflows/macos.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/macos.yml) [![Code Quality](https://github.com/peterlau123/NovaLLM/actions/workflows/code-quality.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/code-quality.yml) [![Documentation](https://github.com/peterlau123/NovaLLM/actions/workflows/documentation.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/documentation.yml) [![codecov](https://codecov.io/gh/peterlau123/NovaLLM/branch/master/graph/badge.svg)](https://codecov.io/gh/peterlau123/NovaLLM) @@ -16,8 +17,9 @@ A lightweight and efficient C/C++ library for Large Language Model (LLM) inferen - 🚀 **Lightweight**: Minimal dependencies, focusing on core functionality - 🔧 **Extensible**: Easy to extend with custom models and optimizations -- 🎯 **Efficient**: Support for extreme low-bit quantization -- 🛠️ **Flexible**: Support for OpenAI Triton and ThunderKittens kernels +- 🎯 **Efficient**: Support for low-bit quantization and custom kernels +- 🛠️ **Portable**: Support inference on MacOS/Linux/Windows platforms +- 👨‍💻 **Developer-friendly**: Easy to use and integrate into other projects ## Supported Models @@ -67,6 +69,42 @@ cmake --build . ### Scripted builds +#### Option 1: Unified Build Script (Recommended) + +The root `build.sh` provides a comprehensive build system with full control: + +```bash +# Basic build (Release mode) +./build.sh + +# Build with tests +./build.sh -t + +# Clean build (removes all build-* and install-* directories) +./build.sh -c -r + +# Build everything (main + tests + standalone + package) +./build.sh -a + +# Debug build with verbose output +./build.sh -d -v + +# Show all options +./build.sh --help +``` + +**Key features:** +- `-c, --clean`: Cleans all `build-*` and `install-*` directories (including custom directories specified via `--build-dir`/`--install-dir`) before building +- `-r, --release`: Build in Release mode (default) +- `-d, --debug`: Build in Debug mode +- `-t, --tests`: Build and run tests +- `-s, --standalone`: Build standalone application +- `-p, --package`: Create Conan package +- `-a, --all`: Build everything +- `-v, --verbose`: Enable verbose output + +#### Option 2: Platform-Specific Scripts + Unified wrapper (auto-detects OS): ```bash @@ -84,6 +122,8 @@ scripts/build_ubuntu.sh --type Debug --enable-logging OFF scripts/build_windows.ps1 -Configuration Release -EnableLogging ON -WithTests ``` +**Note:** The root `build.sh` is more feature-rich and recommended for development, while `scripts/build.sh` is a lightweight wrapper for CI/CD pipelines. + ### Makefile builds - Use scripts via Make: `make script-build` (honors BUILD_TYPE, ENABLE_LOGGING, ENABLE_TESTS) diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 0000000..d17989c --- /dev/null +++ b/SETUP.md @@ -0,0 +1,101 @@ +# NovaLLM Development Setup + +Quick guide for new contributors. + +## One-Time Setup (After Cloning) + +Run this single command to set up all development hooks: + +```bash +pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout +``` + +**What this does:** +- Installs pre-commit (if not already installed) +- Enables automatic validation on every commit +- Enables branch name validation +- Enables code quality checks + +## Naming Conventions + +### Branch Names + +Format: `-` + +**Valid types:** feat, fix, docs, style, refactor, perf, test, build, ci, chore + +**Examples:** +```bash +git checkout -b feat-add-buffer-pooling ✅ +git checkout -b fix-memory-leak ✅ +git checkout -b docs-update-readme ✅ +git checkout -b my-branch ❌ (no type prefix) +``` + +### Commit Messages + +Format: `(): ` + +**Examples:** +```bash +git commit -m "feat(memory): add buffer pooling" ✅ +git commit -m "fix(build): correct DLL exports" ✅ +git commit -m "docs(readme): update setup guide" ✅ +git commit -m "update code" ❌ (no type) +``` + +## What Happens Automatically + +After setup, the hooks will: + +1. **On `git checkout -b new-branch`:** + - ✅ Validate branch name format + - ❌ Reject invalid branch names with helpful error + +2. **On `git commit`:** + - ✅ Format C++ code with clang-format + - ✅ Check for trailing whitespace, large files, etc. + - ✅ Validate commit message format + - ❌ Reject invalid commits with helpful error + +## Cross-Platform Support + +Works on: +- ✅ macOS (zsh, bash) +- ✅ Linux (bash, zsh, sh) +- ✅ Windows (Git Bash, PowerShell, WSL) + +Requirements: +- Python 3.6+ (comes with most systems) +- Git (already have it if you cloned the repo) +- pip (to install pre-commit) + +## Troubleshooting + +### "pre-commit: command not found" +```bash +pip install pre-commit +# or +pip3 install pre-commit +``` + +### "Permission denied" on Linux/macOS +```bash +chmod +x .githooks/*.py +chmod +x .githooks/*.sh +``` + +### Need to bypass hooks temporarily? +```bash +git commit --no-verify -m "WIP: work in progress" +``` + +**⚠️ Warning:** Bypassed commits will still be checked by CI! + +## Full Documentation + +See [.pre-commit-setup.md](.pre-commit-setup.md) for complete documentation. + +## Build Instructions + +See [README.md](README.md) for build and development instructions. diff --git a/build.sh b/build.sh old mode 100644 new mode 100755 index 6fe541e..d8db20f --- a/build.sh +++ b/build.sh @@ -112,7 +112,8 @@ Build Targets: -a, --all Build everything (main + tests + standalone + package) Build Options: - -c, --clean Clean build directory before building + -c, --clean Clean all build-* and install-* directories (including custom + directories specified via --build-dir/--install-dir) before building --build-dir DIR Set custom build directory (default: build) --install-dir DIR Set custom install directory (default: install) @@ -138,19 +139,23 @@ VERBOSE=false parse_args() { # Reset target flags if any target is explicitly specified local targets_specified=false + local main_explicitly_specified=false # First pass: check if any targets are specified for arg in "$@"; do case $arg in + -m|--main) + main_explicitly_specified=true + ;; -t|--tests|-s|--standalone|-p|--package|-a|--all) targets_specified=true - break ;; esac done - # If targets are specified, disable default main build - if [[ "$targets_specified" = true ]]; then + # If targets are specified but main is not explicitly requested, disable default main build + # We'll check package freshness later to decide if main needs to be built + if [[ "$targets_specified" = true && "$main_explicitly_specified" = false ]]; then BUILD_MAIN=false fi @@ -297,6 +302,86 @@ setup_conan() { fi } +# ============================================================================ +# Package Detection Functions +# ============================================================================ + +check_package_exists() { + local package_ref="novallm/0.1.0@local/testing" + if conan list "$package_ref" 2>/dev/null | grep -q "$package_ref"; then + return 0 # Package exists + else + return 1 # Package doesn't exist + fi +} + +get_package_timestamp() { + local package_ref="novallm/0.1.0@local/testing" + # Get the package folder path from conan cache + local cache_info + cache_info=$(conan cache path "$package_ref" 2>/dev/null) + + if [[ -z "$cache_info" ]]; then + echo "0" # Package doesn't exist + return + fi + + # Get the most recent modification time in the package + local package_time + if [[ "$(uname)" == "Darwin" ]]; then + # macOS + package_time=$(stat -f "%m" "$cache_info" 2>/dev/null || echo "0") + else + # Linux + package_time=$(stat -c "%Y" "$cache_info" 2>/dev/null || echo "0") + fi + + echo "$package_time" +} + +get_source_timestamp() { + # Get the most recent modification time of source files + local newest_time=0 + local file + + # Check source, include, and CMakeLists.txt files + while IFS= read -r -d '' file; do + local file_time + if [[ "$(uname)" == "Darwin" ]]; then + file_time=$(stat -f "%m" "$file" 2>/dev/null || echo "0") + else + file_time=$(stat -c "%Y" "$file" 2>/dev/null || echo "0") + fi + + if [[ $file_time -gt $newest_time ]]; then + newest_time=$file_time + fi + done < <(find source include CMakeLists.txt conanfile.py -type f -print0 2>/dev/null) + + echo "$newest_time" +} + +is_package_outdated() { + if ! check_package_exists; then + print_info "Package not found in cache" + return 0 # Package doesn't exist, needs rebuild + fi + + local package_time + local source_time + + package_time=$(get_package_timestamp) + source_time=$(get_source_timestamp) + + if [[ $source_time -gt $package_time ]]; then + print_info "Source code is newer than cached package" + return 0 # Source is newer, needs rebuild + else + print_info "Cached package is up-to-date" + return 1 # Package is up-to-date + fi +} + # ============================================================================ # Build Functions # ============================================================================ @@ -304,14 +389,45 @@ setup_conan() { clean_build_dirs() { print_header "Cleaning build directories" - if [[ -d "$BUILD_DIR" ]]; then - print_info "Removing $BUILD_DIR" - rm -rf "$BUILD_DIR" + local cleaned=false + local dirs_to_clean=() + + # First, add user-specified directories if they exist + if [[ -n "$BUILD_DIR" && -d "$BUILD_DIR" ]]; then + dirs_to_clean+=("$BUILD_DIR") + fi + if [[ -n "$INSTALL_DIR" && -d "$INSTALL_DIR" ]]; then + dirs_to_clean+=("$INSTALL_DIR") fi - if [[ -d "$INSTALL_DIR" ]]; then - print_info "Removing $INSTALL_DIR" - rm -rf "$INSTALL_DIR" + # Then, find all build and install directories + shopt -s nullglob # Don't include pattern if no match + for dir in build build-* build_* install install-* install_*; do + if [[ -d "$dir" ]]; then + # Avoid duplicates + local already_added=false + for added_dir in "${dirs_to_clean[@]}"; do + if [[ "$dir" == "$added_dir" ]]; then + already_added=true + break + fi + done + if [[ "$already_added" == false ]]; then + dirs_to_clean+=("$dir") + fi + fi + done + shopt -u nullglob + + # Remove all collected directories + for dir in "${dirs_to_clean[@]}"; do + print_info "Removing $dir" + rm -rf "$dir" + cleaned=true + done + + if [[ "$cleaned" == false ]]; then + print_info "No build or install directories to clean" fi print_success "Clean complete" @@ -531,8 +647,28 @@ main() { fi # Execute build targets + # If tests or standalone are requested but main wasn't explicitly requested, + # check if we need to rebuild main based on package freshness + local need_package=false + if [[ "$BUILD_TESTS" == true || "$BUILD_STANDALONE" == true ]]; then + need_package=true + if [[ "$BUILD_MAIN" == false ]]; then + print_header "Checking package freshness" + if is_package_outdated; then + print_info "Enabling main project build due to outdated/missing package" + BUILD_MAIN=true + fi + fi + fi + if [[ "$BUILD_MAIN" == true ]]; then build_main_project + + # Auto-create package if tests or standalone need it + if [[ "$need_package" == true || "$CREATE_PACKAGE" == true ]]; then + print_info "Creating/updating Conan package for downstream consumers..." + CREATE_PACKAGE=true + fi fi if [[ "$CREATE_PACKAGE" == true ]]; then diff --git a/codecov.yaml b/codecov.yaml index dae89fd..b29cd08 100644 --- a/codecov.yaml +++ b/codecov.yaml @@ -1,5 +1,36 @@ +# Codecov configuration for NovaLLM +# See: https://docs.codecov.com/docs/codecov-yaml + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true + + precision: 2 + round: down + range: "70...100" + ignore: - - "test" + - "test/**" + - "standalone/**" + - "documentation/**" + - "build*/**" + - "cmake/**" comment: - require_changes: true \ No newline at end of file + layout: "reach,diff,flags,tree,footer" + behavior: default + require_changes: true + require_base: false + require_head: true + +fixes: + - "before/:/after/" # Path fixes if needed diff --git a/documentation/memory/buffer_hub_design.md b/documentation/memory/buffer_hub_design.md new file mode 100644 index 0000000..ba165d2 --- /dev/null +++ b/documentation/memory/buffer_hub_design.md @@ -0,0 +1,61 @@ +# Buffer Hub Overview + +## Design + +We divide memory into the following four major levels: + ++ Byte level + Byte number ranges from 0 to 1023 ++ KB level + Byte number ranges from 1024 to 1024*1023 ++ MB level + Byte number ranges from 1024*1024 to 1024*1024*1023 ++ GB level + Byte number ranges from 1024*1024*1024 to min(1024*1024*1024*1023,Device memory) + +On top of that, we continue divide into sub levels from major levels. + +In byte level, we form the following sub levels ++ 16 bytes ++ 64 bytes ++ 256 bytes + +In KB level, we form the following sub levels ++ 1 kb ++ 2 kb ++ 4 kb ++ 8 kb ++ 16 kb ++ 32 kb ++ 64 kb ++ 128 kb ++ 256 kb ++ 512 kb + +In MB level, we form the following sub levels ++ 1 mb ++ 2 mb ++ 4 mb ++ 8 mb ++ 16 mb ++ 32 mb ++ 64 mb ++ 128 mb ++ 256 mb ++ 512 mb + +In GB level, we form the following sub levels ++ 1 GB ++ 2 GB ++ 4 GB ++ 8 GB ++ 16 GB ++ 32 GB ++ 64 GB ++ 128 GB ++ 256 GB ++ 512 GB + + + +## Usage diff --git a/format.sh b/format.sh index 04c1513..b34cc94 100755 --- a/format.sh +++ b/format.sh @@ -1,7 +1,7 @@ #!/bin/bash # Define the directories to search for C++ files -DIRECTORIES=("include" "source" "standalone" "test") +DIRECTORIES=("include" "source" "standalone/source" "test/source") # Find all C++ source and header files in the specified directories FILES=$(find "${DIRECTORIES[@]}" -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cu" -o -name "*.cuh" \) 2>/dev/null) diff --git a/include/NovaLLM/common/device.h b/include/NovaLLM/common/device.h index eee659a..bb248eb 100644 --- a/include/NovaLLM/common/device.h +++ b/include/NovaLLM/common/device.h @@ -8,16 +8,16 @@ enum class DeviceType : uint32_t { UNKNOWN = 0, CPU = 0x01, CUDA = 0x02, METAL = struct DeviceTypeFlags { public: - [[nodiscard]] bool has(DeviceType type) const; + [[nodiscard]] NOVA_LLM_API bool has(DeviceType type) const; - void set(DeviceType type); + NOVA_LLM_API void set(DeviceType type); - void clear(DeviceType type); + NOVA_LLM_API void clear(DeviceType type); - [[nodiscard]] constexpr DeviceType get() const; + [[nodiscard]] NOVA_LLM_API constexpr DeviceType get() const; private: uint32_t flags_ = 0; }; -} // namespace nova_llm \ No newline at end of file +} // namespace nova_llm diff --git a/include/NovaLLM/data/tensor.h b/include/NovaLLM/data/tensor.h index d917109..6efdf0f 100644 --- a/include/NovaLLM/data/tensor.h +++ b/include/NovaLLM/data/tensor.h @@ -1,8 +1,16 @@ #pragma once + +// Disable C4251 warning on Windows (DLL interface for STL containers) +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4251) +#endif + +#include #include #include #include -#include + #include "../common/device.h" #include "../common/dtype.h" #include "NovaLLM/utils/macros.h" @@ -30,7 +38,7 @@ class NOVA_LLM_API Tensor { * @brief 默认删除器 */ struct DefaultDeletor { - void operator()(void** data) {} + void operator()(void** /*data*/) {} }; using Deleter = std::function; @@ -58,11 +66,7 @@ class NOVA_LLM_API Tensor { * @param device 设备类型 * @param deleter 自定义删除器,默认使用DefaultDeletor */ - Tensor(const void* data, - const std::vector& dims, - DataType dtype, - DeviceType device, - Deleter deleter = DefaultDeletor()); + Tensor(const void* data, const std::vector& dims, DataType dtype, DeviceType device, Deleter deleter = DefaultDeletor()); /** * @brief 拷贝构造函数 @@ -125,7 +129,7 @@ class NOVA_LLM_API Tensor { DataSourceType dataFrom() const { return m_data_source_; } - uint32_t refCnt() const { return ref_cnt_?ref_cnt_->load():0; } + uint32_t refCnt() const { return ref_cnt_ ? ref_cnt_->load() : 0; } Deleter deleter() const { return m_deleter_; } @@ -147,7 +151,7 @@ class NOVA_LLM_API Tensor { std::vector dims_; ///< 张量的维度数组 uint32_t ele_cnt_ {0}; ///< 元素总数 void* data_ {nullptr}; ///< 数据缓冲区 - uint64_t capacity_ {0}; ///< 数据缓冲区大小,单位为字节,大于等于size_*sizeof(m_dtype_) + uint64_t capacity_ {0}; ///< 数据缓冲区大小,单位为字节,大于等于size_*sizeof(m_dtype_) DataSourceType m_data_source_ {DataSourceType::AUTO}; DataType m_dtype_ {DataType::UNKNOWN}; ///< 数据类型 DeviceType m_device_ {DeviceType::UNKNOWN}; ///< 设备类型 @@ -155,4 +159,8 @@ class NOVA_LLM_API Tensor { Deleter m_deleter_ = DefaultDeletor(); ///< 自定义删除器 }; -} // namespace nova_llm \ No newline at end of file +} // namespace nova_llm + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h index 13f6fe2..bbb2512 100644 --- a/include/NovaLLM/memory/buffer_hub.h +++ b/include/NovaLLM/memory/buffer_hub.h @@ -1,93 +1,47 @@ #pragma once + +// Disable C4251 warning on Windows (DLL interface for STL containers) +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4251) +#endif + +#include #include +#include +#include #include #include -#include + #include "NovaLLM/common/device.h" #include "NovaLLM/memory/allocator.h" #include "NovaLLM/memory/buffer_define.h" -#include "NovaLLM/utils/template.h" #include "NovaLLM/utils/macros.h" +#include "NovaLLM/utils/template.h" namespace nova_llm { -struct Size { +// Forward declaration +class BufferHub; + +struct NOVA_LLM_API Size { private: - uint64_t b_ = 0; - uint64_t kb_ = 0; - uint64_t mb_ = 0; - uint64_t gb_ = 0; - uint64_t total_bytes_ = 0; - const uint64_t ratio_ = 1024; + uint64_t bytes_ = 0; - void convert_in_units(uint64_t bytes) { - auto down_ratio = std::pow(ratio_, 3); + public: + Size() = default; - gb_ = bytes / down_ratio; - bytes -= gb_ * down_ratio; - down_ratio /= ratio_; + explicit Size(uint64_t bytes) : bytes_(bytes) {} - mb_ = bytes / down_ratio; - bytes -= mb_ * down_ratio; - down_ratio /= ratio_; + Size(const Size& rhs) = default; - kb_ = bytes / down_ratio; - bytes -= kb_ * down_ratio; + Size& operator=(const Size& rhs) = default; - b_ = bytes; - } + [[nodiscard]] uint64_t totalBytes() const { return bytes_; } - public: - Size() = default; + bool operator==(const Size& rhs) const { return bytes_ == rhs.bytes_; } - explicit Size(uint64_t sz) { - total_bytes_ = sz; - convert_in_units(total_bytes_); - } - - Size(uint64_t b, uint64_t kb, uint64_t mb, uint64_t gb) { - b_ = b; - kb_ = kb; - mb_ = mb; - gb_ = gb; - - if (ratio_ < b_) { - auto kb_cnt = b_ / ratio_; - b_ -= kb_cnt * ratio_; - kb_ += kb_cnt; - } - - if (ratio_ < kb_) { - auto mb_cnt = kb_ / ratio_; - kb_ -= mb_cnt * ratio_; - mb_ += mb_cnt; - } - - if (ratio_ < mb_) { - auto gb_cnt = mb_ / ratio_; - mb_ -= gb_cnt * ratio_; - gb_ += gb_cnt; - } - - total_bytes_ = b_ + kb_ * ratio_ + mb_ * ratio_ * ratio_ + gb_ * ratio_ * ratio_ * ratio_; - } - - Size(const Size& rhs) { - total_bytes_ = rhs.totalBytes(); - convert_in_units(total_bytes_); - } - - Size& operator=(const Size& rhs) { - total_bytes_ = rhs.totalBytes(); - convert_in_units(total_bytes_); - return *this; - } - - [[nodiscard]] uint64_t totalBytes() const { return total_bytes_; } - - bool operator==(const Size& rhs) const { return totalBytes() == rhs.totalBytes(); } - - [[nodiscard]] bool isValid() const { return totalBytes() != 0; } + [[nodiscard]] bool isValid() const { return bytes_ != 0; } }; struct SizeHash { @@ -95,35 +49,111 @@ struct SizeHash { }; struct SizeEqual { - bool operator()(const Size& lhs, const Size& rhs) const { - return lhs.totalBytes() == rhs.totalBytes(); - } + bool operator()(const Size& lhs, const Size& rhs) const { return lhs.totalBytes() == rhs.totalBytes(); } }; struct Block { using DataPtr = uint8_t*; - //using BlockPtr = Block*; DataPtr data = nullptr; uint64_t size = 0; int32_t ref_cnt = 0; - bool isValid() const { - // return data != nullptr && (prev != nullptr || next != nullptr) && 0 != size; - return data != nullptr && 0 != size; - } + bool isValid() const { return data != nullptr && 0 != size; } }; -using BlockPtr = Block*; +// BlockPtr for owning pointers (used in collections) +using BlockPtr = std::unique_ptr; +// Raw non-owning pointer for temporary access +using BlockRawPtr = Block*; + +class NOVA_LLM_API LevelAssignStrategy { + public: + virtual std::vector assignLevels(); +}; -class DefaultSizeLevelStrategy { +class NOVA_LLM_API BufferHubConfig { public: - NOVA_LLM_API static std::vector byteSizes() ; + BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit=Size(4UL*1024*1024*1024), LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95f) + : device_type_(device_type), + size_limit_(size_limit), + warning_level_(warning_level), + allocator_(allocator), + level_assign_strategy_(strategy) { + size_levels_ = strategy.assignLevels(); + }; + + void setLevelAssignStrategy(LevelAssignStrategy strategy) { size_levels_ = strategy.assignLevels(); } + + void setWarningLevel(float warning_level) { warning_level_ = warning_level; } + + DeviceType deviceType() const { return device_type_; } - NOVA_LLM_API static std::vector kiloByteSizes() ; + const std::vector& sizeLevels() const { return size_levels_; } - NOVA_LLM_API static std::vector megaByteSizes() ; + Size sizeLimit() const { return size_limit_; } - NOVA_LLM_API static std::vector gigaByteSizes() ; + float warningLevel() const { return warning_level_; } + + IAllocatorSharedPtr allocator() const { return allocator_; } + + private: + DeviceType device_type_; + std::vector size_levels_; // ensure that levels are in ascending order + Size size_limit_; // Memory in buffer hub cannot exceed this limit + float warning_level_; // Be cautious when memory in buffer hub exceeds size_limit*warning_level + IAllocatorSharedPtr allocator_; + LevelAssignStrategy level_assign_strategy_; +}; + +class BufferHub; +/** + * @brief Buffers at the specified size level + * + */ +class NOVA_LLM_API BufferHubLevel { + public: + // Default constructor required for unordered_map + BufferHubLevel() = default; + + // Move constructor and assignment for unique_ptr compatibility + BufferHubLevel(BufferHubLevel&&) = default; + BufferHubLevel& operator=(BufferHubLevel&&) = default; + + // Copy operations deleted to prevent unique_ptr copying + BufferHubLevel(const BufferHubLevel&) = delete; + BufferHubLevel& operator=(const BufferHubLevel&) = delete; + + void initialize(uint32_t index, const Size& block_size, BufferHub* hub); + + // Returns non-owning pointer since pool retains ownership + BlockRawPtr fetchOneFreeBlock(); + + // Accepts non-owning pointer for blocks already in the pool + void putOneBlock(BlockRawPtr block_ptr); + + // Attempts to put a block back by its data pointer. Returns true if successful. + bool tryPutBlock(Block::DataPtr data); + + size_t busyBlockCount() const; + + size_t totalBlocks() const; + + ~BufferHubLevel(); + + private: + void refill(const Size& sz); + + uint32_t index_ = static_cast(-1); // level index in buffer hub + Size block_size_ {static_cast(0)}; // each block size at this level + uint32_t expand_factor_ = 2; + + std::list block_list_; // Owns the blocks + using BlockIterator = std::list::iterator; + + std::unordered_map free_map_; + std::unordered_map busy_map_; + + BufferHub* hub_ = nullptr; }; /* @@ -138,79 +168,70 @@ class DefaultSizeLevelStrategy { * */ class NOVA_LLM_API BufferHub { public: - struct Config { - DeviceType device_type; - std::vector size_levels; // ensure that levels are in ascending order - Size size_limit {0, 0, 0, 8}; // Memory in buffer hub cannot exceed this limit - float warning_level = - 0.95; // Be cautious when memory in buffer hub exceeds size_limit*warning_level - IAllocatorSharedPtr allocator; - }; - - struct Level { - public: - BlockPtr fetchOneFreeBlock(); - - void putOneBlock(const BlockPtr& block_ptr); - - void refill(const Size& sz); - - ~Level(); - - uint32_t index = -1; - Size level_size {static_cast(0)}; // each block size at this level - - //using BlockPtr = Block*; - std::list block_list; - using BlockIterator = std::list::iterator; - std::unordered_map free_map; - std::unordered_map busy_map; - BufferHub* hub; - }; + friend class BufferHubConfig; + friend class BufferHubLevel; class Builder { public: - NOVA_LLM_API static BufferHub* build(const Config& config); + NOVA_LLM_API static BufferHub* build(const BufferHubConfig& config); NOVA_LLM_API static void destroy(BufferHub** hub); }; - void initConfig(const Config& config); + void initConfig(const BufferHubConfig& config); + + // Returns non-owning pointer to block managed by pool + BlockRawPtr getBlock(const Size& sz); - BlockPtr getBlock(const Size& sz); + // Accepts non-owning pointer to block managed by pool + void putBlock(BlockRawPtr block); - void putBlock(const BlockPtr& block); + // Return a buffer to the pool and clear the Buffer to avoid dangling pointers. + void putBlockFromBuffer(Buffer& buffer); - void putBlockFromBuffer(const Buffer& buffer); + void addSizeLevel(uint32_t index, const Size& level_sz); + + void eraseSizeLevel(const Size& level_sz); private: Block::DataPtr allocData(uint64_t sz); void deallocData(Block::DataPtr& data_ptr); + // Creates a new block with ownership BlockPtr allocBlock(); - void deallocateBlock(BlockPtr& block_ptr); + void deallocateBlock(BlockPtr block); - BlockPtr setUpBlock(const Size& sz); // alloc and init block + // Creates and initializes a new block + BlockPtr setUpBlock(const Size& sz); - void tearDownBlock(BlockPtr& block); + // Cleans up and destroys a block + void tearDownBlock(BlockPtr block); - void addSizeLevel(uint32_t index, const Size& level_sz); + [[nodiscard]] Size gradeLevel(const Size& sz) const; - void eraseSizeLevel(const Size& level_sz); + BufferHub(); - [[nodiscard]] Size gradeLevel(const Size& sz) const; + ~BufferHub(); - BufferHub() = default; + // Thread safety: protects all mutable state + mutable std::mutex mutex_; + + std::unordered_map, SizeHash, SizeEqual> buffers_; - std::unordered_map buffers_; DeviceType device_type_; + std::vector size_levels_; // ensure that levels are in ascending order - Size size_limit_ {0, 0, 0, 4}; // Memory in buffer hub cannot exceed this limit - // Be cautious when memory in buffer hub exceeds size_limit*warning_level - float warning_level_ = 0.95; + Size size_limit_; // Memory in buffer hub cannot exceed this limit + + float warning_level_ = 0.95f; // Be cautious when memory in buffer hub exceeds size_limit*warning_level IAllocatorSharedPtr allocator_; + }; } // namespace nova_llm + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/include/NovaLLM/memory/buffer_manager.h b/include/NovaLLM/memory/buffer_manager.h index 4333e71..8e19e41 100644 --- a/include/NovaLLM/memory/buffer_manager.h +++ b/include/NovaLLM/memory/buffer_manager.h @@ -1,12 +1,17 @@ #pragma once #include #include -#include #include +#include + #include "NovaLLM/common/device.h" #include "NovaLLM/memory/allocator.h" #include "NovaLLM/memory/buffer_define.h" #include "NovaLLM/memory/buffer_hub.h" +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4251) +#endif namespace nova_llm { /* @@ -54,18 +59,18 @@ class NOVA_LLM_API BufferManager { BufferManager& operator=(BufferManager&&) = delete; // Disable move assignment - [[nodiscard("Do not drop isInit return value")]] bool isInited() const { return is_init_; } + [[nodiscard]] bool isInited() const { return is_init_; } Buffer fetch(size_t size, DeviceType device_type); - void put(const Buffer& buffer); + // Return a buffer obtained from fetch back to the pool and clear it. + void put(Buffer& buffer); ~BufferManager(); - private: - void destroy(); + private: BufferManager() = default; bool init(const Config& config); @@ -76,3 +81,7 @@ class NOVA_LLM_API BufferManager { }; } // namespace nova_llm + +#ifdef _MSC_VER +#pragma warning(pop) +#endif \ No newline at end of file diff --git a/include/NovaLLM/utils/log.h b/include/NovaLLM/utils/log.h index ab7563e..ee8058a 100644 --- a/include/NovaLLM/utils/log.h +++ b/include/NovaLLM/utils/log.h @@ -4,18 +4,19 @@ // - If NOVA_LLM_ENABLE_LOGGING is defined and spdlog is available, use spdlog. // - Otherwise provide a no-op Logger and a minimal spdlog::level::level_enum so callers compile. -#include #include +#include // Prefer spdlog when logging is enabled and available #if defined(NOVA_LLM_ENABLE_LOGGING) && NOVA_LLM_ENABLE_LOGGING && __has_include() #if __has_include() -# include +#include #elif __has_include() -# include +#include #endif #include + #include namespace nova_llm { @@ -27,9 +28,7 @@ class Logger { return instance; } - void init(const std::string& name = "NovaLLM", - const std::string& logFile = "NovaLLM.log", - spdlog::level::level_enum level = spdlog::level::info); + void init(const std::string& name = "NovaLLM", const std::string& logFile = "NovaLLM.log", spdlog::level::level_enum level = spdlog::level::info); void setLevel(spdlog::level::level_enum level) { if (logger_) logger_->set_level(level); @@ -100,9 +99,7 @@ class Logger { return instance; } - void init(const std::string& /*name*/ = "NovaLLM", - const std::string& /*logFile*/ = "NovaLLM.log", - spdlog::level::level_enum /*level*/ = spdlog::level::info); + void init(const std::string& /*name*/ = "NovaLLM", const std::string& /*logFile*/ = "NovaLLM.log", spdlog::level::level_enum /*level*/ = spdlog::level::info); void setLevel(spdlog::level::level_enum /*level*/) {} diff --git a/include/NovaLLM/utils/macros.h b/include/NovaLLM/utils/macros.h index 25f5ad1..5d5c30d 100644 --- a/include/NovaLLM/utils/macros.h +++ b/include/NovaLLM/utils/macros.h @@ -8,31 +8,29 @@ #define NOVA_LLM_VERSION_MINOR 1 #define NOVA_LLM_VERSION_PATCH 0 #define NOVA_LLM_VERSION_STRING "0.1.0" -#define NOVA_LLM_VERSION \ - (NOVA_LLM_VERSION_MAJOR * 10000 + NOVA_LLM_VERSION_MINOR * 100 + NOVA_LLM_VERSION_PATCH) +#define NOVA_LLM_VERSION (NOVA_LLM_VERSION_MAJOR * 10000 + NOVA_LLM_VERSION_MINOR * 100 + NOVA_LLM_VERSION_PATCH) // For API export and import #if defined(_WIN32) - // When building the library define NOVA_LLM_EXPORTS (set by CMake) - #if defined(NOVA_LLM_EXPORTS) - #define NOVA_LLM_API __declspec(dllexport) - #else - #define NOVA_LLM_API __declspec(dllimport) - #endif +// When building the library define NOVA_LLM_EXPORTS (set by CMake) +#if defined(NOVA_LLM_EXPORTS) +#define NOVA_LLM_API __declspec(dllexport) #else - #define NOVA_LLM_API __attribute__((visibility("default"))) +#define NOVA_LLM_API __declspec(dllimport) +#endif +#else +#define NOVA_LLM_API __attribute__((visibility("default"))) #endif // For debugging and runtime check #ifdef NDEBUG #define ASSERT(condition, message) ((void)0) #else -#define ASSERT(condition, message) \ - do { \ - if (!(condition)) { \ - throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + " " + \ - message); \ - } \ +#define ASSERT(condition, message) \ + do { \ + if (!(condition)) { \ + throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + " " + message); \ + } \ } while (0) #endif diff --git a/scripts/build.sh b/scripts/build.sh old mode 100644 new mode 100755 diff --git a/scripts/build_macos.sh b/scripts/build_macos.sh old mode 100644 new mode 100755 diff --git a/scripts/build_ubuntu.sh b/scripts/build_ubuntu.sh old mode 100644 new mode 100755 diff --git a/source/data/tensor.cpp b/source/data/tensor.cpp index 5f8f5a0..b2f9a31 100644 --- a/source/data/tensor.cpp +++ b/source/data/tensor.cpp @@ -57,15 +57,7 @@ Tensor::Tensor() , m_deleter_(DefaultDeletor()) {} Tensor::Tensor(const std::vector& dims, DataType dtype, DeviceType device) - : dims_(dims) - , ele_cnt_(0) - , data_(nullptr) - , capacity_(0) - , m_data_source_(DataSourceType::AUTO) - , m_dtype_(dtype) - , m_device_(device) - , ref_cnt_{nullptr} - , m_deleter_(DefaultDeletor()) { + : dims_(dims), ele_cnt_(0), data_(nullptr), capacity_(0), m_data_source_(DataSourceType::AUTO), m_dtype_(dtype), m_device_(device), ref_cnt_ {nullptr}, m_deleter_(DefaultDeletor()) { // Check if the data type is valid ASSERT(dtype >= DataType::INT8 && dtype < DataType::TOTAL, "Invalid data type"); // Check if the device type is valid @@ -77,9 +69,7 @@ Tensor::Tensor(const std::vector& dims, DataType dtype, DeviceType dev this->data_ = buffer.data; this->capacity_ = buffer.size; m_deleter_ = [&](void** data) { - Buffer buffer {static_cast().data)>(*data), - static_cast().size)>(capacity_), - m_device_}; + Buffer buffer {static_cast().data)>(*data), static_cast().size)>(capacity_), m_device_}; buffer_manager.put(buffer); *data = nullptr; }; @@ -87,11 +77,7 @@ Tensor::Tensor(const std::vector& dims, DataType dtype, DeviceType dev *ref_cnt_ = 1; } -Tensor::Tensor(const void* data, - const std::vector& dims, - DataType dtype, - DeviceType device, - Deleter deleter) { +Tensor::Tensor(const void* data, const std::vector& dims, DataType dtype, DeviceType device, Deleter deleter) { ASSERT(nullptr != data, "data cannot be null!"); ASSERT(DataType::UNKNOWN < dtype, "data type must be specified!"); ASSERT(DeviceType::UNKNOWN < device, "device type must be specified!"); @@ -133,7 +119,7 @@ Tensor& Tensor::operator=(const Tensor& other) { m_data_source_ = other.dataFrom(); m_dtype_ = other.dtype(); m_device_ = other.device(); - ref_cnt_ = other.ref_cnt_;//TODO:notice here + ref_cnt_ = other.ref_cnt_; // TODO:notice here m_deleter_ = other.deleter(); } return *this; diff --git a/source/device/device.cpp b/source/device/device.cpp index 7755ea3..749037d 100644 --- a/source/device/device.cpp +++ b/source/device/device.cpp @@ -2,9 +2,7 @@ namespace nova_llm { -bool DeviceTypeFlags::has(DeviceType type) const { - return (flags_ & static_cast(type)) != 0; -} +bool DeviceTypeFlags::has(DeviceType type) const { return (flags_ & static_cast(type)) != 0; } // 添加设备 void DeviceTypeFlags::set(DeviceType type) { flags_ |= static_cast(type); } diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp index 463e5cb..8836e4a 100644 --- a/source/memory/buffer_hub.cpp +++ b/source/memory/buffer_hub.cpp @@ -6,12 +6,26 @@ namespace nova_llm { +// Size class is now header-only with simplified implementation + +namespace { +class DefaultSizeLevelStrategy { + public: + static std::vector byteSizes(); + + static std::vector kiloByteSizes(); + + static std::vector megaByteSizes(); + + static std::vector gigaByteSizes(); +}; + std::vector DefaultSizeLevelStrategy::byteSizes() { std::vector ret; uint32_t base = 64; uint32_t ratio = 2; for (uint64_t i = base; i < 1024;) { - ret.push_back(Size(i, 0, 0, 0)); + ret.push_back(Size(i)); // bytes i *= ratio; } return ret; @@ -22,7 +36,7 @@ std::vector DefaultSizeLevelStrategy::kiloByteSizes() { uint32_t base = 4; uint32_t ratio = 2; for (uint64_t i = base; i < 1024;) { - ret.push_back(Size(0, i, 0, 0)); + ret.push_back(Size(i * 1024)); // kilobytes to bytes i *= ratio; } return ret; @@ -33,7 +47,7 @@ std::vector DefaultSizeLevelStrategy::megaByteSizes() { uint32_t base = 2; uint32_t ratio = 2; for (uint64_t i = base; i < 1024;) { - ret.push_back(Size(0, 0, i, 0)); + ret.push_back(Size(i * 1024 * 1024)); // megabytes to bytes i *= ratio; } return ret; @@ -44,103 +58,146 @@ std::vector DefaultSizeLevelStrategy::gigaByteSizes() { uint32_t base = 1; uint32_t ratio = 2; for (uint64_t i = base; i < 10;) { - ret.push_back(Size(0, 0, 0, i)); + ret.push_back(Size(i * 1024ULL * 1024 * 1024)); // gigabytes to bytes i *= ratio; } return ret; } +} // namespace + +std::vector LevelAssignStrategy::assignLevels() { + std::vector ret; + ret.insert(ret.end(), DefaultSizeLevelStrategy::byteSizes().begin(), DefaultSizeLevelStrategy::byteSizes().end()); + ret.insert(ret.end(), DefaultSizeLevelStrategy::kiloByteSizes().begin(), DefaultSizeLevelStrategy::kiloByteSizes().end()); + ret.insert(ret.end(), DefaultSizeLevelStrategy::megaByteSizes().begin(), DefaultSizeLevelStrategy::megaByteSizes().end()); + ret.insert(ret.end(), DefaultSizeLevelStrategy::gigaByteSizes().begin(), DefaultSizeLevelStrategy::gigaByteSizes().end()); + return ret; +} + +void BufferHubLevel::initialize(uint32_t index, const Size& block_size, BufferHub* hub) { + index_ = index; + block_size_ = block_size; + hub_ = hub; +} + +size_t BufferHubLevel::busyBlockCount() const { + return busy_map_.size(); +} + +size_t BufferHubLevel::totalBlocks() const { + return block_list_.size(); +} + +BlockRawPtr BufferHubLevel::fetchOneFreeBlock() { + BlockRawPtr ret_block {nullptr}; + + if (free_map_.empty()) { + LOG_INFO("No free block at level %d,refilling...", index_); + auto block_bytes = this->block_size_.totalBytes(); + refill(Size(expand_factor_ * block_bytes)); // allocate expand_factor blocks + } -BlockPtr BufferHub::Level::fetchOneFreeBlock() { - BlockPtr ret_block {nullptr}; - if (!free_map.empty()) { - auto it = free_map.begin(); + if (!free_map_.empty()) { + LOG_INFO("Found free block at level %d", index_); + auto it = free_map_.begin(); auto block_it = it->second; + // Transition from free to busy: increment ref_cnt from 0 to 1 (*block_it)->ref_cnt++; - busy_map.insert({it->first, it->second}); - free_map.erase(it); - ret_block = *block_it; + busy_map_.insert({it->first, it->second}); + free_map_.erase(it); + ret_block = block_it->get(); // Return non-owning pointer } else { - auto level_bytes = level_size.totalBytes(); - - auto kb_level = Size(0, 1, 0, 0); - auto mb_level = Size(0, 0, 1, 0); - auto gb_level = Size(0, 0, 0, 1); - auto kb_bytes = kb_level.totalBytes(); - auto mb_bytes = mb_level.totalBytes(); - auto gb_bytes = gb_level.totalBytes(); - - if (level_bytes < kb_bytes) { - refill(kb_level); - } else if (level_bytes < mb_bytes) { - refill(mb_level); - } else if (level_bytes < gb_bytes) { - refill(gb_level); - } - ret_block = *(free_map.begin()->second); + LOG_WARN("Unable to fetch free block at level %d even after refill", index_); } + return ret_block; } -void BufferHub::Level::refill(const nova_llm::Size& sz) { - auto dst_size = sz.totalBytes(); - auto level_bytes = this->level_size.totalBytes(); - uint64_t cnt = dst_size / level_bytes; +void BufferHubLevel::refill(const nova_llm::Size& dst_sz) { + if (!hub_) return; + auto dst_total_bytes = dst_sz.totalBytes(); + auto block_bytes = this->block_size_.totalBytes(); + uint64_t cnt = dst_total_bytes / block_bytes; - auto* data = this->hub->allocData(dst_size); + // Allocate data per block so that each pointer we free was directly allocated + // Blocks start in the free list with ref_cnt == 0. for (uint64_t i = 0; i < cnt; i++) { - auto* one_block = hub->allocBlock(); - one_block->data = data + i * level_bytes; - one_block->size = level_bytes; - one_block->ref_cnt = 1;// set ref_cnt to 1 when allocated - auto it = this->block_list.insert(this->block_list.end(), one_block); - this->free_map[one_block->data] = it; + auto one_block = hub_->setUpBlock(Size(block_bytes)); + one_block->ref_cnt = 0; // free blocks have ref_cnt == 0 + auto* block_ptr = one_block.get(); + auto it = this->block_list_.insert(this->block_list_.end(), std::move(one_block)); + this->free_map_[block_ptr->data] = it; } } -void BufferHub::Level::putOneBlock(const BlockPtr& block_ptr) { - BlockPtr dst_block(block_ptr); - if (block_list.empty()) { - auto ret_it = block_list.insert(block_list.end(), dst_block); - (*ret_it)->ref_cnt = 0; - free_map.insert({dst_block->data, ret_it}); - } else { - bool in_free_m = free_map.count(dst_block->data); - bool in_busy_m = busy_map.count(dst_block->data); - if (!in_free_m && !in_busy_m) { - auto it = block_list.insert(block_list.end(), dst_block); - (*it)->ref_cnt = 0; - free_map.insert({(*it)->data, it}); - } else if (in_free_m) { - LOG_WARN("Block %p already in block list at level %d", - static_cast(dst_block->data), - index); - } else { // in_busy_m is true - auto& it = busy_map[dst_block->data]; - auto& busy_block = *it; - if (0 == --busy_block->ref_cnt) { - busy_map.erase(busy_block->data); - busy_block->ref_cnt = 0; - free_map[dst_block->data] = it; - } else { - busy_block->ref_cnt--; - } +void BufferHubLevel::putOneBlock(BlockRawPtr block_ptr) { + if (block_ptr == nullptr) { + return; + } + + if (block_list_.empty()) { + LOG_WARN("putOneBlock called on empty block_list at level %d", index_); + return; + } + + bool in_free_m = free_map_.count(block_ptr->data); + bool in_busy_m = busy_map_.count(block_ptr->data); + + if (!in_free_m && !in_busy_m) { + LOG_WARN("Block %p not found in level %d", static_cast(block_ptr->data), index_); + return; + } else if (in_free_m) { + LOG_WARN("Block %p already in free list at level %d", static_cast(block_ptr->data), index_); + } else { // in_busy_m is true + auto it = busy_map_[block_ptr->data]; + auto& busy_block = *it; + // Decrease ref count once; when it reaches zero, move block back to free_map + if (busy_block->ref_cnt > 0) { + busy_block->ref_cnt--; + } + if (busy_block->ref_cnt == 0) { + free_map_[block_ptr->data] = it; // NOTE: Be cautious about the order of operations here + busy_map_.erase(busy_block->data); } } } -BufferHub::Level::~Level() { - free_map.clear(); - busy_map.clear(); - for (auto& block_ptr : block_list) { - hub->tearDownBlock(block_ptr); +bool BufferHubLevel::tryPutBlock(Block::DataPtr data) { + if (busy_map_.count(data)) { + auto block_it = busy_map_[data]; + putOneBlock(block_it->get()); + return true; + } + return false; +} + +BufferHubLevel::~BufferHubLevel() { + free_map_.clear(); + busy_map_.clear(); + // Blocks are automatically cleaned up when unique_ptrs are destroyed + // but we need to manually free the data + for (auto& block_ptr : block_list_) { + if (block_ptr && block_ptr->data && hub_) { + hub_->deallocData(block_ptr->data); + } } + block_list_.clear(); // unique_ptrs will deallocate Block structs +} + +BufferHub::BufferHub() {} + +BufferHub::~BufferHub() { + // Let the map manage BufferHubLevel destruction + buffers_.clear(); + // Clear configuration metadata + size_levels_.clear(); } -BufferHub* BufferHub::Builder::build(const Config& config) { +BufferHub* BufferHub::Builder::build(const BufferHubConfig& config) { auto* hub = new BufferHub; hub->initConfig(config); int index = 0; - for (auto v : config.size_levels) { + for (auto v : config.sizeLevels()) { hub->addSizeLevel(index, v); ++index; } @@ -151,26 +208,23 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) { if (hub && *hub) { // Deleting the BufferHub will call destructors of its members (including Level), // which will in turn call tearDownBlock to free internal allocations. + //(*hub)->~BufferHub(); + delete *hub; *hub = nullptr; } } -void BufferHub::initConfig(const Config& config) { - device_type_ = config.device_type; - size_levels_ = config.size_levels; - // sort size levels in ascending order - std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { - return a.totalBytes() < b.totalBytes(); - }); - size_limit_ = config.size_limit; - warning_level_ = config.warning_level; - allocator_ = config.allocator; +void BufferHub::initConfig(const BufferHubConfig& config) { + device_type_ = config.deviceType(); + this->size_levels_ = config.sizeLevels(); + std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { return a.totalBytes() < b.totalBytes(); }); + this->size_limit_ = config.sizeLimit(); + this->warning_level_ = config.warningLevel(); + this->allocator_ = config.allocator(); } -Block::DataPtr BufferHub::allocData(uint64_t sz) { - return static_cast(this->allocator_->allocate(sz)); -} +Block::DataPtr BufferHub::allocData(uint64_t sz) { return static_cast(this->allocator_->allocate(sz)); } void BufferHub::deallocData(Block::DataPtr& data_ptr) { if (data_ptr) { @@ -180,13 +234,14 @@ void BufferHub::deallocData(Block::DataPtr& data_ptr) { } BlockPtr BufferHub::allocBlock() { - return static_cast(this->allocator_->allocate(sizeof(Block))); + auto* raw_ptr = static_cast(this->allocator_->allocate(sizeof(Block))); + return BlockPtr(raw_ptr); } -void BufferHub::deallocateBlock(BlockPtr& block_ptr) { - if (block_ptr) { - this->allocator_->deallocate(block_ptr); - block_ptr = nullptr; +void BufferHub::deallocateBlock(BlockPtr block) { + if (block) { + Block* raw = block.release(); + this->allocator_->deallocate(raw); } } @@ -198,49 +253,62 @@ BlockPtr BufferHub::setUpBlock(const Size& sz) { return block; } -void BufferHub::tearDownBlock(BlockPtr& block) { +void BufferHub::tearDownBlock(BlockPtr block) { if (block) { deallocData(block->data); block->size = 0; block->ref_cnt = 0; - deallocateBlock(block); + deallocateBlock(std::move(block)); } } -void BufferHub::addSizeLevel(uint32_t index, const Size& level_sz) { - auto& level = buffers_[level_sz]; - level.level_size = level_sz; - level.index = index; - level.hub = this; +void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) { + std::lock_guard lock(mutex_); + + auto& level = buffers_[level_block_sz]; + level->initialize(index, level_block_sz, this); } void BufferHub::eraseSizeLevel(const Size& level_sz) { - // NOTE:cautious,make sure the size level is not in use - if (buffers_.count(level_sz)) { - if (buffers_[level_sz].busy_map.empty()) { - auto& level = buffers_[level_sz]; - level.~Level(); - } else { - LOG_WARN("Level with size %d is in use,cannot erase now,please try some time later", - level_sz.totalBytes()); - } - } else { - LOG_WARN("Level with size %d is not found!", level_sz.totalBytes()); // TODO:optimize + std::lock_guard lock(mutex_); + + auto it = buffers_.find(level_sz); + if (it == buffers_.end()) { + LOG_WARN("Level with size %llu is not found!", level_sz.totalBytes()); + return; + } + + auto& level = it->second; + if (level->busyBlockCount() > 0) { + LOG_ERROR("Level with size %llu has %zu busy blocks, cannot erase now", + level_sz.totalBytes(), level->busyBlockCount()); + return; } + + // Free all blocks in the block_list before erasing + // The destructor will be called, but let's be explicit about cleanup + LOG_INFO("Erasing level with size %llu, freeing %zu blocks", + level_sz.totalBytes(), level->totalBlocks()); + + // Erasing from the map will call BufferHubLevel destructor, + // which properly frees all blocks via tearDownBlock + buffers_.erase(it); } -BlockPtr BufferHub::getBlock(const Size& sz){ +BlockRawPtr BufferHub::getBlock(const Size& sz) { + std::lock_guard lock(mutex_); + // round it to ceil level auto level_sz = gradeLevel(sz); if (!level_sz.isValid()) { return nullptr; } // search the block list - BlockPtr ret_block {nullptr}; + BlockRawPtr ret_block {nullptr}; if (buffers_.count(level_sz)) { auto& level = buffers_[level_sz]; - auto block = level.fetchOneFreeBlock(); - if (block->isValid()) { + auto block = level->fetchOneFreeBlock(); + if (block && block->isValid()) { ret_block = block; } } @@ -250,18 +318,26 @@ BlockPtr BufferHub::getBlock(const Size& sz){ return ret_block; } -void BufferHub::putBlock(const BlockPtr& block_ptr) { +void BufferHub::putBlock(BlockRawPtr block_ptr) { + if (!block_ptr) { + return; + } + + std::lock_guard lock(mutex_); + auto size = block_ptr->size; Size level_size(size); if (buffers_.count(level_size)) { auto& level = buffers_[level_size]; - level.putOneBlock(block_ptr); + level->putOneBlock(block_ptr); } else { - LOG_ERROR("Level with size %d is not found!", level_size.totalBytes()); + LOG_ERROR("Level size %d is not found in buffers!", level_size.totalBytes()); } } -void BufferHub::putBlockFromBuffer(const Buffer& buffer) { +void BufferHub::putBlockFromBuffer(Buffer& buffer) { + std::lock_guard lock(mutex_); + if (0 == buffer.size || nullptr == buffer.data) { return; } @@ -269,30 +345,38 @@ void BufferHub::putBlockFromBuffer(const Buffer& buffer) { if (buffers_.count(level_sz)) { auto& level = buffers_[level_sz]; auto* data = static_cast(buffer.data); - if (level.busy_map.count(data)) { - auto block_it = level.busy_map[data]; - level.putOneBlock(*block_it); + + if (!level->tryPutBlock(data)) { + // Maybe log warning if data was expected to be there? + // But original code just did nothing if not found in busy_map. + // Actually original code: if (level.busy_map.count(data)) { ... } } + } else { LOG_ERROR("Level with size %d cannot be found in this memory hub", level_sz.totalBytes()); } + + // Clear the Buffer to avoid dangling pointers for callers. + buffer.data = nullptr; + buffer.size = 0; } +// TODO: optim the level selection algorithm Size BufferHub::gradeLevel(const Size& sz) const { Size ret; uint32_t level_index = 0; size_t i = 0; - for (; i < size_levels_.size(); i++) { - if (sz.totalBytes() < size_levels_[i].totalBytes()) { + for (; i < this->size_levels_.size(); i++) { + if (sz.totalBytes() <= this->size_levels_[i].totalBytes()) { level_index = i; break; } } - if (size_levels_.size() == i) { + if (this->size_levels_.size() == i) { LOG_ERROR("Cannot grade to current levels for size %d", sz.totalBytes()); return Size {}; } return size_levels_[level_index]; } -} // namespace nova_llm \ No newline at end of file +} // namespace nova_llm diff --git a/source/memory/buffer_manager.cpp b/source/memory/buffer_manager.cpp index 2d18350..7790c74 100644 --- a/source/memory/buffer_manager.cpp +++ b/source/memory/buffer_manager.cpp @@ -1,9 +1,10 @@ #include "NovaLLM/memory/buffer_manager.h" #include "NovaLLM/memory/allocator.h" +#include "NovaLLM/memory/buffer_hub.h" #include "NovaLLM/utils/log.h" #include "NovaLLM/utils/macros.h" -#include "NovaLLM/memory/buffer_hub.h" +// Disable C4251 warning on Windows (DLL interface for STL containers) namespace nova_llm { @@ -28,24 +29,7 @@ bool BufferManager::init(const nova_llm::BufferManager::Config &config) { } bool ret = false; if (config.device_flags.has(DeviceType::CPU)) { - BufferHub::Config cfg; - cfg.allocator = config.cpu.alloc; - - auto byte_sizes = DefaultSizeLevelStrategy::byteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), byte_sizes.begin(), byte_sizes.end()); - // for size below 1kb - auto kilobyte_sizes = DefaultSizeLevelStrategy::kiloByteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), kilobyte_sizes.begin(), kilobyte_sizes.end()); - // for size below 1mb - auto mb_sizes = DefaultSizeLevelStrategy::megaByteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), mb_sizes.begin(), mb_sizes.end()); - // for size below 1gb - auto gb_sizes = DefaultSizeLevelStrategy::gigaByteSizes(); - cfg.size_levels.insert(cfg.size_levels.end(), gb_sizes.begin(), gb_sizes.end()); - - cfg.size_limit = Size(0, 0, 0, 4); - cfg.warning_level = 0.95; - + BufferHubConfig cfg(DeviceType::CPU, config.cpu.alloc, Size(4UL*1024*1024*1024)); buffer_hubs_[DeviceType::CPU] = BufferHub::Builder::build(cfg); ret |= true; } @@ -54,7 +38,7 @@ bool BufferManager::init(const nova_llm::BufferManager::Config &config) { return ret; } -void BufferManager::put(const Buffer &buffer) { +void BufferManager::put(Buffer &buffer) { if (nullptr == buffer.data || 0 == buffer.size) { return; } @@ -74,14 +58,13 @@ Buffer BufferManager::fetch(size_t size, DeviceType device_type) { return buffer; } -BufferManager::~BufferManager() { - destroy(); -} +BufferManager::~BufferManager() { destroy(); } void BufferManager::destroy() { - for (auto p : buffer_hubs_) { + for (auto& p : buffer_hubs_) { BufferHub::Builder::destroy(&(p.second)); } + buffer_hubs_.clear(); is_init_ = false; } diff --git a/source/memory/cpu_allocator.cpp b/source/memory/cpu_allocator.cpp index d60e9ff..7a3eff7 100644 --- a/source/memory/cpu_allocator.cpp +++ b/source/memory/cpu_allocator.cpp @@ -1,7 +1,7 @@ -#include "NovaLLM/memory/allocator.h" - #include +#include "NovaLLM/memory/allocator.h" + namespace nova_llm { diff --git a/source/memory/gpu_allocator.cpp b/source/memory/gpu_allocator.cpp index 6422ec3..6249c0f 100644 --- a/source/memory/gpu_allocator.cpp +++ b/source/memory/gpu_allocator.cpp @@ -16,9 +16,7 @@ void* CUDAAllocator::do_allocate(size_t size) { return ptr; } -void CUDAAllocator::do_deallocate(void* ptr) { - cudaFree(ptr); -} +void CUDAAllocator::do_deallocate(void* ptr) { cudaFree(ptr); } -} +} // namespace nova_llm #endif \ No newline at end of file diff --git a/source/utils/log.cpp b/source/utils/log.cpp index f07efc3..e82610b 100644 --- a/source/utils/log.cpp +++ b/source/utils/log.cpp @@ -10,9 +10,7 @@ namespace nova_llm { -void Logger::init(const std::string& name, - const std::string& logFile, - spdlog::level::level_enum level) { +void Logger::init(const std::string& name, const std::string& logFile, spdlog::level::level_enum level) { try { // Refer to // https://github.com/gabime/spdlog?tab=readme-ov-file#logger-with-multi-sinks---each-with-a-different-format-and-log-level @@ -25,8 +23,7 @@ void Logger::init(const std::string& name, console_sink->set_pattern(pattern_str); // Use set_pattern // Create file sink - auto file_sink = - std::make_shared(logFile, 1024 * 1024 * 5, 3); + auto file_sink = std::make_shared(logFile, 1024 * 1024 * 5, 3); file_sink->set_level(level); file_sink->set_pattern(pattern_str); // Use set_pattern diff --git a/standalone/source/main.cpp b/standalone/source/main.cpp index 3f83589..8eda34f 100644 --- a/standalone/source/main.cpp +++ b/standalone/source/main.cpp @@ -1,14 +1,15 @@ #include + #include #include #include #include auto main(int argc, char** argv) -> int { - int arg_num=argc; - std::cout<<"arg num:"< +#include +#include +#include +#include + using namespace nova_llm; class CPUBufferHubTest : public ::testing::Test { -public: - BufferHub* getBufferHub(){ - return buffer_hub_; - } + public: + BufferHub* getBufferHub() { return buffer_hub_; } -protected: - void SetUp() override { - BufferHub::Config config; - //set config - config.device_type = DeviceType::CPU; - config.size_levels=std::vector{Size(0, 0, 0, 1)}; - config.allocator = std::make_shared(); + protected: + void SetUp() override { + BufferHubConfig config(DeviceType::CPU, std::make_shared(), Size(4ULL * 1024 * 1024 * 1024)); + buffer_hub_ = BufferHub::Builder::build(config); + } - buffer_hub_ = BufferHub::Builder::build(config); - } + void TearDown() override { BufferHub::Builder::destroy(&buffer_hub_); } - void TearDown() override { - BufferHub::Builder::destroy(&buffer_hub_); - } - BufferHub* buffer_hub_; + BufferHub* buffer_hub_; }; -TEST_F(CPUBufferHubTest, Init) { - EXPECT_NE(getBufferHub(), nullptr); -} +TEST_F(CPUBufferHubTest, Init) { EXPECT_NE(getBufferHub(), nullptr); } TEST_F(CPUBufferHubTest, GetBlock) { - auto *block = getBufferHub()->getBlock(Size(1024)); + auto* block = getBufferHub()->getBlock(Size(1024)); - EXPECT_NE(block, nullptr); - EXPECT_EQ(block->data, nullptr); - EXPECT_EQ(block->size, 1024); - EXPECT_EQ(block->ref_cnt, 1); + EXPECT_NE(block, nullptr); + EXPECT_NE(block->data, nullptr); + EXPECT_GE(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 1); - getBufferHub()->putBlock(block); + getBufferHub()->putBlock(block); } TEST_F(CPUBufferHubTest, PutBlock) { - auto* block = getBufferHub()->getBlock(Size(1024)); + auto* block = getBufferHub()->getBlock(Size(1024)); - EXPECT_NE(block, nullptr); - EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); - EXPECT_EQ(block->ref_cnt, 1); + EXPECT_NE(block, nullptr); + EXPECT_NE(block->data, nullptr); + EXPECT_GE(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 1); - getBufferHub()->putBlock(block); + // Return the block to the pool; block remains valid but is marked free + getBufferHub()->putBlock(block); - EXPECT_EQ(block->data, nullptr); - EXPECT_EQ(block->size, 0); - EXPECT_EQ(block->ref_cnt, 0); -} + EXPECT_NE(block->data, nullptr); + EXPECT_GE(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 0); // ref count reset when returned to pool + // Fetch another block of the same size and ensure we get a (possibly reused) block + auto* block2 = getBufferHub()->getBlock(Size(1024)); + EXPECT_NE(block2, nullptr); + EXPECT_NE(block2->data, nullptr); + EXPECT_GE(block2->size, 1024); + EXPECT_EQ(block2->ref_cnt, 1); +} TEST_F(CPUBufferHubTest, PutBlockFromBuffer) { - auto* block = getBufferHub()->getBlock(Size(1024)); - - EXPECT_NE(block, nullptr); - EXPECT_NE(block->data, nullptr); - EXPECT_EQ(block->size, 1024); - EXPECT_EQ(block->ref_cnt, 1); - - Buffer buffer; - buffer.data = block->data; - buffer.size = block->size; - buffer.device_type = DeviceType::CPU; - getBufferHub()->putBlockFromBuffer(buffer); - - EXPECT_EQ(block->data, nullptr); - EXPECT_EQ(block->size, 0); - EXPECT_EQ(block->ref_cnt, 0); - - EXPECT_EQ(buffer.data, nullptr); - EXPECT_EQ(buffer.size, 0); -} \ No newline at end of file + auto* block = getBufferHub()->getBlock(Size(1024)); + + EXPECT_NE(block, nullptr); + EXPECT_NE(block->data, nullptr); + EXPECT_GE(block->size, 1024); + EXPECT_EQ(block->ref_cnt, 1); + + Buffer buffer; + buffer.data = block->data; + buffer.size = block->size; + buffer.device_type = DeviceType::CPU; + getBufferHub()->putBlockFromBuffer(buffer); + + // After returning via Buffer, the underlying block should be returned to the pool. + // The Buffer should be cleared to avoid dangling pointers. + EXPECT_EQ(buffer.data, nullptr); + EXPECT_EQ(buffer.size, 0); +} + +// Concurrent access tests +TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) { + constexpr int num_threads = 10; + constexpr int num_levels_per_thread = 5; + std::vector threads; + std::atomic success_count {0}; + + // Each thread adds multiple size levels + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &success_count, num_levels_per_thread=num_levels_per_thread]() { + for (int i = 0; i < num_levels_per_thread; ++i) { + // Create unique sizes for each thread to avoid conflicts + uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100); // 100MB+ + Size level_size(size_bytes); + uint32_t index = t * num_levels_per_thread + i + 1000; + + getBufferHub()->addSizeLevel(index, level_size); + success_count++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + // Verify all additions succeeded + EXPECT_EQ(success_count.load(), num_threads * num_levels_per_thread); +} + +TEST_F(CPUBufferHubTest, ConcurrentEraseSizeLevel) { + const int num_threads = 8; + std::vector threads; + std::vector sizes_to_add; + + // Pre-populate with size levels + for (int i = 0; i < num_threads * 2; ++i) { + uint64_t size_bytes = (1 << 20) * (i + 200); // 200MB+ + Size level_size(size_bytes); + sizes_to_add.push_back(level_size); + getBufferHub()->addSizeLevel(2000 + i, level_size); + } + + std::atomic erase_attempts {0}; + + // Each thread attempts to erase different size levels concurrently + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &sizes_to_add, &erase_attempts]() { + // Each thread erases 2 levels + for (int i = 0; i < 2; ++i) { + int idx = t * 2 + i; + getBufferHub()->eraseSizeLevel(sizes_to_add[idx]); + erase_attempts++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + EXPECT_EQ(erase_attempts.load(), num_threads * 2); +} + +TEST_F(CPUBufferHubTest, ConcurrentGetBlock) { + constexpr int num_threads = 20; + constexpr int blocks_per_thread = 5; + std::vector threads; + std::vector> thread_blocks(num_threads); + std::atomic successful_gets {0}; + + // Multiple threads requesting blocks of the same size concurrently + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &thread_blocks, &successful_gets, blocks_per_thread=blocks_per_thread]() { + for (int i = 0; i < blocks_per_thread; ++i) { + auto* block = getBufferHub()->getBlock(Size(4096)); // 4KB blocks + if (block != nullptr && block->data != nullptr) { + thread_blocks[t].push_back(block); + successful_gets++; + + // Verify block properties + EXPECT_NE(block->data, nullptr); + EXPECT_GE(block->size, 4096); + EXPECT_EQ(block->ref_cnt, 1); + } + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + // Verify we got the expected number of blocks + EXPECT_EQ(successful_gets.load(), num_threads * blocks_per_thread); + + // Verify all blocks have unique data pointers (no double allocation) + std::vector all_data_ptrs; + for (const auto& blocks : thread_blocks) { + for (const auto& block : blocks) { + all_data_ptrs.push_back(block->data); + } + } + std::sort(all_data_ptrs.begin(), all_data_ptrs.end()); + auto last = std::unique(all_data_ptrs.begin(), all_data_ptrs.end()); + EXPECT_EQ(last - all_data_ptrs.begin(), num_threads * blocks_per_thread); + + // Clean up - return all blocks + for (auto& blocks : thread_blocks) { + for (auto* block : blocks) { + getBufferHub()->putBlock(block); + } + } +} + +TEST_F(CPUBufferHubTest, ConcurrentPutBlock) { + const int num_threads = 15; + const int blocks_per_thread = 4; + std::vector threads; + std::vector> thread_blocks(num_threads); + + // First, get blocks in a single-threaded manner + for (int t = 0; t < num_threads; ++t) { + for (int i = 0; i < blocks_per_thread; ++i) { + auto* block = getBufferHub()->getBlock(Size(2048)); // 2KB blocks + ASSERT_NE(block, nullptr); + thread_blocks[t].push_back(block); + } + } + + std::atomic successful_puts {0}; + + // Now return blocks concurrently from multiple threads + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &thread_blocks, &successful_puts]() { + for (auto* block : thread_blocks[t]) { + EXPECT_EQ(block->ref_cnt, 1); + getBufferHub()->putBlock(block); + successful_puts++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread); + + // Verify blocks are returned properly by checking ref_cnt + for (const auto& blocks : thread_blocks) { + for (const auto* block : blocks) { + EXPECT_EQ(block->ref_cnt, 0); + } + } +} + +TEST_F(CPUBufferHubTest, ConcurrentPutBlockFromBuffer) { + const int num_threads = 12; + const int blocks_per_thread = 3; + std::vector threads; + std::vector> thread_buffers(num_threads); + + // First, get blocks and create buffers in a single-threaded manner + for (int t = 0; t < num_threads; ++t) { + for (int i = 0; i < blocks_per_thread; ++i) { + auto* block = getBufferHub()->getBlock(Size(8192)); // 8KB blocks + ASSERT_NE(block, nullptr); + + Buffer buffer; + buffer.data = block->data; + buffer.size = block->size; + buffer.device_type = DeviceType::CPU; + thread_buffers[t].push_back(buffer); + } + } + + std::atomic successful_puts {0}; + + // Now return buffers concurrently from multiple threads + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &thread_buffers, &successful_puts]() { + for (auto& buffer : thread_buffers[t]) { + EXPECT_NE(buffer.data, nullptr); + EXPECT_NE(buffer.size, 0); + + getBufferHub()->putBlockFromBuffer(buffer); + + // Verify buffer was cleared + EXPECT_EQ(buffer.data, nullptr); + EXPECT_EQ(buffer.size, 0); + + successful_puts++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread); +} + +// Mixed concurrent operations test +TEST_F(CPUBufferHubTest, ConcurrentMixedOperations) { + const int num_threads = 16; + std::vector threads; + std::atomic total_operations {0}; + + // Mix of get and put operations happening concurrently + for (int t = 0; t < num_threads; ++t) { + threads.emplace_back([this, t, &total_operations]() { + std::vector blocks; + + // Perform alternating get and put operations + for (int i = 0; i < 10; ++i) { + // Get a block + auto* block = getBufferHub()->getBlock(Size(1024 * (t % 4 + 1))); // Varying sizes + if (block != nullptr) { + EXPECT_NE(block->data, nullptr); + EXPECT_EQ(block->ref_cnt, 1); + blocks.push_back(block); + total_operations++; + } + + // Return a previously acquired block if we have any + if (!blocks.empty() && i % 3 == 0) { + auto* return_block = blocks.back(); + blocks.pop_back(); + getBufferHub()->putBlock(return_block); + // Note: Don't check ref_cnt here as it's being modified concurrently + total_operations++; + } + } + + // Clean up remaining blocks + for (auto* block : blocks) { + getBufferHub()->putBlock(block); + total_operations++; + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + // Verify operations completed + EXPECT_GT(total_operations.load(), 0); +} diff --git a/test/source/buffer_manager_test.cpp b/test/source/buffer_manager_test.cpp index 280606f..be22c98 100644 --- a/test/source/buffer_manager_test.cpp +++ b/test/source/buffer_manager_test.cpp @@ -5,49 +5,47 @@ using namespace nova_llm; class BufferManagerTest : public ::testing::Test { -protected: - void SetUp() override { - BufferManager::Config config; - //set config - config.device_flags.set(DeviceType::CPU); - config.cpu.alloc = std::make_shared(); - #if defined(NOVA_LLM_CUDA_ON) && NOVA_LLM_CUDA_ON - config.device_flags.set(DeviceType::CUDA); - config.gpu.alloc = std::make_shared(); - #endif - - BufferManager::Builder::build(config); - } - - void TearDown() override { - BufferManager::Builder::getInstance().~BufferManager(); - } + protected: + void SetUp() override { + BufferManager::Config config; + // set config + config.device_flags.set(DeviceType::CPU); + config.cpu.alloc = std::make_shared(); +#if defined(NOVA_LLM_CUDA_ON) && NOVA_LLM_CUDA_ON + config.device_flags.set(DeviceType::CUDA); + config.gpu.alloc = std::make_shared(); +#endif + + BufferManager::Builder::build(config); + } + + void TearDown() override { BufferManager::Builder::getInstance().destroy(); } }; -TEST(BufferManagerTest, Init) { - auto& buffer_manager = BufferManager::Builder::getInstance(); - EXPECT_TRUE(buffer_manager.isInited()); +TEST_F(BufferManagerTest, Init) { + auto& buffer_manager = BufferManager::Builder::getInstance(); + EXPECT_TRUE(buffer_manager.isInited()); } -TEST(BufferManagerTest, FetchCpu) { - auto& buffer_manager = BufferManager::Builder::getInstance(); +TEST_F(BufferManagerTest, FetchCpu) { + auto& buffer_manager = BufferManager::Builder::getInstance(); - auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); + auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); - EXPECT_NE(buffer.data, nullptr); - EXPECT_EQ(buffer.size, 1024); - EXPECT_EQ(buffer.device_type, DeviceType::CPU); + EXPECT_NE(buffer.data, nullptr); + EXPECT_GE(buffer.size, 1024); // Size should be at least requested (may be rounded up to next level) + EXPECT_EQ(buffer.device_type, DeviceType::CPU); - buffer_manager.put(buffer); + buffer_manager.put(buffer); } -TEST(BufferManagerTest, PutCpu) { - auto& buffer_manager = BufferManager::Builder::getInstance(); +TEST_F(BufferManagerTest, PutCpu) { + auto& buffer_manager = BufferManager::Builder::getInstance(); - auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); + auto buffer = buffer_manager.fetch(1024, DeviceType::CPU); - buffer_manager.put(buffer); - EXPECT_EQ(buffer.data, nullptr); - EXPECT_EQ(buffer.size, 0); - EXPECT_EQ(buffer.device_type, DeviceType::CPU); -} \ No newline at end of file + buffer_manager.put(buffer); + EXPECT_EQ(buffer.data, nullptr); + EXPECT_EQ(buffer.size, 0); + EXPECT_EQ(buffer.device_type, DeviceType::CPU); +} diff --git a/test/source/tensor_test.cpp b/test/source/tensor_test.cpp index c03e51a..189eeac 100644 --- a/test/source/tensor_test.cpp +++ b/test/source/tensor_test.cpp @@ -1,4 +1,5 @@ #include "NovaLLM/data/tensor.h" +#include "NovaLLM/memory/buffer_manager.h" #include @@ -7,11 +8,17 @@ using namespace nova_llm; class TensorTest : public ::testing::Test { protected: void SetUp() override { - // 测试前的设置 + BufferManager::Config config; + // set config + config.device_flags.set(DeviceType::CPU); + config.cpu.alloc = std::make_shared(); + // Use a smaller size for testing if needed, or default 4GB + // But since BufferManager is a singleton, we must ensure it's initialized. + BufferManager::Builder::build(config); } void TearDown() override { - // 测试后的清理 + BufferManager::Builder::getInstance().destroy(); } }; @@ -38,11 +45,12 @@ TEST_F(TensorTest, ConstructWithDims) { // 测试非法维度 TEST_F(TensorTest, InvalidDimensions) { std::vector empty_dims; - EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU), - std::invalid_argument); + // ASSERT macro throws std::runtime_error + EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU), std::runtime_error); std::vector zero_dims = {2, 0, 4}; - EXPECT_THROW(Tensor tensor(zero_dims, DataType::FLOAT32, DeviceType::CPU), std::invalid_argument); + // ASSERT macro throws std::runtime_error + EXPECT_THROW(Tensor tensor(zero_dims, DataType::FLOAT32, DeviceType::CPU), std::runtime_error); } // 测试拷贝构造 @@ -77,4 +85,4 @@ TEST_F(TensorTest, MemoryAllocation) { EXPECT_NE(tensor.data(), nullptr); EXPECT_EQ(tensor.totalElements(), 6); -} \ No newline at end of file +}