diff --git a/.clang-format b/.clang-format
index 4f09886..5ce9ead 100644
--- a/.clang-format
+++ b/.clang-format
@@ -10,7 +10,7 @@ SeparateDefinitionBlocks: Always
 # 基础缩进配置
 IndentWidth: 2
 ContinuationIndentWidth: 4
-ColumnLimit: 100
+ColumnLimit: 200
 
 # 函数参数和初始化列表格式
 BinPackArguments: false
diff --git a/.githooks/check_branch_name.py b/.githooks/check_branch_name.py
new file mode 100755
index 0000000..6ed5aa8
--- /dev/null
+++ b/.githooks/check_branch_name.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+"""
+Branch name validator for NovaLLM
+Works on all platforms (Windows, macOS, Linux)
+"""
+import re
+import subprocess
+import sys
+
+
+def get_current_branch():
+    """Return the checked-out branch name, or None when git cannot report it.
+
+    The value is "HEAD" when the repository is in detached-HEAD state.
+    """
+    cmd = ["git", "rev-parse", "--abbrev-ref", "HEAD"]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+    except (subprocess.CalledProcessError, OSError):
+        # OSError: git is missing or not executable; treat like a failed call.
+        return None
+    return result.stdout.strip()
+
+
+def is_protected_branch(branch_name):
+    """Tell whether a branch is exempt from the naming convention.
+
+    Exempt names: main, master, develop, anything under release/ or hotfix/,
+    and the literal "HEAD" (what git reports for a detached HEAD).
+    """
+    exact_names = r"(?:main|master|develop|HEAD)$"
+    namespaced = r"(?:release|hotfix)/.+"
+    if re.match(exact_names, branch_name):
+        return True
+    return re.match(namespaced, branch_name) is not None
+
+
+def validate_branch_name(branch_name):
+    """
+    Report whether a branch name obeys the project naming convention.
+
+    Accepted shape: <type>-<description> or <type>/<description>.
+    <type> is one of the prefixes listed below; <description> is one or more
+    characters drawn from lowercase letters, digits, "_" and "-".
+
+    Returns True for a conforming name and False otherwise.
+    """
+    allowed_types = "feat fix docs style refactor perf test build ci chore".split()
+
+    # Alternation over the allowed types, one "-" or "/" separator, then the
+    # description; the pattern is anchored at both ends.
+    type_alternation = "|".join(allowed_types)
+    name_re = re.compile(rf"^({type_alternation})([-/])[a-z0-9_-]+$")
+
+    if name_re.match(branch_name):
+        return True
+    return False
+
+
+def print_error_message(branch_name):
+    """Print the naming rules and rename instructions for branch_name to stderr."""
+    error_msg = f"""
+{'='*70}
+              ❌ INVALID BRANCH NAME
+{'='*70}
+
+Branch: {branch_name}
+
+Branch names must follow this format:
+  <type>-<description>  or  <type>/<description>
+
+Valid types:
+  • feat      - New feature
+  • fix       - Bug fix
+  • docs      - Documentation changes
+  • style     - Code style changes
+  • refactor  - Code refactoring
+  • perf      - Performance improvements
+  • test      - Test changes
+  • build     - Build system changes
+  • ci        - CI/CD changes
+  • chore     - Other changes
+
+✅ Valid examples:
+  feat-buffer-pooling
+  fix-windows-dll-exports
+  docs-update-readme
+  refactor/simplify-tensor-allocation
+  ci-add-coverage-reporting
+
+❌ Current branch: {branch_name}
+
+To fix this, rename your branch:
+  git branch -m {branch_name} <type>-<proper-description>
+
+Or delete and recreate:
+  git checkout main
+  git branch -D {branch_name}
+  git checkout -b <type>-<proper-description>
+
+{'='*70}
+"""
+    print(error_msg, file=sys.stderr)
+
+
+def main():
+    """Validate the current branch name; exit 1 if invalid, otherwise 0.
+
+    Validation is skipped (exit 0) when the branch cannot be determined or
+    is a protected branch.
+    """
+    branch_name = get_current_branch()
+    if not branch_name or is_protected_branch(branch_name):
+        sys.exit(0)
+
+    if not validate_branch_name(branch_name):
+        print_error_message(branch_name)
+        sys.exit(1)
+
+    try:
+        print(f"✅ Branch name '{branch_name}' is valid")
+    except UnicodeEncodeError:
+        # stdout may use a legacy code page (e.g. piped output on Windows).
+        print(f"Branch name '{branch_name}' is valid")
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.githooks/install.sh b/.githooks/install.sh
new file mode 100755
index 0000000..44b44d4
--- /dev/null
+++ b/.githooks/install.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Install git hooks for NovaLLM
+# Uses pre-commit when it is available; otherwise falls back to .githooks via core.hooksPath
+
+set -euo pipefail
+
+echo "🔧 Installing NovaLLM Git Hooks..."
+echo ""
+
+# Get the repository root
+REPO_ROOT=$(git rev-parse --show-toplevel)
+HOOKS_DIR="$REPO_ROOT/.githooks"
+
+# 1. Install the hooks. pre-commit refuses to install while core.hooksPath is
+#    set, so the custom hooks directory is only configured as a fallback.
+if command -v pre-commit &> /dev/null; then
+  echo "📦 Installing pre-commit hooks..."
+  git config --unset-all core.hooksPath 2>/dev/null || true
+  pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout
+  echo "   ✅ Pre-commit hooks installed"
+else
+  # Without pre-commit only the scripts in .githooks (post-checkout) can run.
+  echo "📁 Configuring git to use custom hooks directory..."
+  git config core.hooksPath "$HOOKS_DIR"
+  echo "   ✅ Git hooks path set to: $HOOKS_DIR"
+  echo "⚠️  pre-commit not found. Install it with:"
+  echo "   pip install pre-commit"
+  echo "   Then re-run: .githooks/install.sh"
+fi
+echo ""
+
+# 2. Make all hook scripts executable
+echo "🔐 Making hook scripts executable..."
+chmod +x "$HOOKS_DIR"/* 2>/dev/null || true
+echo "   ✅ Hook scripts are executable"
+echo ""
+
+# 3. Report the current branch (it is not validated here)
+CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
+echo "📋 Current branch: $CURRENT_BRANCH"
+
+# Summary
+cat <<EOF
+
+╔════════════════════════════════════════════════════════════════╗
+║                  ✅ HOOKS INSTALLED SUCCESSFULLY               ║
+╚════════════════════════════════════════════════════════════════╝
+
+Installed hooks:
+  ✓ post-checkout     - Validates branch names
+  ✓ commit-msg        - Validates commit messages
+  ✓ pre-commit        - Code quality checks
+
+Branch name format:
+  <type>-<description> or <type>/<description>
+  Example: feat-buffer-pooling, fix-memory-leak
+
+Commit message format:
+  <type>(<scope>): <subject>
+  Example: feat(memory): add buffer pooling
+
+Valid types:
+  feat, fix, docs, style, refactor, perf, test, build, ci, chore
+
+Try it out:
+  git checkout -b feat-test-branch
+  git commit -m "feat(test): try the hooks"
+
+For more info, see: .pre-commit-setup.md
+
+EOF
diff --git a/.githooks/post-checkout b/.githooks/post-checkout
new file mode 100755
index 0000000..e36439d
--- /dev/null
+++ b/.githooks/post-checkout
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# Post-checkout hook to validate branch names
+# This hook runs after 'git checkout' or 'git switch'
+
+set -euo pipefail
+
+# Git passes: $1 = previous HEAD, $2 = new HEAD, $3 = checkout type.
+# Only $3 is needed; it is defaulted so that a manual run without
+# arguments does not abort under 'set -u'.
+CHECKOUT_TYPE=${3:-1}  # 1 = branch checkout, 0 = file checkout
+
+# Only validate on branch checkouts
+if [ "$CHECKOUT_TYPE" != "1" ]; then
+  exit 0
+fi
+
+# Get the current branch name
+BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)
+
+# Skip validation for special branches
+if [[ "$BRANCH_NAME" =~ ^(main|master|develop|release/.+|hotfix/.+)$ ]]; then
+  exit 0
+fi
+
+# Skip validation if we're in detached HEAD state
+if [ "$BRANCH_NAME" = "HEAD" ]; then
+  exit 0
+fi
+
+# Define the valid branch name pattern
+# Format: <type>-<description> or <type>/<description>
+# Where type is: feat, fix, docs, style, refactor, perf, test, build, ci, chore
+VALID_PATTERN="^(feat|fix|docs|style|refactor|perf|test|build|ci|chore)([-/])[a-z0-9_-]+$"
+
+# Reject non-matching names; the explanation goes to stderr like any error output
+if [[ ! "$BRANCH_NAME" =~ $VALID_PATTERN ]]; then
+  cat >&2 <<EOF
+╔════════════════════════════════════════════════════════════════╗
+║                  ❌ INVALID BRANCH NAME                        ║
+╚════════════════════════════════════════════════════════════════╝
+
+Branch: $BRANCH_NAME
+
+Branch names must follow this format:
+  <type>-<description>  or  <type>/<description>
+
+Valid types:
+  • feat      - New feature
+  • fix       - Bug fix
+  • docs      - Documentation changes
+  • style     - Code style changes
+  • refactor  - Code refactoring
+  • perf      - Performance improvements
+  • test      - Test changes
+  • build     - Build system changes
+  • ci        - CI/CD changes
+  • chore     - Other changes
+
+Examples:
+  ✅ feat-buffer-pooling
+  ✅ fix-windows-dll-exports
+  ✅ docs-update-readme
+  ✅ refactor/simplify-tensor-allocation
+  ✅ ci-add-coverage-reporting
+
+Current branch: ❌ $BRANCH_NAME
+
+To fix this, rename your branch:
+  git branch -m $BRANCH_NAME <type>-<proper-description>
+
+Or delete and recreate:
+  git checkout main
+  git branch -D $BRANCH_NAME
+  git checkout -b <type>-<proper-description>
+
+EOF
+  exit 1
+fi
+
+# Branch name is valid
+echo "✅ Branch name '$BRANCH_NAME' is valid"
+exit 0
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index a4ab433..61e2348 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -57,10 +57,25 @@ jobs:
             cmake -S .. -B . -DCMAKE_BUILD_TYPE=Release -DNOVA_LLM_BUILD_TESTS=OFF -DNOVA_LLM_ENABLE_LOGGING=OFF
           fi
 
+      - name: Install Doxygen
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y doxygen graphviz
+
       - name: Build documentation
         run: |
           cd build
-          cmake --build . --target docs || true
+          cmake --build . --target docs
+
+      - name: Check documentation output
+        run: |
+          if [ ! -d "docs/html" ]; then
+            echo "Error: Documentation not generated at docs/html"
+            ls -la docs/ || echo "docs/ directory not found"
+            exit 1
+          fi
+          echo "Documentation generated successfully"
+          ls -la docs/html/
 
       - name: Setup Pages
         uses: actions/configure-pages@v4
@@ -68,7 +83,7 @@ jobs:
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v3
         with:
-          path: build/docs/html
+          path: docs/html
 
       - name: Deploy to GitHub Pages
         id: deployment
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 2158326..849b362 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -132,6 +132,95 @@ jobs:
           cmake --build . --config Debug
           # 可选：ctest --output-on-failure
 
+  run_coverage:
+    name: Build & Run Tests with Coverage
+    runs-on: ubuntu-latest
+    needs: deps_build_debug
+    steps:
+      - *step_checkout
+      - *step_setup_python
+      - *step_install_conan
+      - *step_detect_profile
+      - name: Install lcov
+        run: sudo apt-get update && sudo apt-get install -y lcov
+      - name: Restore Conan cache
+        uses: actions/cache@v4
+        with:
+          path: ~/.conan2
+          key: ${{ runner.os }}-conan-debug-${{ hashFiles('**/conanfile.py', '**/standalone/conanfile.txt', 'source/**', 'include/**') }}
+      - name: Build and run tests with coverage
+        run: |
+          mkdir -p build-coverage
+          cd build-coverage
+          conan install ../test --output-folder=conan --build=missing -s build_type=Debug
+          TOOLCHAIN_FILE=$(find "$(pwd)" -name "conan_toolchain.cmake" -type f | head -1)
+          cmake -S ../test -B . \
+            -DCMAKE_BUILD_TYPE=Debug \
+            -DENABLE_TEST_COVERAGE=ON \
+            -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN_FILE"
+          cmake --build . --config Debug
+
+          # Remove stale .gcda counters so only this run is measured; refresh .gcno timestamps
+          find . -name "*.gcda" -exec rm {} \; 2>/dev/null || true
+          find . -name "*.gcno" -exec touch {} \; 2>/dev/null || true
+
+          # Run each test binary on its own; failures are logged but do not stop the job
+          echo "Running tests individually..."
+          for test_file in $(find . -name "NovaLLMTests*" -type f -executable); do
+            echo "Running $test_file..."
+            "$test_file" --gtest_output="xml:test_results_$(basename "$test_file").xml" --gtest_filter="*Concurrent*" || (
+              echo "Test $test_file completed with issues, checking for coverage data..."
+            )
+          done
+
+          # Explicitly run ctest
+          ctest --output-on-failure --verbose || echo "ctest failed, proceeding to coverage..."
+      - name: Generate coverage report
+        run: |
+          cd build-coverage
+          echo "Checking for .gcda files..."
+          find . -name "*.gcda" -type f | head -10
+
+          # Capture coverage data; a capture failure is reported but does not stop the step
+          echo "Capturing coverage data..."
+          lcov --directory . \
+               --capture \
+               --output-file coverage.info \
+               --ignore-errors mismatch,gcov,unused \
+               --no-external \
+               --base-directory "$GITHUB_WORKSPACE" || echo "lcov capture had issues, proceeding..."
+
+          # Check if coverage.info was created and has content
+          if [ -f coverage.info ]; then
+            echo "Coverage file created, size: $(du -h coverage.info)"
+            head -20 coverage.info
+
+            # Remove unwanted paths
+            echo "Removing unwanted paths..."
+            lcov --remove coverage.info \
+                 '/usr/*' \
+                 '*/test/*' \
+                 '*/conan/*' \
+                 '*/CMakeFiles/*' \
+                 '*/build*/*' \
+                 --output-file coverage_cleaned.info \
+                 --ignore-errors mismatch,gcov,unused
+
+            # Check final coverage
+            echo "Final coverage report:"
+            lcov --list coverage_cleaned.info
+            mv coverage_cleaned.info coverage.info
+          else
+            echo "Coverage file not created, skipping removal step"
+          fi
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v4
+        with:
+          files: ./build-coverage/coverage.info
+          fail_ci_if_error: false
+          verbose: true
+          token: ${{ secrets.CODECOV_TOKEN }}
+
   run_standalone:
     name: Build & Run Standalone
     runs-on: ubuntu-latest
diff --git a/.gitignore b/.gitignore
index c320280..98d5c81 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,4 +34,9 @@ latex/
 *.gcov
 coverage/
 CMakeUserPresets.json
-build-test/
\ No newline at end of file
+build-test/
+build-debug/
+build-release/
+build-test-debug/
+install-debug/
+install-release/
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..68ffd76
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,102 @@
+# Pre-commit hooks for NovaLLM
+# Install: pip install pre-commit
+# Setup: pre-commit install
+# Run manually: pre-commit run --all-files
+
+repos:
+  # Branch name validation - local hook (works out of the box)
+  - repo: local
+    hooks:
+      - id: check-branch-name
+        name: Check branch name format
+        entry: python3 .githooks/check_branch_name.py
+        language: system
+        pass_filenames: false
+        always_run: true
+        stages: [post-checkout, manual]
+
+  # Conventional Commits - enforce commit message format
+  - repo: https://github.com/compilerla/conventional-pre-commit
+    rev: v3.0.0
+    hooks:
+      - id: conventional-pre-commit
+        stages: [commit-msg]
+        args:
+          - feat        # New feature
+          - fix         # Bug fix
+          - docs        # Documentation only changes
+          - style       # Code style changes (formatting, no logic change)
+          - refactor    # Code refactoring (no feature/fix)
+          - perf        # Performance improvements
+          - test        # Adding/updating tests
+          - build       # Build system or dependencies
+          - ci          # CI/CD changes
+          - chore       # Other changes (tooling, maintenance)
+          - revert      # Revert previous commit
+
+  # Basic file checks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+        args: [--markdown-linebreak-ext=md]
+      - id: end-of-file-fixer
+      - id: check-yaml
+        args: [--allow-multiple-documents]
+      - id: check-added-large-files
+        args: ['--maxkb=1000']
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: check-json
+      - id: check-toml
+      - id: mixed-line-ending
+        args: ['--fix=lf']
+      - id: detect-private-key
+
+  # C++ formatting with clang-format
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v18.1.8
+    hooks:
+      - id: clang-format
+        types_or: [c++, c]
+        args: ['-i']  # In-place formatting
+
+  # CMake formatting
+  - repo: https://github.com/cheshirekow/cmake-format-precommit
+    rev: v0.6.13
+    hooks:
+      - id: cmake-format
+      - id: cmake-lint
+        args: [--disabled-codes=C0103]
+
+  # YAML/JSON formatting
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v3.1.0
+    hooks:
+      - id: prettier
+        types_or: [yaml, json, markdown]
+        args: [--write, --prose-wrap=always]
+
+  # Shell script checks
+  - repo: https://github.com/shellcheck-py/shellcheck-py
+    rev: v0.9.0.6
+    hooks:
+      - id: shellcheck
+
+  # Python formatting (for scripts)
+  - repo: https://github.com/psf/black
+    rev: 23.12.1
+    hooks:
+      - id: black
+        language_version: python3
+
+# Exclude patterns (the anchored regex must match the whole repo-relative path)
+exclude: |
+  (?x)^(
+      build[^/]*/.*|
+      install[^/]*/.*|
+      \.git/.*|
+      \.github/workflows/.*|
+      documentation/.*|
+      docs/.*
+  )$
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
new file mode 100644
index 0000000..2f688a9
--- /dev/null
+++ b/.pre-commit-hooks.yaml
@@ -0,0 +1,11 @@
+# Pre-commit hooks metadata for NovaLLM
+# This allows the hooks to be used as pre-commit hooks
+
+- id: check-branch-name
+  name: Check branch name format
+  description: Validates that branch names follow <type>-<description> or <type>/<description> format
+  entry: python3 .githooks/check_branch_name.py
+  language: system
+  pass_filenames: false
+  always_run: true
+  stages: [post-checkout]
diff --git a/.pre-commit-setup.md b/.pre-commit-setup.md
new file mode 100644
index 0000000..fed4327
--- /dev/null
+++ b/.pre-commit-setup.md
@@ -0,0 +1,204 @@
+# Pre-commit Setup Guide
+
+This repository uses [pre-commit](https://pre-commit.com/) to ensure code quality and enforce commit message conventions.
+
+## Quick Setup
+
+**After cloning the repository, run ONE command:**
+
+```bash
+pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout
+```
+
+That's it! The hooks will now run automatically on every commit and branch checkout.
+
+### Optional: Test the setup
+
+```bash
+# Run all checks manually
+pre-commit run --all-files
+
+# Test branch name validation
+python3 .githooks/check_branch_name.py
+```
+
+## Branch Naming Convention
+
+All branch names **must** follow this format:
+
+```
+<type>-<description>  or  <type>/<description>
+```
+
+### Examples
+
+✅ **Valid branch names:**
+```
+feat-buffer-pooling
+fix-windows-dll-exports
+docs-update-readme
+refactor/simplify-tensor-allocation
+ci-add-coverage-reporting
+test-buffer-manager
+```
+
+❌ **Invalid branch names:**
+```
+buffer_pooling        (no type prefix)
+Feat-something        (capital letter)
+feature-test          (wrong type, use 'feat')
+my-branch             (no type prefix)
+```
+
+### Protected Branches (No Validation)
+
+These branches don't require the naming convention:
+- `main`, `master`, `develop`
+- `release/*` (e.g., `release/v1.0.0`)
+- `hotfix/*` (e.g., `hotfix/critical-bug`)
+
+## Commit Message Format
+
+All commit messages **must** follow the [Conventional Commits](https://www.conventionalcommits.org/) format:
+
+```
+<type>(<scope>): <subject>
+
+[optional body]
+
+[optional footer]
+```
+
+### Allowed Types
+
+- `feat`: New feature
+- `fix`: Bug fix
+- `docs`: Documentation only changes
+- `style`: Code style changes (formatting, no logic change)
+- `refactor`: Code refactoring (neither fixes a bug nor adds a feature)
+- `perf`: Performance improvements
+- `test`: Adding or updating tests
+- `build`: Build system or dependency changes
+- `ci`: CI/CD configuration changes
+- `chore`: Other changes (tooling, maintenance)
+- `revert`: Revert a previous commit
+
+### Examples
+
+✅ **Good commit messages:**
+```
+feat(memory): add buffer pooling for better performance
+fix(build): correct Windows DLL export declarations
+docs(readme): update build instructions for macOS
+refactor(tensor): simplify memory allocation logic
+ci(workflows): add coverage reporting to ubuntu workflow
+style: apply clang-format to all source files
+test(buffer): add unit tests for buffer manager
+```
+
+❌ **Bad commit messages:**
+```
+update code
+Fixed bug
+Add feature
+WIP
+asdf
+```
+
+### Scope (Optional but Recommended)
+
+The scope should indicate what part of the codebase is affected:
+- `memory` - Memory management (buffers, allocators)
+- `tensor` - Tensor operations
+- `model` - Model implementation
+- `build` - Build system (CMake, Conan)
+- `ci` - CI/CD workflows
+- `test` - Testing infrastructure
+- `docs` - Documentation
+
+## What Gets Checked
+
+### On Every Commit
+
+1. **File checks:**
+   - Remove trailing whitespace
+   - Ensure files end with newline
+   - Check for large files (>1MB)
+   - Detect merge conflicts
+   - Check YAML/JSON syntax
+   - Detect private keys
+
+2. **Code formatting:**
+   - C/C++ files: `clang-format`
+   - CMake files: `cmake-format`
+   - YAML/JSON/Markdown: `prettier`
+   - Shell scripts: `shellcheck`
+   - Python scripts: `black`
+
+3. **Commit message:**
+   - Must follow conventional commit format
+   - Type must be from allowed list
+   - Subject should be concise
+
+## Bypassing Hooks (Not Recommended)
+
+If you absolutely need to skip the hooks (e.g., for a WIP commit):
+
+```bash
+# --no-verify skips both the pre-commit and the commit-msg hooks
+git commit --no-verify
+
+# Skip a single hook by id and keep the rest (pre-commit's SKIP variable)
+SKIP=clang-format git commit
+```
+
+**⚠️ Warning:** Bypassed commits will still be checked by CI and may fail!
+
+## Updating Hooks
+
+```bash
+# Update to latest versions
+pre-commit autoupdate
+
+# Reinstall hooks after config changes
+pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout
+```
+
+## CI Integration
+
+Pre-commit checks also run in CI (GitHub Actions). If your local hooks pass, CI should pass too.
+
+## Troubleshooting
+
+### "command not found: pre-commit"
+```bash
+pip install pre-commit
+```
+
+### "hook failed" errors
+```bash
+# Run to see detailed error
+pre-commit run --all-files
+
+# Reinstall hooks
+pre-commit uninstall --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout
+pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout
+```
+
+### Formatting conflicts
+```bash
+# Let pre-commit auto-fix formatting
+pre-commit run --all-files
+
+# Stage the auto-fixed files
+git add .
+
+# Commit again
+git commit -m "style: apply automated formatting"
+```
+
+## More Information
+
+- [Pre-commit documentation](https://pre-commit.com/)
+- [Conventional Commits specification](https://www.conventionalcommits.org/)
+- [NovaLLM Contributing Guide](CONTRIBUTING.md)
diff --git a/README.md b/README.md
index f10919d..f8e593d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 [![Ubuntu](https://github.com/peterlau123/NovaLLM/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/ubuntu.yml)
 [![Windows](https://github.com/peterlau123/NovaLLM/actions/workflows/windows.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/windows.yml)
+[![MacOS](https://github.com/peterlau123/NovaLLM/actions/workflows/macos.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/macos.yml)
 [![Code Quality](https://github.com/peterlau123/NovaLLM/actions/workflows/code-quality.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/code-quality.yml)
 [![Documentation](https://github.com/peterlau123/NovaLLM/actions/workflows/documentation.yml/badge.svg)](https://github.com/peterlau123/NovaLLM/actions/workflows/documentation.yml)
 [![codecov](https://codecov.io/gh/peterlau123/NovaLLM/branch/master/graph/badge.svg)](https://codecov.io/gh/peterlau123/NovaLLM)
@@ -16,8 +17,9 @@ A lightweight and efficient C/C++ library for Large Language Model (LLM) inferen
 
 - 🚀 **Lightweight**: Minimal dependencies, focusing on core functionality
 - 🔧 **Extensible**: Easy to extend with custom models and optimizations
-- 🎯 **Efficient**: Support for extreme low-bit quantization
-- 🛠️ **Flexible**: Support for OpenAI Triton and ThunderKittens kernels
+- 🎯 **Efficient**: Support for low-bit quantization and custom kernels
+- 🛠️ **Portable**: Supports inference on macOS, Linux, and Windows
+- 👨‍💻 **Developer-friendly**: Easy to use and integrate into other projects
 
 ## Supported Models
 
@@ -67,6 +69,42 @@ cmake --build .
 
 ### Scripted builds
 
+#### Option 1: Unified Build Script (Recommended)
+
+The root `build.sh` provides a comprehensive build system with full control:
+
+```bash
+# Basic build (Release mode)
+./build.sh
+
+# Build with tests
+./build.sh -t
+
+# Clean build (removes all build-* and install-* directories)
+./build.sh -c -r
+
+# Build everything (main + tests + standalone + package)
+./build.sh -a
+
+# Debug build with verbose output
+./build.sh -d -v
+
+# Show all options
+./build.sh --help
+```
+
+**Key features:**
+- `-c, --clean`: Cleans all `build-*` and `install-*` directories (including custom directories specified via `--build-dir`/`--install-dir`) before building
+- `-r, --release`: Build in Release mode (default)
+- `-d, --debug`: Build in Debug mode
+- `-t, --tests`: Build and run tests
+- `-s, --standalone`: Build standalone application
+- `-p, --package`: Create Conan package
+- `-a, --all`: Build everything
+- `-v, --verbose`: Enable verbose output
+
+#### Option 2: Platform-Specific Scripts
+
 Unified wrapper (auto-detects OS):
 
 ```bash
@@ -84,6 +122,8 @@ scripts/build_ubuntu.sh --type Debug --enable-logging OFF
 scripts/build_windows.ps1 -Configuration Release -EnableLogging ON -WithTests
 ```
 
+**Note:** The root `build.sh` is more feature-rich and recommended for development, while `scripts/build.sh` is a lightweight wrapper for CI/CD pipelines.
+
 ### Makefile builds
 - Use scripts via Make: `make script-build` (honors BUILD_TYPE, ENABLE_LOGGING, ENABLE_TESTS)
 
diff --git a/SETUP.md b/SETUP.md
new file mode 100644
index 0000000..d17989c
--- /dev/null
+++ b/SETUP.md
@@ -0,0 +1,101 @@
+# NovaLLM Development Setup
+
+Quick guide for new contributors.
+
+## One-Time Setup (After Cloning)
+
+Run this single command to set up all development hooks:
+
+```bash
+pip install pre-commit && pre-commit install --hook-type commit-msg --hook-type pre-commit --hook-type post-checkout
+```
+
+**What this does:**
+- Installs pre-commit (if not already installed)
+- Enables automatic validation on every commit
+- Enables branch name validation
+- Enables code quality checks
+
+## Naming Conventions
+
+### Branch Names
+
+Format: `<type>-<description>` or `<type>/<description>`
+
+**Valid types:** feat, fix, docs, style, refactor, perf, test, build, ci, chore
+
+**Examples:**
+```bash
+git checkout -b feat-add-buffer-pooling     ✅
+git checkout -b fix-memory-leak             ✅
+git checkout -b docs-update-readme          ✅
+git checkout -b my-branch                   ❌ (no type prefix)
+```
+
+### Commit Messages
+
+Format: `<type>(<scope>): <subject>`
+
+**Examples:**
+```bash
+git commit -m "feat(memory): add buffer pooling"     ✅
+git commit -m "fix(build): correct DLL exports"      ✅
+git commit -m "docs(readme): update setup guide"     ✅
+git commit -m "update code"                          ❌ (no type)
+```
+
+## What Happens Automatically
+
+After setup, the hooks will:
+
+1. **On `git checkout -b new-branch`:**
+   - ✅ Validate branch name format
+   - ⚠️ Report invalid branch names with a helpful error (the hook runs after the checkout, so the branch still exists and should be renamed)
+
+2. **On `git commit`:**
+   - ✅ Format C++ code with clang-format
+   - ✅ Check for trailing whitespace, large files, etc.
+   - ✅ Validate commit message format
+   - ❌ Reject invalid commits with helpful error
+
+## Cross-Platform Support
+
+Works on:
+- ✅ macOS (zsh, bash)
+- ✅ Linux (bash, zsh, sh)
+- ✅ Windows (Git Bash, PowerShell, WSL)
+
+Requirements:
+- Python 3.7+ (comes with most systems)
+- Git (already have it if you cloned the repo)
+- pip (to install pre-commit)
+
+## Troubleshooting
+
+### "pre-commit: command not found"
+```bash
+pip install pre-commit
+# or
+pip3 install pre-commit
+```
+
+### "Permission denied" on Linux/macOS
+```bash
+chmod +x .githooks/*.py
+chmod +x .githooks/*.sh
+```
+
+### Need to bypass hooks temporarily?
+```bash
+git commit --no-verify -m "WIP: work in progress"
+```
+
+**⚠️ Warning:** Bypassed commits will still be checked by CI!
+
+## Full Documentation
+
+See [.pre-commit-setup.md](.pre-commit-setup.md) for complete documentation.
+
+## Build Instructions
+
+See [README.md](README.md) for build and development instructions.
diff --git a/build.sh b/build.sh
old mode 100644
new mode 100755
index 6fe541e..d8db20f
--- a/build.sh
+++ b/build.sh
@@ -112,7 +112,8 @@ Build Targets:
   -a, --all               Build everything (main + tests + standalone + package)
 
 Build Options:
-  -c, --clean             Clean build directory before building
+  -c, --clean             Clean all build-* and install-* directories (including custom
+                          directories specified via --build-dir/--install-dir) before building
   --build-dir DIR         Set custom build directory (default: build)
   --install-dir DIR       Set custom install directory (default: install)
 
@@ -138,19 +139,23 @@ VERBOSE=false
 parse_args() {
     # Reset target flags if any target is explicitly specified
     local targets_specified=false
+    local main_explicitly_specified=false
     
     # First pass: check if any targets are specified
     for arg in "$@"; do
         case $arg in
+            -m|--main)
+                main_explicitly_specified=true
+                ;;
             -t|--tests|-s|--standalone|-p|--package|-a|--all)
                 targets_specified=true
-                break
                 ;;
         esac
     done
     
-    # If targets are specified, disable default main build
-    if [[ "$targets_specified" = true ]]; then
+    # If targets are specified but main is not explicitly requested, disable default main build
+    # We'll check package freshness later to decide if main needs to be built
+    if [[ "$targets_specified" = true && "$main_explicitly_specified" = false ]]; then
         BUILD_MAIN=false
     fi
     
@@ -297,6 +302,86 @@ setup_conan() {
     fi
 }
 
+# ============================================================================
+# Package Detection Functions
+# ============================================================================
+
+check_package_exists() {
+    # Return 0 when the novallm recipe is listed in the local Conan cache, 1 otherwise.
+    local listing package_ref="novallm/0.1.0@local/testing"
+    # 'conan list' may print a missing recipe as "ERROR: Recipe '<ref>' not found",
+    # which also contains the reference, so ERROR lines are dropped before matching.
+    listing=$(conan list "$package_ref" 2>/dev/null | grep -v "ERROR") || return 1
+    [[ "$listing" == *"$package_ref"* ]]
+}
+
+get_package_timestamp() {
+    # Print the mtime (epoch seconds) of the path that 'conan cache path'
+    # reports for the novallm reference; print 0 when there is no such path
+    # or it cannot be stat'ed.
+    local package_ref="novallm/0.1.0@local/testing"
+    local cache_dir
+    cache_dir=$(conan cache path "$package_ref" 2>/dev/null)
+
+    # Empty output: treat the package as absent.
+    if [[ -z "$cache_dir" ]]; then
+        echo "0"
+        return
+    fi
+
+    # BSD stat (macOS) and GNU stat take different format flags.
+    local mtime
+    case "$(uname)" in
+        Darwin) mtime=$(stat -f "%m" "$cache_dir" 2>/dev/null || echo "0") ;;
+        *)      mtime=$(stat -c "%Y" "$cache_dir" 2>/dev/null || echo "0") ;;
+    esac
+
+    echo "$mtime"
+}
+
+get_source_timestamp() {
+    # Print the newest mtime (epoch seconds) among the regular files found under
+    # source/, include/, CMakeLists.txt and conanfile.py; print 0 if none exist.
+    local newest_time=0
+    local file file_time
+
+    # Pick the stat flavour once instead of forking 'uname' for every file.
+    local -a mtime_cmd=(stat -c "%Y")  # GNU stat
+    if [[ "$(uname)" == "Darwin" ]]; then
+        mtime_cmd=(stat -f "%m")  # BSD stat on macOS
+    fi
+
+    while IFS= read -r -d '' file; do
+        file_time=$("${mtime_cmd[@]}" "$file" 2>/dev/null || echo "0")
+        if [[ $file_time -gt $newest_time ]]; then
+            newest_time=$file_time
+        fi
+    done < <(find source include CMakeLists.txt conanfile.py -type f -print0 2>/dev/null)
+
+    echo "$newest_time"
+}
+
+is_package_outdated() {
+    # Return 0 when the Conan package must be (re)built and 1 when the cached
+    # one can be reused. It counts as outdated when it is missing from the
+    # cache or when the newest source timestamp is later than the package's.
+    check_package_exists || {
+        print_info "Package not found in cache"
+        return 0
+    }
+
+    local pkg_mtime src_mtime
+    pkg_mtime=$(get_package_timestamp)
+    src_mtime=$(get_source_timestamp)
+
+    if ! [[ $src_mtime -gt $pkg_mtime ]]; then
+        print_info "Cached package is up-to-date"
+        return 1
+    fi
+    print_info "Source code is newer than cached package"
+    return 0
+}
+
 # ============================================================================
 # Build Functions
 # ============================================================================
@@ -304,14 +389,45 @@ setup_conan() {
 clean_build_dirs() {
     print_header "Cleaning build directories"
     
-    if [[ -d "$BUILD_DIR" ]]; then
-        print_info "Removing $BUILD_DIR"
-        rm -rf "$BUILD_DIR"
+    local cleaned=false
+    local dirs_to_clean=()
+    
+    # First, add user-specified directories if they exist
+    if [[ -n "$BUILD_DIR" && -d "$BUILD_DIR" ]]; then
+        dirs_to_clean+=("$BUILD_DIR")
+    fi
+    if [[ -n "$INSTALL_DIR" && -d "$INSTALL_DIR" ]]; then
+        dirs_to_clean+=("$INSTALL_DIR")
     fi
     
-    if [[ -d "$INSTALL_DIR" ]]; then
-        print_info "Removing $INSTALL_DIR"
-        rm -rf "$INSTALL_DIR"
+    # Then, find all build and install directories
+    shopt -s nullglob  # Don't include pattern if no match
+    for dir in build build-* build_* install install-* install_*; do
+        if [[ -d "$dir" ]]; then
+            # Avoid duplicates
+            local already_added=false
+            for added_dir in "${dirs_to_clean[@]}"; do
+                if [[ "$dir" == "$added_dir" ]]; then
+                    already_added=true
+                    break
+                fi
+            done
+            if [[ "$already_added" == false ]]; then
+                dirs_to_clean+=("$dir")
+            fi
+        fi
+    done
+    shopt -u nullglob
+    
+    # Remove all collected directories
+    for dir in "${dirs_to_clean[@]}"; do
+        print_info "Removing $dir"
+        rm -rf "$dir"
+        cleaned=true
+    done
+    
+    if [[ "$cleaned" == false ]]; then
+        print_info "No build or install directories to clean"
     fi
     
     print_success "Clean complete"
@@ -531,8 +647,28 @@ main() {
     fi
     
     # Execute build targets
+    # If tests or standalone are requested but main wasn't explicitly requested,
+    # check if we need to rebuild main based on package freshness
+    local need_package=false
+    if [[ "$BUILD_TESTS" == true || "$BUILD_STANDALONE" == true ]]; then
+        need_package=true
+        if [[ "$BUILD_MAIN" == false ]]; then
+            print_header "Checking package freshness"
+            if is_package_outdated; then
+                print_info "Enabling main project build due to outdated/missing package"
+                BUILD_MAIN=true
+            fi
+        fi
+    fi
+    
     if [[ "$BUILD_MAIN" == true ]]; then
         build_main_project
+        
+        # Auto-create package if tests or standalone need it
+        if [[ "$need_package" == true || "$CREATE_PACKAGE" == true ]]; then
+            print_info "Creating/updating Conan package for downstream consumers..."
+            CREATE_PACKAGE=true
+        fi
     fi
     
     if [[ "$CREATE_PACKAGE" == true ]]; then
diff --git a/codecov.yaml b/codecov.yaml
index dae89fd..b29cd08 100644
--- a/codecov.yaml
+++ b/codecov.yaml
@@ -1,5 +1,36 @@
+# Codecov configuration for NovaLLM
+# See: https://docs.codecov.com/docs/codecov-yaml
+
+coverage:
+  status:
+    project:
+      default:
+        target: auto
+        threshold: 1%
+        informational: true
+    patch:
+      default:
+        target: auto
+        threshold: 1%
+        informational: true
+
+  precision: 2
+  round: down
+  range: "70...100"
+
 ignore:
-  - "test"
+  - "test/**"
+  - "standalone/**"
+  - "documentation/**"
+  - "build*/**"
+  - "cmake/**"
 
 comment:
-  require_changes: true
\ No newline at end of file
+  layout: "reach,diff,flags,tree,footer"
+  behavior: default
+  require_changes: true
+  require_base: false
+  require_head: true
+
+fixes:
+  - "before/:/after/"  # Path fixes if needed
diff --git a/documentation/memory/buffer_hub_design.md b/documentation/memory/buffer_hub_design.md
new file mode 100644
index 0000000..ba165d2
--- /dev/null
+++ b/documentation/memory/buffer_hub_design.md
@@ -0,0 +1,61 @@
+# Buffer Hub Overview
+
+## Design
+
+We divide memory into the following four major levels:
+
++ Byte level
+  Byte number ranges from 0 to 1023
++ KB level
+  Byte number ranges from 1024 to 1024*1023
++ MB level
+  Byte number ranges from 1024*1024 to 1024*1024*1023 
++ GB level
+  Byte number ranges from 1024*1024*1024 to min(1024*1024*1024*1023,Device memory)
+
+On top of that, we further divide each major level into sub levels.
+
+In byte level, we form the following sub levels
++ 16 bytes
++ 64 bytes
++ 256 bytes
+  
+In KB level, we form the following sub levels
++ 1 KB
++ 2 KB
++ 4 KB
++ 8 KB
++ 16 KB
++ 32 KB
++ 64 KB
++ 128 KB
++ 256 KB
++ 512 KB
+  
+In MB level, we form the following sub levels
++ 1 MB
++ 2 MB
++ 4 MB
++ 8 MB
++ 16 MB
++ 32 MB
++ 64 MB
++ 128 MB
++ 256 MB
++ 512 MB
+
+In GB level, we form the following sub levels
++ 1 GB
++ 2 GB
++ 4 GB
++ 8 GB
++ 16 GB
++ 32 GB
++ 64 GB
++ 128 GB
++ 256 GB
++ 512 GB
+
+
+
+## Usage
diff --git a/format.sh b/format.sh
index 04c1513..b34cc94 100755
--- a/format.sh
+++ b/format.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # Define the directories to search for C++ files
-DIRECTORIES=("include" "source" "standalone" "test")
+DIRECTORIES=("include" "source" "standalone/source" "test/source")
 
 # Find all C++ source and header files in the specified directories
 FILES=$(find "${DIRECTORIES[@]}" -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cu" -o -name "*.cuh" \) 2>/dev/null)
diff --git a/include/NovaLLM/common/device.h b/include/NovaLLM/common/device.h
index eee659a..bb248eb 100644
--- a/include/NovaLLM/common/device.h
+++ b/include/NovaLLM/common/device.h
@@ -8,16 +8,16 @@ enum class DeviceType : uint32_t { UNKNOWN = 0, CPU = 0x01, CUDA = 0x02, METAL =
 
 struct DeviceTypeFlags {
  public:
-  [[nodiscard]] bool has(DeviceType type) const;
+  [[nodiscard]] NOVA_LLM_API bool has(DeviceType type) const;  ///< True when any bit of `type` is set in the stored flags.
 
-  void set(DeviceType type);
+  NOVA_LLM_API void set(DeviceType type);
 
-  void clear(DeviceType type);
+  NOVA_LLM_API void clear(DeviceType type);
 
-  [[nodiscard]] constexpr DeviceType get() const;
+  [[nodiscard]] NOVA_LLM_API constexpr DeviceType get() const;
 
  private:
   uint32_t flags_ = 0;
 };
 
-}  // namespace nova_llm
\ No newline at end of file
+}  // namespace nova_llm
diff --git a/include/NovaLLM/data/tensor.h b/include/NovaLLM/data/tensor.h
index d917109..6efdf0f 100644
--- a/include/NovaLLM/data/tensor.h
+++ b/include/NovaLLM/data/tensor.h
@@ -1,8 +1,16 @@
 #pragma once
+
+// Disable C4251 warning on Windows (DLL interface for STL containers)
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4251)
+#endif
+
+#include <atomic>
 #include <cstdint>
 #include <functional>
 #include <vector>
-#include <atomic>
+
 #include "../common/device.h"
 #include "../common/dtype.h"
 #include "NovaLLM/utils/macros.h"
@@ -30,7 +38,7 @@ class NOVA_LLM_API Tensor {
    * @brief 默认删除器
    */
   struct DefaultDeletor {
-    void operator()(void** data) {}
+    void operator()(void** /*data*/) {}
   };
 
   using Deleter = std::function<void(void**)>;
@@ -58,11 +66,7 @@ class NOVA_LLM_API Tensor {
    * @param device 设备类型
    * @param deleter 自定义删除器，默认使用DefaultDeletor
    */
-  Tensor(const void* data,
-         const std::vector<uint32_t>& dims,
-         DataType dtype,
-         DeviceType device,
-         Deleter deleter = DefaultDeletor());
+  Tensor(const void* data, const std::vector<uint32_t>& dims, DataType dtype, DeviceType device, Deleter deleter = DefaultDeletor());
 
   /**
    * @brief 拷贝构造函数
@@ -125,7 +129,7 @@ class NOVA_LLM_API Tensor {
 
   DataSourceType dataFrom() const { return m_data_source_; }
 
-  uint32_t refCnt() const { return ref_cnt_?ref_cnt_->load():0; }
+  uint32_t refCnt() const { return ref_cnt_ ? ref_cnt_->load() : 0; }
 
   Deleter deleter() const { return m_deleter_; }
 
@@ -147,7 +151,7 @@ class NOVA_LLM_API Tensor {
   std::vector<uint32_t> dims_;  ///< 张量的维度数组
   uint32_t ele_cnt_ {0};        ///< 元素总数
   void* data_ {nullptr};        ///< 数据缓冲区
-  uint64_t capacity_ {0};  ///< 数据缓冲区大小，单位为字节，大于等于size_*sizeof(m_dtype_)
+  uint64_t capacity_ {0};       ///< 数据缓冲区大小，单位为字节，大于等于size_*sizeof(m_dtype_)
   DataSourceType m_data_source_ {DataSourceType::AUTO};
   DataType m_dtype_ {DataType::UNKNOWN};       ///< 数据类型
   DeviceType m_device_ {DeviceType::UNKNOWN};  ///< 设备类型
@@ -155,4 +159,8 @@ class NOVA_LLM_API Tensor {
   Deleter m_deleter_ = DefaultDeletor();       ///< 自定义删除器
 };
 
-}  // namespace nova_llm
\ No newline at end of file
+}  // namespace nova_llm
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/include/NovaLLM/memory/buffer_hub.h b/include/NovaLLM/memory/buffer_hub.h
index 13f6fe2..bbb2512 100644
--- a/include/NovaLLM/memory/buffer_hub.h
+++ b/include/NovaLLM/memory/buffer_hub.h
@@ -1,93 +1,47 @@
 #pragma once
+
+// Disable C4251 warning on Windows (DLL interface for STL containers)
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4251)
+#endif
+
+#include <cmath>
 #include <list>
+#include <memory>
+#include <mutex>
 #include <unordered_map>
 #include <vector>
-#include <cmath>
+
 #include "NovaLLM/common/device.h"
 #include "NovaLLM/memory/allocator.h"
 #include "NovaLLM/memory/buffer_define.h"
-#include "NovaLLM/utils/template.h"
 #include "NovaLLM/utils/macros.h"
+#include "NovaLLM/utils/template.h"
 
 namespace nova_llm {
 
-struct Size {
+// Forward declaration
+class BufferHub;
+
+struct NOVA_LLM_API Size {
  private:
-  uint64_t b_ = 0;
-  uint64_t kb_ = 0;
-  uint64_t mb_ = 0;
-  uint64_t gb_ = 0;
-  uint64_t total_bytes_ = 0;
-  const uint64_t ratio_ = 1024;
+  uint64_t bytes_ = 0;
 
-  void convert_in_units(uint64_t bytes) {
-    auto down_ratio = std::pow(ratio_, 3);
+ public:
+  Size() = default;
 
-    gb_ = bytes / down_ratio;
-    bytes -= gb_ * down_ratio;
-    down_ratio /= ratio_;
+  explicit Size(uint64_t bytes) : bytes_(bytes) {}
 
-    mb_ = bytes / down_ratio;
-    bytes -= mb_ * down_ratio;
-    down_ratio /= ratio_;
+  Size(const Size& rhs) = default;
 
-    kb_ = bytes / down_ratio;
-    bytes -= kb_ * down_ratio;
+  Size& operator=(const Size& rhs) = default;
 
-    b_ = bytes;
-  }
+  [[nodiscard]] uint64_t totalBytes() const { return bytes_; }
 
- public:
-  Size() = default;
+  bool operator==(const Size& rhs) const { return bytes_ == rhs.bytes_; }
 
-  explicit Size(uint64_t sz) {
-    total_bytes_ = sz;
-    convert_in_units(total_bytes_);
-  }
-
-  Size(uint64_t b, uint64_t kb, uint64_t mb, uint64_t gb) {
-    b_ = b;
-    kb_ = kb;
-    mb_ = mb;
-    gb_ = gb;
-
-    if (ratio_ < b_) {
-      auto kb_cnt = b_ / ratio_;
-      b_ -= kb_cnt * ratio_;
-      kb_ += kb_cnt;
-    }
-
-    if (ratio_ < kb_) {
-      auto mb_cnt = kb_ / ratio_;
-      kb_ -= mb_cnt * ratio_;
-      mb_ += mb_cnt;
-    }
-
-    if (ratio_ < mb_) {
-      auto gb_cnt = mb_ / ratio_;
-      mb_ -= gb_cnt * ratio_;
-      gb_ += gb_cnt;
-    }
-
-    total_bytes_ = b_ + kb_ * ratio_ + mb_ * ratio_ * ratio_ + gb_ * ratio_ * ratio_ * ratio_;
-  }
-
-  Size(const Size& rhs) {
-    total_bytes_ = rhs.totalBytes();
-    convert_in_units(total_bytes_);
-  }
-
-  Size& operator=(const Size& rhs) {
-    total_bytes_ = rhs.totalBytes();
-    convert_in_units(total_bytes_);
-    return *this;
-  }
-
-  [[nodiscard]] uint64_t totalBytes() const { return total_bytes_; }
-
-  bool operator==(const Size& rhs) const { return totalBytes() == rhs.totalBytes(); }
-
-  [[nodiscard]] bool isValid() const { return totalBytes() != 0; }
+  [[nodiscard]] bool isValid() const { return bytes_ != 0; }
 };
 
 struct SizeHash {
@@ -95,35 +49,111 @@ struct SizeHash {
 };
 
 struct SizeEqual {
-  bool operator()(const Size& lhs, const Size& rhs) const {
-    return lhs.totalBytes() == rhs.totalBytes();
-  }
+  bool operator()(const Size& lhs, const Size& rhs) const { return lhs.totalBytes() == rhs.totalBytes(); }
 };
 
 struct Block {
   using DataPtr = uint8_t*;
-  //using BlockPtr = Block*;
   DataPtr data = nullptr;
   uint64_t size = 0;
   int32_t ref_cnt = 0;
 
-  bool isValid() const {
-    // return data != nullptr && (prev != nullptr || next != nullptr) && 0 != size;
-    return data != nullptr && 0 != size;
-  }
+  bool isValid() const { return data != nullptr && 0 != size; }
 };
 
-using BlockPtr = Block*;
+// BlockPtr for owning pointers (used in collections)
+using BlockPtr = std::unique_ptr<Block>;
+// Raw non-owning pointer for temporary access
+using BlockRawPtr = Block*;
+
+class NOVA_LLM_API LevelAssignStrategy {
+ public:
+  virtual std::vector<Size> assignLevels();
+};
 
-class DefaultSizeLevelStrategy {
+class NOVA_LLM_API BufferHubConfig {
  public:
-  NOVA_LLM_API static std::vector<Size> byteSizes() ;
+  /// Builds a hub configuration; size_levels_ is filled from strategy.assignLevels().
+  /// The default size limit is 4 GiB. It is spelled 4ULL because `unsigned long` is only 32 bits wide on
+  /// Windows (LLP64), where 4UL*1024*1024*1024 wraps around to 0.
+  BufferHubConfig(DeviceType device_type, IAllocatorSharedPtr allocator, Size size_limit=Size(4ULL*1024*1024*1024), LevelAssignStrategy strategy = LevelAssignStrategy(), float warning_level = 0.95f)
+      : device_type_(device_type), size_limit_(size_limit), warning_level_(warning_level),
+        allocator_(allocator), level_assign_strategy_(strategy) {
+    size_levels_ = strategy.assignLevels();
+  }
+
+  void setLevelAssignStrategy(LevelAssignStrategy strategy) { size_levels_ = strategy.assignLevels(); }
+
+  void setWarningLevel(float warning_level) { warning_level_ = warning_level; }
+
+  DeviceType deviceType() const { return device_type_; }
 
-  NOVA_LLM_API static std::vector<Size> kiloByteSizes() ;
+  const std::vector<Size>& sizeLevels() const { return size_levels_; }
 
-  NOVA_LLM_API static std::vector<Size> megaByteSizes() ;
+  Size sizeLimit() const { return size_limit_; }
 
-  NOVA_LLM_API static std::vector<Size> gigaByteSizes() ;
+  float warningLevel() const { return warning_level_; }
+
+  IAllocatorSharedPtr allocator() const { return allocator_; }
+
+ private:
+  DeviceType device_type_;
+  std::vector<Size> size_levels_;  // ensure that levels are in ascending order
+  Size size_limit_;                // Memory in buffer hub cannot exceed this limit
+  float warning_level_;            // Be cautious when memory in buffer hub exceeds size_limit*warning_level
+  IAllocatorSharedPtr allocator_;
+  LevelAssignStrategy level_assign_strategy_;
+};
+
+class BufferHub;
+/**
+ * @brief Pool of blocks for one size level of a BufferHub.
+ * Blocks are owned by block_list_ and indexed by data pointer in free_map_ / busy_map_.
+ */
+class NOVA_LLM_API BufferHubLevel {
+ public:
+  // Default state is unbound (index -1, block size 0, no hub) until initialize() is called.
+  BufferHubLevel() = default;
+
+  // Movable: block_list_ stores std::unique_ptr<Block>, which can be moved but not copied.
+  BufferHubLevel(BufferHubLevel&&) = default;
+  BufferHubLevel& operator=(BufferHubLevel&&) = default;
+
+  // Copying is disabled for the same reason (unique ownership of the blocks).
+  BufferHubLevel(const BufferHubLevel&) = delete;
+  BufferHubLevel& operator=(const BufferHubLevel&) = delete;
+
+  void initialize(uint32_t index, const Size& block_size, BufferHub* hub);  ///< Stores the level index, block size and hub pointer.
+
+  // Returns a non-owning pointer to a free block (the level keeps ownership); refills first when free_map_ is empty.
+  BlockRawPtr fetchOneFreeBlock();
+
+  // Accepts non-owning pointer for blocks already in the pool
+  void putOneBlock(BlockRawPtr block_ptr);
+
+  // Attempts to put a block back by its data pointer. Returns true if successful.
+  bool tryPutBlock(Block::DataPtr data);
+
+  size_t busyBlockCount() const;  ///< Number of entries in busy_map_.
+
+  size_t totalBlocks() const;  ///< Number of blocks in block_list_.
+
+  ~BufferHubLevel();
+
+ private:
+  void refill(const Size& sz);  ///< Called by fetchOneFreeBlock() with expand_factor_ * block size when no block is free.
+
+  uint32_t index_ = static_cast<uint32_t>(-1); // level index in buffer hub; stays at the uint32 maximum until initialize()
+  Size block_size_ {static_cast<uint64_t>(0)}; // each block size at this level, in bytes
+  uint32_t expand_factor_ = 2; // multiplier applied to the block size when requesting a refill
+
+  std::list<BlockPtr> block_list_; // Owns the blocks
+  using BlockIterator = std::list<BlockPtr>::iterator;
+  // Both maps are keyed by a block's data pointer and store that block's position in block_list_.
+  std::unordered_map<Block::DataPtr, BlockIterator> free_map_;
+  std::unordered_map<Block::DataPtr, BlockIterator> busy_map_;
+
+  BufferHub* hub_ = nullptr; // hub passed to initialize(); nullptr until then
 };
 
 /*
@@ -138,79 +168,70 @@ class DefaultSizeLevelStrategy {
  * */
 class NOVA_LLM_API BufferHub {
  public:
-  struct Config {
-    DeviceType device_type;
-    std::vector<Size> size_levels;  // ensure that levels are in ascending order
-    Size size_limit {0, 0, 0, 8};   // Memory in buffer hub cannot exceed this limit
-    float warning_level =
-        0.95;  // Be cautious when memory in buffer hub exceeds size_limit*warning_level
-    IAllocatorSharedPtr allocator;
-  };
-
-  struct Level {
-   public:
-    BlockPtr fetchOneFreeBlock();
-
-    void putOneBlock(const BlockPtr& block_ptr);
-
-    void refill(const Size& sz);
-
-    ~Level();
-
-    uint32_t index = -1;
-    Size level_size {static_cast<uint64_t>(0)};  // each block size at this level
-
-    //using BlockPtr = Block*;
-    std::list<BlockPtr> block_list;
-    using BlockIterator = std::list<BlockPtr>::iterator;
-    std::unordered_map<Block::DataPtr, BlockIterator> free_map;
-    std::unordered_map<Block::DataPtr, BlockIterator> busy_map;
-    BufferHub* hub;
-  };
+  friend class BufferHubConfig;
+  friend class BufferHubLevel;
 
   class Builder {
    public:
-    NOVA_LLM_API static BufferHub* build(const Config& config);
+    NOVA_LLM_API static BufferHub* build(const BufferHubConfig& config);
 
     NOVA_LLM_API static void destroy(BufferHub** hub);
   };
 
-  void initConfig(const Config& config);
+  void initConfig(const BufferHubConfig& config);
+
+  // Returns non-owning pointer to block managed by pool
+  BlockRawPtr getBlock(const Size& sz);
 
-  BlockPtr getBlock(const Size& sz);
+  // Accepts non-owning pointer to block managed by pool
+  void putBlock(BlockRawPtr block);
 
-  void putBlock(const BlockPtr& block);
+  // Return a buffer to the pool and clear the Buffer to avoid dangling pointers.
+  void putBlockFromBuffer(Buffer& buffer);
 
-  void putBlockFromBuffer(const Buffer& buffer);
+  void addSizeLevel(uint32_t index, const Size& level_sz);
+
+  void eraseSizeLevel(const Size& level_sz);
 
  private:
   Block::DataPtr allocData(uint64_t sz);
   void deallocData(Block::DataPtr& data_ptr);
 
+  // Creates a new block with ownership
   BlockPtr allocBlock();
-  void deallocateBlock(BlockPtr& block_ptr);
+  void deallocateBlock(BlockPtr block);
 
-  BlockPtr setUpBlock(const Size& sz);  // alloc and init block
+  // Creates and initializes a new block
+  BlockPtr setUpBlock(const Size& sz);
 
-  void tearDownBlock(BlockPtr& block);
+  // Cleans up and destroys a block
+  void tearDownBlock(BlockPtr block);
 
-  void addSizeLevel(uint32_t index, const Size& level_sz);
+  [[nodiscard]] Size gradeLevel(const Size& sz) const;
 
-  void eraseSizeLevel(const Size& level_sz);
+  BufferHub();
 
-  [[nodiscard]] Size gradeLevel(const Size& sz) const;
+  ~BufferHub();
 
-  BufferHub() = default;
+  // Thread safety: protects all mutable state
+  mutable std::mutex mutex_;
+
+  std::unordered_map<Size, std::unique_ptr<BufferHubLevel>, SizeHash, SizeEqual> buffers_;
 
-  std::unordered_map<Size, Level, SizeHash, SizeEqual> buffers_;
   DeviceType device_type_;
+
   std::vector<Size> size_levels_;  // ensure that levels are in ascending order
-  Size size_limit_ {0, 0, 0, 4};   // Memory in buffer hub cannot exceed this limit
 
-  // Be cautious when memory in buffer hub exceeds size_limit*warning_level
-  float warning_level_ = 0.95;
+  Size size_limit_;  // Memory in buffer hub cannot exceed this limit
+
+  float warning_level_ = 0.95f; // Be cautious when memory in buffer hub exceeds size_limit*warning_level
 
   IAllocatorSharedPtr allocator_;
+
 };
 
 }  // namespace nova_llm
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/include/NovaLLM/memory/buffer_manager.h b/include/NovaLLM/memory/buffer_manager.h
index 4333e71..8e19e41 100644
--- a/include/NovaLLM/memory/buffer_manager.h
+++ b/include/NovaLLM/memory/buffer_manager.h
@@ -1,12 +1,17 @@
 #pragma once
 #include <cstddef>
 #include <functional>
-#include <unordered_map>
 #include <memory>
+#include <unordered_map>
+
 #include "NovaLLM/common/device.h"
 #include "NovaLLM/memory/allocator.h"
 #include "NovaLLM/memory/buffer_define.h"
 #include "NovaLLM/memory/buffer_hub.h"
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4251)
+#endif
 
 namespace nova_llm {
 /*
@@ -54,18 +59,18 @@ class NOVA_LLM_API BufferManager {
 
   BufferManager& operator=(BufferManager&&) = delete;  // Disable move assignment
 
-  [[nodiscard("Do not drop isInit return value")]] bool isInited() const { return is_init_; }
+  [[nodiscard]] bool isInited() const { return is_init_; }
 
   Buffer fetch(size_t size, DeviceType device_type);
 
-  void put(const Buffer& buffer);
+  // Return a buffer obtained from fetch back to the pool and clear it.
+  void put(Buffer& buffer);
 
   ~BufferManager();
 
- private:
-
   void destroy();
 
+ private:
   BufferManager() = default;
 
   bool init(const Config& config);
@@ -76,3 +81,7 @@ class NOVA_LLM_API BufferManager {
 };
 
 }  // namespace nova_llm
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
\ No newline at end of file
diff --git a/include/NovaLLM/utils/log.h b/include/NovaLLM/utils/log.h
index ab7563e..ee8058a 100644
--- a/include/NovaLLM/utils/log.h
+++ b/include/NovaLLM/utils/log.h
@@ -4,18 +4,19 @@
 //  - If NOVA_LLM_ENABLE_LOGGING is defined and spdlog is available, use spdlog.
 //  - Otherwise provide a no-op Logger and a minimal spdlog::level::level_enum so callers compile.
 
-#include <string>
 #include <memory>
+#include <string>
 
 // Prefer spdlog when logging is enabled and available
 #if defined(NOVA_LLM_ENABLE_LOGGING) && NOVA_LLM_ENABLE_LOGGING && __has_include(<spdlog/spdlog.h>)
 
 #if __has_include(<fmt/ostream.h>)
-#  include <fmt/ostream.h>
+#include <fmt/ostream.h>
 #elif __has_include(<spdlog/fmt/ostr.h>)
-#  include <spdlog/fmt/ostr.h>
+#include <spdlog/fmt/ostr.h>
 #endif
 #include <spdlog/spdlog.h>
+
 #include <filesystem>
 
 namespace nova_llm {
@@ -27,9 +28,7 @@ class Logger {
     return instance;
   }
 
-  void init(const std::string& name = "NovaLLM",
-            const std::string& logFile = "NovaLLM.log",
-            spdlog::level::level_enum level = spdlog::level::info);
+  void init(const std::string& name = "NovaLLM", const std::string& logFile = "NovaLLM.log", spdlog::level::level_enum level = spdlog::level::info);
 
   void setLevel(spdlog::level::level_enum level) {
     if (logger_) logger_->set_level(level);
@@ -100,9 +99,7 @@ class Logger {
     return instance;
   }
 
-  void init(const std::string& /*name*/ = "NovaLLM",
-            const std::string& /*logFile*/ = "NovaLLM.log",
-            spdlog::level::level_enum /*level*/ = spdlog::level::info);
+  void init(const std::string& /*name*/ = "NovaLLM", const std::string& /*logFile*/ = "NovaLLM.log", spdlog::level::level_enum /*level*/ = spdlog::level::info);
 
   void setLevel(spdlog::level::level_enum /*level*/) {}
 
diff --git a/include/NovaLLM/utils/macros.h b/include/NovaLLM/utils/macros.h
index 25f5ad1..5d5c30d 100644
--- a/include/NovaLLM/utils/macros.h
+++ b/include/NovaLLM/utils/macros.h
@@ -8,31 +8,29 @@
 #define NOVA_LLM_VERSION_MINOR 1
 #define NOVA_LLM_VERSION_PATCH 0
 #define NOVA_LLM_VERSION_STRING "0.1.0"
-#define NOVA_LLM_VERSION \
-  (NOVA_LLM_VERSION_MAJOR * 10000 + NOVA_LLM_VERSION_MINOR * 100 + NOVA_LLM_VERSION_PATCH)
+#define NOVA_LLM_VERSION (NOVA_LLM_VERSION_MAJOR * 10000 + NOVA_LLM_VERSION_MINOR * 100 + NOVA_LLM_VERSION_PATCH)
 
 // For API export and import
 #if defined(_WIN32)
-  // When building the library define NOVA_LLM_EXPORTS (set by CMake)
-  #if defined(NOVA_LLM_EXPORTS)
-    #define NOVA_LLM_API __declspec(dllexport)
-  #else
-    #define NOVA_LLM_API __declspec(dllimport)
-  #endif
+// When building the library define NOVA_LLM_EXPORTS (set by CMake)
+#if defined(NOVA_LLM_EXPORTS)
+#define NOVA_LLM_API __declspec(dllexport)
 #else
-  #define NOVA_LLM_API __attribute__((visibility("default")))
+#define NOVA_LLM_API __declspec(dllimport)
+#endif
+#else
+#define NOVA_LLM_API __attribute__((visibility("default")))
 #endif
 
 // For debugging and runtime check
 #ifdef NDEBUG
 #define ASSERT(condition, message) ((void)0)
 #else
-#define ASSERT(condition, message)                                                            \
-  do {                                                                                        \
-    if (!(condition)) {                                                                       \
-      throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + " " + \
-                               message);                                                      \
-    }                                                                                         \
+#define ASSERT(condition, message)                                                                      \
+  do {                                                                                                  \
+    if (!(condition)) {                                                                                 \
+      throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + " " + message); \
+    }                                                                                                   \
   } while (0)
 #endif
 
diff --git a/scripts/build.sh b/scripts/build.sh
old mode 100644
new mode 100755
diff --git a/scripts/build_macos.sh b/scripts/build_macos.sh
old mode 100644
new mode 100755
diff --git a/scripts/build_ubuntu.sh b/scripts/build_ubuntu.sh
old mode 100644
new mode 100755
diff --git a/source/data/tensor.cpp b/source/data/tensor.cpp
index 5f8f5a0..b2f9a31 100644
--- a/source/data/tensor.cpp
+++ b/source/data/tensor.cpp
@@ -57,15 +57,7 @@ Tensor::Tensor()
     , m_deleter_(DefaultDeletor()) {}
 
 Tensor::Tensor(const std::vector<uint32_t>& dims, DataType dtype, DeviceType device)
-    : dims_(dims)
-    , ele_cnt_(0)
-    , data_(nullptr)
-    , capacity_(0)
-    , m_data_source_(DataSourceType::AUTO)
-    , m_dtype_(dtype)
-    , m_device_(device)
-    , ref_cnt_{nullptr}
-    , m_deleter_(DefaultDeletor()) {
+    : dims_(dims), ele_cnt_(0), data_(nullptr), capacity_(0), m_data_source_(DataSourceType::AUTO), m_dtype_(dtype), m_device_(device), ref_cnt_ {nullptr}, m_deleter_(DefaultDeletor()) {
   // Check if the data type is valid
   ASSERT(dtype >= DataType::INT8 && dtype < DataType::TOTAL, "Invalid data type");
   // Check if the device type is valid
@@ -77,9 +69,7 @@ Tensor::Tensor(const std::vector<uint32_t>& dims, DataType dtype, DeviceType dev
   this->data_ = buffer.data;
   this->capacity_ = buffer.size;
   m_deleter_ = [&](void** data) {
-    Buffer buffer {static_cast<decltype(std::declval<Buffer>().data)>(*data),
-                   static_cast<decltype(std::declval<Buffer>().size)>(capacity_),
-                   m_device_};
+    Buffer buffer {static_cast<decltype(std::declval<Buffer>().data)>(*data), static_cast<decltype(std::declval<Buffer>().size)>(capacity_), m_device_};
     buffer_manager.put(buffer);
     *data = nullptr;
   };
@@ -87,11 +77,7 @@ Tensor::Tensor(const std::vector<uint32_t>& dims, DataType dtype, DeviceType dev
   *ref_cnt_ = 1;
 }
 
-Tensor::Tensor(const void* data,
-               const std::vector<uint32_t>& dims,
-               DataType dtype,
-               DeviceType device,
-               Deleter deleter) {
+Tensor::Tensor(const void* data, const std::vector<uint32_t>& dims, DataType dtype, DeviceType device, Deleter deleter) {
   ASSERT(nullptr != data, "data cannot be null!");
   ASSERT(DataType::UNKNOWN < dtype, "data type must be specified!");
   ASSERT(DeviceType::UNKNOWN < device, "device type must be specified!");
@@ -133,7 +119,7 @@ Tensor& Tensor::operator=(const Tensor& other) {
     m_data_source_ = other.dataFrom();
     m_dtype_ = other.dtype();
     m_device_ = other.device();
-    ref_cnt_ = other.ref_cnt_;//TODO:notice here
+    ref_cnt_ = other.ref_cnt_;  // TODO:notice here
     m_deleter_ = other.deleter();
   }
   return *this;
diff --git a/source/device/device.cpp b/source/device/device.cpp
index 7755ea3..749037d 100644
--- a/source/device/device.cpp
+++ b/source/device/device.cpp
@@ -2,9 +2,7 @@
 
 namespace nova_llm {
 
-bool DeviceTypeFlags::has(DeviceType type) const {
-  return (flags_ & static_cast<uint32_t>(type)) != 0;
-}
+bool DeviceTypeFlags::has(DeviceType type) const { return (flags_ & static_cast<uint32_t>(type)) != 0; }
 
 // 添加设备
 void DeviceTypeFlags::set(DeviceType type) { flags_ |= static_cast<uint32_t>(type); }
diff --git a/source/memory/buffer_hub.cpp b/source/memory/buffer_hub.cpp
index 463e5cb..8836e4a 100644
--- a/source/memory/buffer_hub.cpp
+++ b/source/memory/buffer_hub.cpp
@@ -6,12 +6,26 @@
 
 namespace nova_llm {
 
+// Size class is now header-only with simplified implementation
+
+namespace {
+class DefaultSizeLevelStrategy {
+ public:
+  static std::vector<Size> byteSizes();
+
+  static std::vector<Size> kiloByteSizes();
+
+  static std::vector<Size> megaByteSizes();
+
+  static std::vector<Size> gigaByteSizes();
+};
+
 std::vector<Size> DefaultSizeLevelStrategy::byteSizes() {
   std::vector<Size> ret;
   uint32_t base = 64;
   uint32_t ratio = 2;
   for (uint64_t i = base; i < 1024;) {
-    ret.push_back(Size(i, 0, 0, 0));
+    ret.push_back(Size(i));  // bytes
     i *= ratio;
   }
   return ret;
@@ -22,7 +36,7 @@ std::vector<Size> DefaultSizeLevelStrategy::kiloByteSizes() {
   uint32_t base = 4;
   uint32_t ratio = 2;
   for (uint64_t i = base; i < 1024;) {
-    ret.push_back(Size(0, i, 0, 0));
+    ret.push_back(Size(i * 1024));  // kilobytes to bytes
     i *= ratio;
   }
   return ret;
@@ -33,7 +47,7 @@ std::vector<Size> DefaultSizeLevelStrategy::megaByteSizes() {
   uint32_t base = 2;
   uint32_t ratio = 2;
   for (uint64_t i = base; i < 1024;) {
-    ret.push_back(Size(0, 0, i, 0));
+    ret.push_back(Size(i * 1024 * 1024));  // megabytes to bytes
     i *= ratio;
   }
   return ret;
@@ -44,103 +58,146 @@ std::vector<Size> DefaultSizeLevelStrategy::gigaByteSizes() {
   uint32_t base = 1;
   uint32_t ratio = 2;
   for (uint64_t i = base; i < 10;) {
-    ret.push_back(Size(0, 0, 0, i));
+    ret.push_back(Size(i * 1024ULL * 1024 * 1024));  // gigabytes to bytes
     i *= ratio;
   }
   return ret;
 }
+}  // namespace
+
+std::vector<Size> LevelAssignStrategy::assignLevels() {
+  // Each generator returns a fresh vector by value, so begin()/end() must come from ONE materialised copy, never from two separate calls.
+  std::vector<Size> ret;
+  for (const auto& sizes : {DefaultSizeLevelStrategy::byteSizes(), DefaultSizeLevelStrategy::kiloByteSizes(), DefaultSizeLevelStrategy::megaByteSizes(), DefaultSizeLevelStrategy::gigaByteSizes()}) {
+    ret.insert(ret.end(), sizes.begin(), sizes.end());  // append in ascending order: bytes, KB, MB, GB
+  }
+  return ret;
+}
+
+void BufferHubLevel::initialize(uint32_t index, const Size& block_size, BufferHub* hub) {
+  index_ = index;
+  block_size_ = block_size;
+  hub_ = hub;
+}
+
+size_t BufferHubLevel::busyBlockCount() const {
+  return busy_map_.size();
+}
+
+size_t BufferHubLevel::totalBlocks() const {
+  return block_list_.size();
+}
+
+BlockRawPtr BufferHubLevel::fetchOneFreeBlock() {
+  BlockRawPtr ret_block {nullptr};
+
+  if (free_map_.empty()) {
+    LOG_INFO("No free block at level %d,refilling...", index_);
+    auto block_bytes = this->block_size_.totalBytes();
+    refill(Size(expand_factor_ * block_bytes));  // allocate expand_factor blocks
+  }
 
-BlockPtr BufferHub::Level::fetchOneFreeBlock() {
-  BlockPtr ret_block {nullptr};
-  if (!free_map.empty()) {
-    auto it = free_map.begin();
+  if (!free_map_.empty()) {
+    LOG_INFO("Found free block at level %d", index_);
+    auto it = free_map_.begin();
     auto block_it = it->second;
+    // Transition from free to busy: increment ref_cnt from 0 to 1
     (*block_it)->ref_cnt++;
-    busy_map.insert({it->first, it->second});
-    free_map.erase(it);
-    ret_block = *block_it;
+    busy_map_.insert({it->first, it->second});
+    free_map_.erase(it);
+    ret_block = block_it->get();  // Return non-owning pointer
   } else {
-    auto level_bytes = level_size.totalBytes();
-
-    auto kb_level = Size(0, 1, 0, 0);
-    auto mb_level = Size(0, 0, 1, 0);
-    auto gb_level = Size(0, 0, 0, 1);
-    auto kb_bytes = kb_level.totalBytes();
-    auto mb_bytes = mb_level.totalBytes();
-    auto gb_bytes = gb_level.totalBytes();
-
-    if (level_bytes < kb_bytes) {
-      refill(kb_level);
-    } else if (level_bytes < mb_bytes) {
-      refill(mb_level);
-    } else if (level_bytes < gb_bytes) {
-      refill(gb_level);
-    }
-    ret_block = *(free_map.begin()->second);
+    LOG_WARN("Unable to fetch free block at level %d even after refill", index_);
   }
+
   return ret_block;
 }
 
-void BufferHub::Level::refill(const nova_llm::Size& sz) {
-  auto dst_size = sz.totalBytes();
-  auto level_bytes = this->level_size.totalBytes();
-  uint64_t cnt = dst_size / level_bytes;
+void BufferHubLevel::refill(const nova_llm::Size& dst_sz) {
+  if (!hub_) return;
+  auto dst_total_bytes = dst_sz.totalBytes();
+  auto block_bytes = this->block_size_.totalBytes();
+  uint64_t cnt = dst_total_bytes / block_bytes;
 
-  auto* data = this->hub->allocData(dst_size);
+  // Allocate data per block so that each pointer we free was directly allocated
+  // Blocks start in the free list with ref_cnt == 0.
   for (uint64_t i = 0; i < cnt; i++) {
-    auto* one_block = hub->allocBlock();
-    one_block->data = data + i * level_bytes;
-    one_block->size = level_bytes;
-    one_block->ref_cnt = 1;// set ref_cnt to 1 when allocated
-    auto it = this->block_list.insert(this->block_list.end(), one_block);
-    this->free_map[one_block->data] = it;
+    auto one_block = hub_->setUpBlock(Size(block_bytes));
+    one_block->ref_cnt = 0;  // free blocks have ref_cnt == 0
+    auto* block_ptr = one_block.get();
+    auto it = this->block_list_.insert(this->block_list_.end(), std::move(one_block));
+    this->free_map_[block_ptr->data] = it;
   }
 }
 
-void BufferHub::Level::putOneBlock(const BlockPtr& block_ptr) {
-  BlockPtr dst_block(block_ptr);
-  if (block_list.empty()) {
-    auto ret_it = block_list.insert(block_list.end(), dst_block);
-    (*ret_it)->ref_cnt = 0;
-    free_map.insert({dst_block->data, ret_it});
-  } else {
-    bool in_free_m = free_map.count(dst_block->data);
-    bool in_busy_m = busy_map.count(dst_block->data);
-    if (!in_free_m && !in_busy_m) {
-      auto it = block_list.insert(block_list.end(), dst_block);
-      (*it)->ref_cnt = 0;
-      free_map.insert({(*it)->data, it});
-    } else if (in_free_m) {
-      LOG_WARN("Block %p already in block list at level %d",
-               static_cast<void*>(dst_block->data),
-               index);
-    } else {  // in_busy_m is true
-      auto& it = busy_map[dst_block->data];
-      auto& busy_block = *it;
-      if (0 == --busy_block->ref_cnt) {
-        busy_map.erase(busy_block->data);
-        busy_block->ref_cnt = 0;
-        free_map[dst_block->data] = it;
-      } else {
-        busy_block->ref_cnt--;
-      }
+void BufferHubLevel::putOneBlock(BlockRawPtr block_ptr) {
+  if (block_ptr == nullptr) {
+    return;
+  }
+  
+  if (block_list_.empty()) {
+    LOG_WARN("putOneBlock called on empty block_list at level %d", index_);
+    return;
+  }
+  
+  bool in_free_m = free_map_.count(block_ptr->data);
+  bool in_busy_m = busy_map_.count(block_ptr->data);
+  
+  if (!in_free_m && !in_busy_m) {
+    LOG_WARN("Block %p not found in level %d", static_cast<void*>(block_ptr->data), index_);
+    return;
+  } else if (in_free_m) {
+    LOG_WARN("Block %p already in free list at level %d", static_cast<void*>(block_ptr->data), index_);
+  } else {  // in_busy_m is true
+    auto it = busy_map_[block_ptr->data];
+    auto& busy_block = *it;
+    // Decrease ref count once; when it reaches zero, move block back to free_map
+    if (busy_block->ref_cnt > 0) {
+      busy_block->ref_cnt--;
+    }
+    if (busy_block->ref_cnt == 0) {
+      free_map_[block_ptr->data] = it;  // NOTE: Be cautious about the order of operations here
+      busy_map_.erase(busy_block->data);
     }
   }
 }
 
-BufferHub::Level::~Level() {
-  free_map.clear();
-  busy_map.clear();
-  for (auto& block_ptr : block_list) {
-    hub->tearDownBlock(block_ptr);
+bool BufferHubLevel::tryPutBlock(Block::DataPtr data) {
+  const auto busy_entry = busy_map_.find(data);  // only blocks currently tracked as busy can be handed back
+  if (busy_entry == busy_map_.end()) {
+    return false;
+  }
+  putOneBlock(busy_entry->second->get());
+  return true;
+}
+
+BufferHubLevel::~BufferHubLevel() {
+  free_map_.clear();
+  busy_map_.clear();
+  // Blocks are automatically cleaned up when unique_ptrs are destroyed
+  // but we need to manually free the data
+  for (auto& block_ptr : block_list_) {
+    if (block_ptr && block_ptr->data && hub_) {
+      hub_->deallocData(block_ptr->data);
+    }
   }
+  block_list_.clear();  // unique_ptrs will deallocate Block structs
+}
+
+BufferHub::BufferHub() {}
+
+BufferHub::~BufferHub() {
+  // Let the map manage BufferHubLevel destruction
+  buffers_.clear();
+  // Clear configuration metadata
+  size_levels_.clear();
 }
 
-BufferHub* BufferHub::Builder::build(const Config& config) {
+BufferHub* BufferHub::Builder::build(const BufferHubConfig& config) {
   auto* hub = new BufferHub;
   hub->initConfig(config);
   int index = 0;
-  for (auto v : config.size_levels) {
+  for (auto v : config.sizeLevels()) {
     hub->addSizeLevel(index, v);
     ++index;
   }
@@ -151,26 +208,23 @@ void BufferHub::Builder::destroy(nova_llm::BufferHub** hub) {
   if (hub && *hub) {
     // Deleting the BufferHub will call destructors of its members (including Level),
     // which will in turn call tearDownBlock to free internal allocations.
+    // NOTE: do not call (*hub)->~BufferHub() explicitly here; the `delete` below already runs the destructor.
+
     delete *hub;
     *hub = nullptr;
   }
 }
 
-void BufferHub::initConfig(const Config& config) {
-  device_type_ = config.device_type;
-  size_levels_ = config.size_levels;
-  // sort size levels in ascending order
-  std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) {
-    return a.totalBytes() < b.totalBytes();
-  });
-  size_limit_ = config.size_limit;
-  warning_level_ = config.warning_level;
-  allocator_ = config.allocator;
+void BufferHub::initConfig(const BufferHubConfig& config) {
+  device_type_ = config.deviceType();
+  this->size_levels_ = config.sizeLevels();
+  std::sort(size_levels_.begin(), size_levels_.end(), [](const Size& a, const Size& b) { return a.totalBytes() < b.totalBytes(); });
+  this->size_limit_ = config.sizeLimit();
+  this->warning_level_ = config.warningLevel();
+  this->allocator_ = config.allocator();
 }
 
-Block::DataPtr BufferHub::allocData(uint64_t sz) {
-  return static_cast<Block::DataPtr>(this->allocator_->allocate(sz));
-}
+Block::DataPtr BufferHub::allocData(uint64_t sz) { return static_cast<Block::DataPtr>(this->allocator_->allocate(sz)); }
 
 void BufferHub::deallocData(Block::DataPtr& data_ptr) {
   if (data_ptr) {
@@ -180,13 +234,14 @@ void BufferHub::deallocData(Block::DataPtr& data_ptr) {
 }
 
 BlockPtr BufferHub::allocBlock() {
-  return static_cast<BlockPtr>(this->allocator_->allocate(sizeof(Block)));
+  auto* raw_ptr = static_cast<Block*>(this->allocator_->allocate(sizeof(Block)));
+  return BlockPtr(raw_ptr);
 }
 
-void BufferHub::deallocateBlock(BlockPtr& block_ptr) {
-  if (block_ptr) {
-    this->allocator_->deallocate(block_ptr);
-    block_ptr = nullptr;
+void BufferHub::deallocateBlock(BlockPtr block) {
+  if (block) {
+    Block* raw = block.release();
+    this->allocator_->deallocate(raw);
   }
 }
 
@@ -198,49 +253,62 @@ BlockPtr BufferHub::setUpBlock(const Size& sz) {
   return block;
 }
 
-void BufferHub::tearDownBlock(BlockPtr& block) {
+void BufferHub::tearDownBlock(BlockPtr block) {
   if (block) {
     deallocData(block->data);
     block->size = 0;
     block->ref_cnt = 0;
-    deallocateBlock(block);
+    deallocateBlock(std::move(block));
   }
 }
 
-void BufferHub::addSizeLevel(uint32_t index, const Size& level_sz) {
-  auto& level = buffers_[level_sz];
-  level.level_size = level_sz;
-  level.index = index;
-  level.hub = this;
+void BufferHub::addSizeLevel(uint32_t index, const Size& level_block_sz) {
+  std::lock_guard<std::mutex> lock(mutex_);
+  
+  auto& level = buffers_[level_block_sz];
+  level->initialize(index, level_block_sz, this);
 }
 
 void BufferHub::eraseSizeLevel(const Size& level_sz) {
-  // NOTE:cautious,make sure the size level is not in use
-  if (buffers_.count(level_sz)) {
-    if (buffers_[level_sz].busy_map.empty()) {
-      auto& level = buffers_[level_sz];
-      level.~Level();
-    } else {
-      LOG_WARN("Level with size %d is in use,cannot erase now,please try some time later",
-               level_sz.totalBytes());
-    }
-  } else {
-    LOG_WARN("Level with size %d is not found!", level_sz.totalBytes());  // TODO:optimize
+  std::lock_guard<std::mutex> lock(mutex_);
+  
+  auto it = buffers_.find(level_sz);
+  if (it == buffers_.end()) {
+    LOG_WARN("Level with size %llu is not found!", level_sz.totalBytes());
+    return;
+  }
+
+  auto& level = it->second;
+  if (level->busyBlockCount() > 0) {
+    LOG_ERROR("Level with size %llu has %zu busy blocks, cannot erase now", 
+              level_sz.totalBytes(), level->busyBlockCount());
+    return;
   }
+
+  // No busy blocks remain at this point, so the level can be dropped.
+  // Log how many blocks are about to be released; the release itself happens in the erase below.
+  LOG_INFO("Erasing level with size %llu, freeing %zu blocks", 
+           level_sz.totalBytes(), level->totalBlocks());
+  
+  // Erasing from the map will call BufferHubLevel destructor,
+  // which releases every block's data via BufferHub::deallocData
+  buffers_.erase(it);
 }
 
-BlockPtr BufferHub::getBlock(const Size& sz){
+BlockRawPtr BufferHub::getBlock(const Size& sz) {
+  std::lock_guard<std::mutex> lock(mutex_);
+  
   // round it to ceil level
   auto level_sz = gradeLevel(sz);
   if (!level_sz.isValid()) {
     return nullptr;
   }
   // search the block list
-  BlockPtr ret_block {nullptr};
+  BlockRawPtr ret_block {nullptr};
   if (buffers_.count(level_sz)) {
     auto& level = buffers_[level_sz];
-    auto block = level.fetchOneFreeBlock();
-    if (block->isValid()) {
+    auto block = level->fetchOneFreeBlock();
+    if (block && block->isValid()) {
       ret_block = block;
     }
   }
@@ -250,18 +318,26 @@ BlockPtr BufferHub::getBlock(const Size& sz){
   return ret_block;
 }
 
-void BufferHub::putBlock(const BlockPtr& block_ptr) {
+void BufferHub::putBlock(BlockRawPtr block_ptr) {
+  if (!block_ptr) {
+    return;
+  }
+  
+  std::lock_guard<std::mutex> lock(mutex_);
+  
   auto size = block_ptr->size;
   Size level_size(size);
   if (buffers_.count(level_size)) {
     auto& level = buffers_[level_size];
-    level.putOneBlock(block_ptr);
+    level->putOneBlock(block_ptr);
   } else {
-    LOG_ERROR("Level with size %d is not found!", level_size.totalBytes());
+    LOG_ERROR("Level size %llu is not found in buffers!", static_cast<unsigned long long>(level_size.totalBytes()));  // %llu + cast: byte count is 64-bit
   }
 }
 
-void BufferHub::putBlockFromBuffer(const Buffer& buffer) {
+void BufferHub::putBlockFromBuffer(Buffer& buffer) {
+  std::lock_guard<std::mutex> lock(mutex_);
+  
   if (0 == buffer.size || nullptr == buffer.data) {
     return;
   }
@@ -269,30 +345,38 @@ void BufferHub::putBlockFromBuffer(const Buffer& buffer) {
   if (buffers_.count(level_sz)) {
     auto& level = buffers_[level_sz];
     auto* data = static_cast<Block::DataPtr>(buffer.data);
-    if (level.busy_map.count(data)) {
-      auto block_it = level.busy_map[data];
-      level.putOneBlock(*block_it);
+    
+    if (!level->tryPutBlock(data)) {
+       // Deliberately a no-op: data that is not in this level's busy map is ignored,
+       // matching the previous behaviour (`if (level.busy_map.count(data)) { ... }`).
+       // TODO: consider logging a warning here if such a miss indicates a caller bug.
     }
+    
   } else {
     LOG_ERROR("Level with size %d cannot be found in this memory hub", level_sz.totalBytes());
   }
+
+  // Clear the Buffer to avoid dangling pointers for callers.
+  buffer.data = nullptr;
+  buffer.size = 0;
 }
 
+// TODO: optimize the level selection algorithm (currently a linear scan over size_levels_)
 Size BufferHub::gradeLevel(const Size& sz) const {
   Size ret;
   uint32_t level_index = 0;
   size_t i = 0;
-  for (; i < size_levels_.size(); i++) {
-    if (sz.totalBytes() < size_levels_[i].totalBytes()) {
+  for (; i < this->size_levels_.size(); i++) {
+    if (sz.totalBytes() <= this->size_levels_[i].totalBytes()) {
       level_index = i;
       break;
     }
   }
-  if (size_levels_.size() == i) {
+  if (this->size_levels_.size() == i) {
     LOG_ERROR("Cannot grade to current levels for size %d", sz.totalBytes());
     return Size {};
   }
   return size_levels_[level_index];
 }
 
-}  // namespace nova_llm
\ No newline at end of file
+}  // namespace nova_llm
diff --git a/source/memory/buffer_manager.cpp b/source/memory/buffer_manager.cpp
index 2d18350..7790c74 100644
--- a/source/memory/buffer_manager.cpp
+++ b/source/memory/buffer_manager.cpp
@@ -1,9 +1,10 @@
 #include "NovaLLM/memory/buffer_manager.h"
 
 #include "NovaLLM/memory/allocator.h"
+#include "NovaLLM/memory/buffer_hub.h"
 #include "NovaLLM/utils/log.h"
 #include "NovaLLM/utils/macros.h"
-#include "NovaLLM/memory/buffer_hub.h"
+// NOTE(review): C4251 (DLL interface for STL containers) is meant to be disabled on Windows, but no #pragma follows here - confirm where it is handled
 
 namespace nova_llm {
 
@@ -28,24 +29,7 @@ bool BufferManager::init(const nova_llm::BufferManager::Config &config) {
   }
   bool ret = false;
   if (config.device_flags.has(DeviceType::CPU)) {
-    BufferHub::Config cfg;
-    cfg.allocator = config.cpu.alloc;
-
-    auto byte_sizes = DefaultSizeLevelStrategy::byteSizes();
-    cfg.size_levels.insert(cfg.size_levels.end(), byte_sizes.begin(), byte_sizes.end());
-    // for size below 1kb
-    auto kilobyte_sizes = DefaultSizeLevelStrategy::kiloByteSizes();
-    cfg.size_levels.insert(cfg.size_levels.end(), kilobyte_sizes.begin(), kilobyte_sizes.end());
-    // for size below 1mb
-    auto mb_sizes = DefaultSizeLevelStrategy::megaByteSizes();
-    cfg.size_levels.insert(cfg.size_levels.end(), mb_sizes.begin(), mb_sizes.end());
-    // for size below 1gb
-    auto gb_sizes = DefaultSizeLevelStrategy::gigaByteSizes();
-    cfg.size_levels.insert(cfg.size_levels.end(), gb_sizes.begin(), gb_sizes.end());
-
-    cfg.size_limit = Size(0, 0, 0, 4);
-    cfg.warning_level = 0.95;
-
+    BufferHubConfig cfg(DeviceType::CPU, config.cpu.alloc, Size(4ULL * 1024 * 1024 * 1024));  // 4 GiB; ULL avoids wrap-around to 0 where unsigned long is 32-bit
     buffer_hubs_[DeviceType::CPU] = BufferHub::Builder::build(cfg);
     ret |= true;
   }
@@ -54,7 +38,7 @@ bool BufferManager::init(const nova_llm::BufferManager::Config &config) {
   return ret;
 }
 
-void BufferManager::put(const Buffer &buffer) {
+void BufferManager::put(Buffer &buffer) {
   if (nullptr == buffer.data || 0 == buffer.size) {
     return;
   }
@@ -74,14 +58,13 @@ Buffer BufferManager::fetch(size_t size, DeviceType device_type) {
   return buffer;
 }
 
-BufferManager::~BufferManager() {
-  destroy();
-}
+BufferManager::~BufferManager() { destroy(); }
 
 void BufferManager::destroy() {
-  for (auto p : buffer_hubs_) {
+  for (auto& p : buffer_hubs_) {
     BufferHub::Builder::destroy(&(p.second));
   }
+  buffer_hubs_.clear();
   is_init_ = false;
 }
 
diff --git a/source/memory/cpu_allocator.cpp b/source/memory/cpu_allocator.cpp
index d60e9ff..7a3eff7 100644
--- a/source/memory/cpu_allocator.cpp
+++ b/source/memory/cpu_allocator.cpp
@@ -1,7 +1,7 @@
-#include "NovaLLM/memory/allocator.h"
-
 #include <cstdlib>
 
+#include "NovaLLM/memory/allocator.h"
+
 namespace nova_llm {
 
 
diff --git a/source/memory/gpu_allocator.cpp b/source/memory/gpu_allocator.cpp
index 6422ec3..6249c0f 100644
--- a/source/memory/gpu_allocator.cpp
+++ b/source/memory/gpu_allocator.cpp
@@ -16,9 +16,7 @@ void* CUDAAllocator::do_allocate(size_t size) {
   return ptr;
 }
 
-void CUDAAllocator::do_deallocate(void* ptr) {
-  cudaFree(ptr);
-}
+void CUDAAllocator::do_deallocate(void* ptr) { cudaFree(ptr); }
 
-}
+}  // namespace nova_llm
 #endif
\ No newline at end of file
diff --git a/source/utils/log.cpp b/source/utils/log.cpp
index f07efc3..e82610b 100644
--- a/source/utils/log.cpp
+++ b/source/utils/log.cpp
@@ -10,9 +10,7 @@
 
 namespace nova_llm {
 
-void Logger::init(const std::string& name,
-                  const std::string& logFile,
-                  spdlog::level::level_enum level) {
+void Logger::init(const std::string& name, const std::string& logFile, spdlog::level::level_enum level) {
   try {
     // Refer to
     // https://github.com/gabime/spdlog?tab=readme-ov-file#logger-with-multi-sinks---each-with-a-different-format-and-log-level
@@ -25,8 +23,7 @@ void Logger::init(const std::string& name,
     console_sink->set_pattern(pattern_str);  // Use set_pattern
 
     // Create file sink
-    auto file_sink =
-        std::make_shared<spdlog::sinks::rotating_file_sink_mt>(logFile, 1024 * 1024 * 5, 3);
+    auto file_sink = std::make_shared<spdlog::sinks::rotating_file_sink_mt>(logFile, 1024 * 1024 * 5, 3);
     file_sink->set_level(level);
     file_sink->set_pattern(pattern_str);  // Use set_pattern
 
diff --git a/standalone/source/main.cpp b/standalone/source/main.cpp
index 3f83589..8eda34f 100644
--- a/standalone/source/main.cpp
+++ b/standalone/source/main.cpp
@@ -1,14 +1,15 @@
 #include <NovaLLM/NovaLLM-cpp.h>
+
 #include <cxxopts.hpp>
 #include <iostream>
 #include <string>
 #include <unordered_map>
 
 auto main(int argc, char** argv) -> int {
-  int arg_num=argc;
-  std::cout<<"arg num:"<<arg_num<<std::endl;
-  for(int i=0;i<arg_num;i++){
-    std::cout<<argv[i]<<std::endl;
-  } 
+  int arg_num = argc;
+  std::cout << "arg num:" << arg_num << std::endl;
+  for (int i = 0; i < arg_num; i++) {
+    std::cout << argv[i] << std::endl;
+  }
   return 0;
 }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 732cb7d..394c538 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -28,12 +28,13 @@ add_executable(${PROJECT_NAME} ${sources})
 
 # Add include directories
 target_include_directories(${PROJECT_NAME} PRIVATE
+    ${PROJECT_ROOT}/include
     ${CMAKE_CURRENT_SOURCE_DIR}/include
     ${CMAKE_BINARY_DIR}/conan/include
 )
 
-target_link_libraries(${PROJECT_NAME} 
-    PRIVATE 
+target_link_libraries(${PROJECT_NAME}
+    PRIVATE
     NovaLLM::NovaLLM
     GTest::gtest
     GTest::gtest_main
@@ -45,6 +46,11 @@ target_link_libraries(${PROJECT_NAME}
 
 set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 17)
 
+# Define import macro for Windows (tests consume the DLL, library exports)
+if(WIN32)
+  target_compile_definitions(${PROJECT_NAME} PRIVATE NOVA_LLM_IMPORTS)
+endif()
+
 # enable compiler warnings
 if(NOT TEST_INSTALLED_VERSION)
   if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU")
diff --git a/test/source/buffer_hub_test.cpp b/test/source/buffer_hub_test.cpp
index b78c82f..3356a57 100644
--- a/test/source/buffer_hub_test.cpp
+++ b/test/source/buffer_hub_test.cpp
@@ -2,80 +2,332 @@
 
 #include <gtest/gtest.h>
 
+#include <algorithm>
+#include <atomic>
+#include <thread>
+#include <vector>
+
 using namespace nova_llm;
 
 class CPUBufferHubTest : public ::testing::Test {
-public:
-    BufferHub* getBufferHub(){
-        return buffer_hub_;
-    }
+ public:
+  BufferHub* getBufferHub() { return buffer_hub_; }
 
-protected:
-    void SetUp() override {
-        BufferHub::Config config;
-        //set config
-        config.device_type = DeviceType::CPU;
-        config.size_levels=std::vector<Size>{Size(0, 0, 0, 1)};
-        config.allocator = std::make_shared<CPUAllocator>();
+ protected:
+  void SetUp() override {
+    BufferHubConfig config(DeviceType::CPU, std::make_shared<CPUAllocator>(), Size(4ULL * 1024 * 1024 * 1024));
+    buffer_hub_ = BufferHub::Builder::build(config);
+  }
 
-        buffer_hub_ = BufferHub::Builder::build(config);
-    }
+  void TearDown() override { BufferHub::Builder::destroy(&buffer_hub_); }
 
-    void TearDown() override {
-        BufferHub::Builder::destroy(&buffer_hub_);
-    }
-    BufferHub* buffer_hub_;
+  BufferHub* buffer_hub_;
 };
 
-TEST_F(CPUBufferHubTest, Init) {
-    EXPECT_NE(getBufferHub(), nullptr);
-}
+TEST_F(CPUBufferHubTest, Init) { EXPECT_NE(getBufferHub(), nullptr); }
 
 TEST_F(CPUBufferHubTest, GetBlock) {
-    auto *block = getBufferHub()->getBlock(Size(1024));
+  auto* block = getBufferHub()->getBlock(Size(1024));
 
-    EXPECT_NE(block, nullptr);
-    EXPECT_EQ(block->data, nullptr);
-    EXPECT_EQ(block->size, 1024);
-    EXPECT_EQ(block->ref_cnt, 1);
+  EXPECT_NE(block, nullptr);
+  EXPECT_NE(block->data, nullptr);
+  EXPECT_GE(block->size, 1024);
+  EXPECT_EQ(block->ref_cnt, 1);
 
-    getBufferHub()->putBlock(block);
+  getBufferHub()->putBlock(block);
 }
 
 TEST_F(CPUBufferHubTest, PutBlock) {
-    auto* block = getBufferHub()->getBlock(Size(1024));
+  auto* block = getBufferHub()->getBlock(Size(1024));
 
-    EXPECT_NE(block, nullptr);
-    EXPECT_NE(block->data, nullptr);
-    EXPECT_EQ(block->size, 1024);
-    EXPECT_EQ(block->ref_cnt, 1);
+  EXPECT_NE(block, nullptr);
+  EXPECT_NE(block->data, nullptr);
+  EXPECT_GE(block->size, 1024);
+  EXPECT_EQ(block->ref_cnt, 1);
 
-    getBufferHub()->putBlock(block);
+  // Return the block to the pool; block remains valid but is marked free
+  getBufferHub()->putBlock(block);
 
-    EXPECT_EQ(block->data, nullptr);
-    EXPECT_EQ(block->size, 0);
-    EXPECT_EQ(block->ref_cnt, 0);
-}
+  EXPECT_NE(block->data, nullptr);
+  EXPECT_GE(block->size, 1024);
+  EXPECT_EQ(block->ref_cnt, 0);  // ref count reset when returned to pool
 
+  // Fetch another block of the same size and ensure we get a (possibly reused) block
+  auto* block2 = getBufferHub()->getBlock(Size(1024));
+  EXPECT_NE(block2, nullptr);
+  EXPECT_NE(block2->data, nullptr);
+  EXPECT_GE(block2->size, 1024);
+  EXPECT_EQ(block2->ref_cnt, 1);
+}
 
 TEST_F(CPUBufferHubTest, PutBlockFromBuffer) {
-    auto* block = getBufferHub()->getBlock(Size(1024));
-
-    EXPECT_NE(block, nullptr);
-    EXPECT_NE(block->data, nullptr);
-    EXPECT_EQ(block->size, 1024);
-    EXPECT_EQ(block->ref_cnt, 1);
-
-    Buffer buffer;
-    buffer.data = block->data;
-    buffer.size = block->size;
-    buffer.device_type = DeviceType::CPU;
-    getBufferHub()->putBlockFromBuffer(buffer);
-
-    EXPECT_EQ(block->data, nullptr);
-    EXPECT_EQ(block->size, 0);
-    EXPECT_EQ(block->ref_cnt, 0);
-
-    EXPECT_EQ(buffer.data, nullptr);
-    EXPECT_EQ(buffer.size, 0);
-}
\ No newline at end of file
+  auto* block = getBufferHub()->getBlock(Size(1024));
+
+  EXPECT_NE(block, nullptr);
+  EXPECT_NE(block->data, nullptr);
+  EXPECT_GE(block->size, 1024);
+  EXPECT_EQ(block->ref_cnt, 1);
+
+  Buffer buffer;
+  buffer.data = block->data;
+  buffer.size = block->size;
+  buffer.device_type = DeviceType::CPU;
+  getBufferHub()->putBlockFromBuffer(buffer);
+
+  // After returning via Buffer, the underlying block should be returned to the pool.
+  // The Buffer should be cleared to avoid dangling pointers.
+  EXPECT_EQ(buffer.data, nullptr);
+  EXPECT_EQ(buffer.size, 0);
+}
+
+// Concurrent access tests
+TEST_F(CPUBufferHubTest, ConcurrentAddSizeLevel) {
+  constexpr int num_threads = 10;
+  constexpr int num_levels_per_thread = 5;
+  std::vector<std::thread> threads;
+  std::atomic<int> success_count {0};
+
+  // Each thread adds multiple size levels
+  for (int t = 0; t < num_threads; ++t) {
+    threads.emplace_back([this, t, &success_count, num_levels_per_thread=num_levels_per_thread]() {
+      for (int i = 0; i < num_levels_per_thread; ++i) {
+        // Create unique sizes for each thread to avoid conflicts
+        uint64_t size_bytes = (1 << 20) * (t * num_levels_per_thread + i + 100);  // 100MB+
+        Size level_size(size_bytes);
+        uint32_t index = t * num_levels_per_thread + i + 1000;
+
+        getBufferHub()->addSizeLevel(index, level_size);
+        success_count++;
+      }
+    });
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+
+  // Verify all additions succeeded
+  EXPECT_EQ(success_count.load(), num_threads * num_levels_per_thread);
+}
+
+TEST_F(CPUBufferHubTest, ConcurrentEraseSizeLevel) {
+  const int num_threads = 8;
+  std::vector<std::thread> threads;
+  std::vector<Size> sizes_to_add;
+
+  // Pre-populate with size levels
+  for (int i = 0; i < num_threads * 2; ++i) {
+    uint64_t size_bytes = (1 << 20) * (i + 200);  // 200MB+
+    Size level_size(size_bytes);
+    sizes_to_add.push_back(level_size);
+    getBufferHub()->addSizeLevel(2000 + i, level_size);
+  }
+
+  std::atomic<int> erase_attempts {0};
+
+  // Each thread attempts to erase different size levels concurrently
+  for (int t = 0; t < num_threads; ++t) {
+    threads.emplace_back([this, t, &sizes_to_add, &erase_attempts]() {
+      // Each thread erases 2 levels
+      for (int i = 0; i < 2; ++i) {
+        int idx = t * 2 + i;
+        getBufferHub()->eraseSizeLevel(sizes_to_add[idx]);
+        erase_attempts++;
+      }
+    });
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+
+  EXPECT_EQ(erase_attempts.load(), num_threads * 2);
+}
+
+TEST_F(CPUBufferHubTest, ConcurrentGetBlock) {
+  constexpr int num_threads = 20;
+  constexpr int blocks_per_thread = 5;
+  std::vector<std::thread> threads;
+  std::vector<std::vector<BlockRawPtr>> thread_blocks(num_threads);
+  std::atomic<int> successful_gets {0};
+
+  // Multiple threads requesting blocks of the same size concurrently
+  for (int t = 0; t < num_threads; ++t) {
+    threads.emplace_back([this, t, &thread_blocks, &successful_gets, blocks_per_thread=blocks_per_thread]() {
+      for (int i = 0; i < blocks_per_thread; ++i) {
+        auto* block = getBufferHub()->getBlock(Size(4096));  // 4KB blocks
+        if (block != nullptr && block->data != nullptr) {
+          thread_blocks[t].push_back(block);
+          successful_gets++;
+
+          // Verify block properties
+          EXPECT_NE(block->data, nullptr);
+          EXPECT_GE(block->size, 4096);
+          EXPECT_EQ(block->ref_cnt, 1);
+        }
+      }
+    });
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+
+  // Verify we got the expected number of blocks
+  EXPECT_EQ(successful_gets.load(), num_threads * blocks_per_thread);
+
+  // Verify all blocks have unique data pointers (no double allocation)
+  std::vector<Block::DataPtr> all_data_ptrs;
+  for (const auto& blocks : thread_blocks) {
+    for (const auto& block : blocks) {
+      all_data_ptrs.push_back(block->data);
+    }
+  }
+  std::sort(all_data_ptrs.begin(), all_data_ptrs.end());
+  auto last = std::unique(all_data_ptrs.begin(), all_data_ptrs.end());
+  EXPECT_EQ(last - all_data_ptrs.begin(), num_threads * blocks_per_thread);
+
+  // Clean up - return all blocks
+  for (auto& blocks : thread_blocks) {
+    for (auto* block : blocks) {
+      getBufferHub()->putBlock(block);
+    }
+  }
+}
+
+TEST_F(CPUBufferHubTest, ConcurrentPutBlock) {
+  const int num_threads = 15;
+  const int blocks_per_thread = 4;
+  std::vector<std::thread> threads;
+  std::vector<std::vector<BlockRawPtr>> thread_blocks(num_threads);
+
+  // First, get blocks in a single-threaded manner
+  for (int t = 0; t < num_threads; ++t) {
+    for (int i = 0; i < blocks_per_thread; ++i) {
+      auto* block = getBufferHub()->getBlock(Size(2048));  // 2KB blocks
+      ASSERT_NE(block, nullptr);
+      thread_blocks[t].push_back(block);
+    }
+  }
+
+  std::atomic<int> successful_puts {0};
+
+  // Now return blocks concurrently from multiple threads
+  for (int t = 0; t < num_threads; ++t) {
+    threads.emplace_back([this, t, &thread_blocks, &successful_puts]() {
+      for (auto* block : thread_blocks[t]) {
+        EXPECT_EQ(block->ref_cnt, 1);
+        getBufferHub()->putBlock(block);
+        successful_puts++;
+      }
+    });
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+
+  EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread);
+
+  // Verify blocks are returned properly by checking ref_cnt
+  for (const auto& blocks : thread_blocks) {
+    for (const auto* block : blocks) {
+      EXPECT_EQ(block->ref_cnt, 0);
+    }
+  }
+}
+
+TEST_F(CPUBufferHubTest, ConcurrentPutBlockFromBuffer) {
+  const int num_threads = 12;
+  const int blocks_per_thread = 3;
+  std::vector<std::thread> threads;
+  std::vector<std::vector<Buffer>> thread_buffers(num_threads);
+
+  // First, get blocks and create buffers in a single-threaded manner
+  for (int t = 0; t < num_threads; ++t) {
+    for (int i = 0; i < blocks_per_thread; ++i) {
+      auto* block = getBufferHub()->getBlock(Size(8192));  // 8KB blocks
+      ASSERT_NE(block, nullptr);
+
+      Buffer buffer;
+      buffer.data = block->data;
+      buffer.size = block->size;
+      buffer.device_type = DeviceType::CPU;
+      thread_buffers[t].push_back(buffer);
+    }
+  }
+
+  std::atomic<int> successful_puts {0};
+
+  // Now return buffers concurrently from multiple threads
+  for (int t = 0; t < num_threads; ++t) {
+    threads.emplace_back([this, t, &thread_buffers, &successful_puts]() {
+      for (auto& buffer : thread_buffers[t]) {
+        EXPECT_NE(buffer.data, nullptr);
+        EXPECT_NE(buffer.size, 0);
+
+        getBufferHub()->putBlockFromBuffer(buffer);
+
+        // Verify buffer was cleared
+        EXPECT_EQ(buffer.data, nullptr);
+        EXPECT_EQ(buffer.size, 0);
+
+        successful_puts++;
+      }
+    });
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+
+  EXPECT_EQ(successful_puts.load(), num_threads * blocks_per_thread);
+}
+
+// Mixed concurrent operations test
+TEST_F(CPUBufferHubTest, ConcurrentMixedOperations) {
+  const int num_threads = 16;
+  std::vector<std::thread> threads;
+  std::atomic<int> total_operations {0};
+
+  // Mix of get and put operations happening concurrently
+  for (int t = 0; t < num_threads; ++t) {
+    threads.emplace_back([this, t, &total_operations]() {
+      std::vector<BlockRawPtr> blocks;
+
+      // Perform alternating get and put operations
+      for (int i = 0; i < 10; ++i) {
+        // Get a block
+        auto* block = getBufferHub()->getBlock(Size(1024 * (t % 4 + 1)));  // Varying sizes
+        if (block != nullptr) {
+          EXPECT_NE(block->data, nullptr);
+          EXPECT_EQ(block->ref_cnt, 1);
+          blocks.push_back(block);
+          total_operations++;
+        }
+
+        // Return a previously acquired block if we have any
+        if (!blocks.empty() && i % 3 == 0) {
+          auto* return_block = blocks.back();
+          blocks.pop_back();
+          getBufferHub()->putBlock(return_block);
+          // Note: Don't check ref_cnt here as it's being modified concurrently
+          total_operations++;
+        }
+      }
+
+      // Clean up remaining blocks
+      for (auto* block : blocks) {
+        getBufferHub()->putBlock(block);
+        total_operations++;
+      }
+    });
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+
+  // Verify operations completed
+  EXPECT_GT(total_operations.load(), 0);
+}
diff --git a/test/source/buffer_manager_test.cpp b/test/source/buffer_manager_test.cpp
index 280606f..be22c98 100644
--- a/test/source/buffer_manager_test.cpp
+++ b/test/source/buffer_manager_test.cpp
@@ -5,49 +5,47 @@
 using namespace nova_llm;
 
 class BufferManagerTest : public ::testing::Test {
-protected:
-    void SetUp() override {
-        BufferManager::Config config;
-        //set config
-        config.device_flags.set(DeviceType::CPU);
-        config.cpu.alloc = std::make_shared<CPUAllocator>();
-        #if defined(NOVA_LLM_CUDA_ON) && NOVA_LLM_CUDA_ON
-        config.device_flags.set(DeviceType::CUDA);
-        config.gpu.alloc = std::make_shared<CUDAAllocator>();
-        #endif
-        
-        BufferManager::Builder::build(config);
-    }
-
-    void TearDown() override {
-        BufferManager::Builder::getInstance().~BufferManager();
-    }
+ protected:
+  void SetUp() override {  // Runs before each TEST_F: builds the BufferManager from a fresh Config
+    BufferManager::Config config;
+    // Always enable the CPU device and give it a CPUAllocator
+    config.device_flags.set(DeviceType::CPU);
+    config.cpu.alloc = std::make_shared<CPUAllocator>();
+#if defined(NOVA_LLM_CUDA_ON) && NOVA_LLM_CUDA_ON  // CUDA device is configured only when the build defines NOVA_LLM_CUDA_ON as non-zero
+    config.device_flags.set(DeviceType::CUDA);
+    config.gpu.alloc = std::make_shared<CUDAAllocator>();
+#endif  // NOVA_LLM_CUDA_ON
+
+    BufferManager::Builder::build(config);  // NOTE(review): presumably (re)initializes the instance returned by Builder::getInstance() - confirm
+  }
+
+  void TearDown() override { BufferManager::Builder::getInstance().destroy(); }  // Runs after each TEST_F; calls destroy() on the Builder-provided instance
 };
 
-TEST(BufferManagerTest, Init) {
-    auto& buffer_manager = BufferManager::Builder::getInstance();
-    EXPECT_TRUE(buffer_manager.isInited());
+TEST_F(BufferManagerTest, Init) {  // TEST_F (not TEST) so the fixture's SetUp/TearDown run around this test
+  auto& buffer_manager = BufferManager::Builder::getInstance();
+  EXPECT_TRUE(buffer_manager.isInited());  // The instance must report itself initialized once SetUp has called build()
 }
 
-TEST(BufferManagerTest, FetchCpu) {
-    auto& buffer_manager = BufferManager::Builder::getInstance();
+TEST_F(BufferManagerTest, FetchCpu) {  // Fetching a CPU buffer yields non-null data, a size of at least the request, and the CPU device tag
+  auto& buffer_manager = BufferManager::Builder::getInstance();
 
-    auto buffer = buffer_manager.fetch(1024, DeviceType::CPU);
+  auto buffer = buffer_manager.fetch(1024, DeviceType::CPU);  // Request a size of 1024 on the CPU device
 
-    EXPECT_NE(buffer.data, nullptr);
-    EXPECT_EQ(buffer.size, 1024);
-    EXPECT_EQ(buffer.device_type, DeviceType::CPU);
+  EXPECT_NE(buffer.data, nullptr);
+  EXPECT_GE(buffer.size, 1024);  // Size should be at least requested (may be rounded up to next level)
+  EXPECT_EQ(buffer.device_type, DeviceType::CPU);
 
-    buffer_manager.put(buffer);
+  buffer_manager.put(buffer);  // Hand the buffer back so the test does not end while still holding it
 }
 
-TEST(BufferManagerTest, PutCpu) {
-    auto& buffer_manager = BufferManager::Builder::getInstance();
+TEST_F(BufferManagerTest, PutCpu) {  // put() is expected to reset the caller's buffer handle
+  auto& buffer_manager = BufferManager::Builder::getInstance();
 
-    auto buffer = buffer_manager.fetch(1024, DeviceType::CPU);
+  auto buffer = buffer_manager.fetch(1024, DeviceType::CPU);  // Acquire a buffer solely so it can be returned below
 
-    buffer_manager.put(buffer);
-    EXPECT_EQ(buffer.data, nullptr);
-    EXPECT_EQ(buffer.size, 0);
-    EXPECT_EQ(buffer.device_type, DeviceType::CPU);
-}
\ No newline at end of file
+  buffer_manager.put(buffer);
+  EXPECT_EQ(buffer.data, nullptr);  // After put(), the handle's data pointer must be null
+  EXPECT_EQ(buffer.size, 0);  // ... and its size must be zero
+  EXPECT_EQ(buffer.device_type, DeviceType::CPU);  // The device tag is expected to still read CPU
+}
diff --git a/test/source/tensor_test.cpp b/test/source/tensor_test.cpp
index c03e51a..189eeac 100644
--- a/test/source/tensor_test.cpp
+++ b/test/source/tensor_test.cpp
@@ -1,4 +1,5 @@
 #include "NovaLLM/data/tensor.h"
+#include "NovaLLM/memory/buffer_manager.h"
 
 #include <gtest/gtest.h>
 
@@ -7,11 +8,17 @@ using namespace nova_llm;
 class TensorTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    // 测试前的设置
+    BufferManager::Config config;
+    // Enable only the CPU device and give it a CPUAllocator
+    config.device_flags.set(DeviceType::CPU);
+    config.cpu.alloc = std::make_shared<CPUAllocator>();
+    // NOTE(review): no size is set here, so the Config default applies (said to be 4GB - confirm); lower it if tests need less.
+    // The BufferManager singleton is built before every test; presumably Tensor allocation depends on it - confirm.
+    BufferManager::Builder::build(config);
   }
 
   void TearDown() override {
-    // 测试后的清理
+    BufferManager::Builder::getInstance().destroy();  // Runs after each test; calls destroy() on the Builder-provided instance
   }
 };
 
@@ -38,11 +45,12 @@ TEST_F(TensorTest, ConstructWithDims) {
 // 测试非法维度
 TEST_F(TensorTest, InvalidDimensions) {
   std::vector<uint32_t> empty_dims;
-  EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU),
-               std::invalid_argument);
+  // Empty dims must be rejected with std::runtime_error (reportedly raised by the project's ASSERT macro - confirm)
+  EXPECT_THROW(Tensor tensor(empty_dims, DataType::FLOAT32, DeviceType::CPU), std::runtime_error);
 
   std::vector<uint32_t> zero_dims = {2, 0, 4};
-  EXPECT_THROW(Tensor tensor(zero_dims, DataType::FLOAT32, DeviceType::CPU), std::invalid_argument);
+  // A zero-sized dimension must be rejected with std::runtime_error as well (same ASSERT path, reportedly - confirm)
+  EXPECT_THROW(Tensor tensor(zero_dims, DataType::FLOAT32, DeviceType::CPU), std::runtime_error);
 }
 
 // 测试拷贝构造
@@ -77,4 +85,4 @@ TEST_F(TensorTest, MemoryAllocation) {
 
   EXPECT_NE(tensor.data(), nullptr);
   EXPECT_EQ(tensor.totalElements(), 6);
-}
\ No newline at end of file
+}