diff --git a/.env.example b/.env.example index 04609c9..5e349dd 100644 --- a/.env.example +++ b/.env.example @@ -243,3 +243,76 @@ GRAPH_STORE_MAX_HOPS=3 # GRAPH_STORE_CUSTOM_EXTRACT_RELATIONS_PROMPT= # GRAPH_STORE_CUSTOM_UPDATE_GRAPH_PROMPT= # GRAPH_STORE_CUSTOM_DELETE_RELATIONS_PROMPT= + +# ============================================================================= +# 12. PowerMem HTTP API Server Configuration +# ============================================================================= +# Configuration for the PowerMem HTTP API Server +# ============================================================================= + +# ----------------------------------------------------------------------------- +# Server Settings +# ----------------------------------------------------------------------------- +# Server host address (0.0.0.0 to listen on all interfaces) +POWERMEM_SERVER_HOST=0.0.0.0 + +# Server port number +POWERMEM_SERVER_PORT=8000 + +# Number of worker processes (only used when reload=false) +POWERMEM_SERVER_WORKERS=4 + +# Enable auto-reload for development (true/false) +POWERMEM_SERVER_RELOAD=false + +# ----------------------------------------------------------------------------- +# Authentication Settings +# ----------------------------------------------------------------------------- +# Enable API key authentication (true/false) +POWERMEM_SERVER_AUTH_ENABLED=false + +# API keys (comma-separated list) +# Example: POWERMEM_SERVER_API_KEYS=key1,key2,key3 +POWERMEM_SERVER_API_KEYS= + +# ----------------------------------------------------------------------------- +# Rate Limiting Settings +# ----------------------------------------------------------------------------- +# Enable rate limiting (true/false) +POWERMEM_SERVER_RATE_LIMIT_ENABLED=true + +# Rate limit per minute per IP address +POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE=100 + +# ----------------------------------------------------------------------------- +# Logging Settings +# ----------------------------------------------------------------------------- +POWERMEM_SERVER_LOG_FILE=server.log + +# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL +POWERMEM_SERVER_LOG_LEVEL=INFO + +# Log format: json or text +POWERMEM_SERVER_LOG_FORMAT=text + +# ----------------------------------------------------------------------------- +# API Settings +# ----------------------------------------------------------------------------- +# API title (shown in Swagger UI) +POWERMEM_SERVER_API_TITLE=PowerMem API + +# API version +POWERMEM_SERVER_API_VERSION=v1 + +# API description (shown in Swagger UI) +POWERMEM_SERVER_API_DESCRIPTION=PowerMem HTTP API Server - Intelligent Memory System + +# ----------------------------------------------------------------------------- +# CORS Settings +# ----------------------------------------------------------------------------- +# Enable CORS (true/false) +POWERMEM_SERVER_CORS_ENABLED=true + +# CORS allowed origins (comma-separated, use * for all origins) +# Example: POWERMEM_SERVER_CORS_ORIGINS=http://localhost:3000,https://example.com +POWERMEM_SERVER_CORS_ORIGINS=* diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml new file mode 100644 index 0000000..ff4ee1d --- /dev/null +++ b/.github/workflows/build_docker.yml @@ -0,0 +1,140 @@ +name: build powermem-server docker + +on: + push: + branches: [main, develop] + tags: + - 'v*' + pull_request: + branches: [main, develop] + +jobs: + build-docker: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: 
actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Get SDK version + id: sdk-version + run: | + # Extract version from version.py file + VERSION=$(grep -E '^__version__\s*=' src/powermem/version.py | sed -E "s/^__version__\s*=\s*['\"](.*)['\"]/\1/") + echo "SDK version: $VERSION" + echo "sdk-version=$VERSION" >> $GITHUB_OUTPUT + echo "image-tag=$VERSION" >> $GITHUB_OUTPUT + echo "Using SDK version ($VERSION) as Docker image tag" + + - name: Determine if should push + id: should-push + run: | + EVENT_NAME="${{ github.event_name }}" + REF="${{ github.ref }}" + + echo "Event name: $EVENT_NAME" + echo "Ref: $REF" + + # Only push when it's a push event and the ref starts with refs/tags/v + if [[ "$EVENT_NAME" == "push" ]] && [[ "$REF" == refs/tags/v* ]]; then + echo "should-push=true" >> $GITHUB_OUTPUT + echo "Will push to remote repository" + else + echo "should-push=false" >> $GITHUB_OUTPUT + echo "Will save as artifacts" + fi + + # Debug: verify the output + echo "Output should-push value:" + grep should-push $GITHUB_OUTPUT || echo "No should-push found in output" + + - name: Log in to Docker hub (for tag push only) + if: steps.should-push.outputs.should-push == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push Docker image (for tags) + if: steps.should-push.outputs.should-push == 'true' + uses: docker/build-push-action@v6 + with: + context: . + platforms: linux/amd64,linux/arm64 + file: docker/Dockerfile + push: true + tags: | + ${{ vars.DOCKER_PUSH_BASE }}/powermem-server:latest + ${{ vars.DOCKER_PUSH_BASE }}/powermem-server:${{ steps.sdk-version.outputs.image-tag }} + + - name: Debug should-push output + run: | + echo "should-push output value: '${{ steps.should-push.outputs.should-push }}'" + echo "Condition check: should-push == 'false'" + if [ "${{ steps.should-push.outputs.should-push }}" == "false" ]; then + echo "✓ Condition is true, will execute build step" + else + echo "✗ Condition is false, step will be skipped" + fi + + - name: Build and save Docker images (for PR and branches) + if: steps.should-push.outputs.should-push != 'true' + run: | + mkdir -p docker-images + + IMAGE_NAME="powermem-server" + SDK_VERSION="${{ steps.sdk-version.outputs.sdk-version }}" + IMAGE_VERSION="${{ steps.sdk-version.outputs.image-tag }}" + + echo "SDK version: $SDK_VERSION" + echo "Image version tag: $IMAGE_VERSION" + + # Build and save for each platform + for platform in linux/amd64 linux/arm64; do + platform_suffix=$(echo $platform | tr '/' '-') + image_tag="${IMAGE_NAME}:${IMAGE_VERSION}" + output_file="docker-images/${IMAGE_NAME}-${IMAGE_VERSION}-${platform_suffix}.tar" + + echo "Building image for platform: $platform" + + # Build the image for the specific platform and load it + docker buildx build \ + . 
\ + --file docker/Dockerfile \ + --platform $platform \ + --load \ + --tag ${image_tag} + + # Save the image as tar file + echo "Saving image to: ${output_file}" + docker save ${image_tag} -o ${output_file} + + # Verify the file was created + if [ -f "${output_file}" ]; then + echo "✓ Successfully saved: ${output_file} ($(du -h ${output_file} | cut -f1))" + else + echo "✗ Failed to save: ${output_file}" + exit 1 + fi + done + + # List all saved files + echo "" + echo "All saved Docker images:" + ls -lh docker-images/ + echo "" + echo "To load these images, use: docker load -i " + + - name: Upload Docker image artifacts (for PR and branches) + if: steps.should-push.outputs.should-push != 'true' + uses: actions/upload-artifact@v4 + with: + name: powermem-server-docker-images + path: docker-images/*.tar + retention-days: 30 + if-no-files-found: error diff --git a/.gitignore b/.gitignore index 031315c..ff0dede 100644 --- a/.gitignore +++ b/.gitignore @@ -198,3 +198,23 @@ performance_logs/ # Docker .dockerignore + +# PowerMem API Server +.server.pid +server.log +*.pid +server_*.log +api_server.log + +# Server runtime files +.pid +*.pid.lock + +# Server data directories (if any) +server_data/ +api_data/ + +# Server backups +server_backup/ +*.backup + diff --git a/Makefile b/Makefile index 9d270c0..85a6d71 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help install install-dev test test-unit test-integration test-e2e test-coverage test-fast test-slow lint format clean build build-package build-check publish-pypi publish-testpypi install-build-tools upload docs bump-version +.PHONY: help install install-dev test test-unit test-integration test-e2e test-coverage test-fast test-slow lint format clean build build-package build-check publish-pypi publish-testpypi install-build-tools upload docs bump-version server-start server-stop server-restart server-status server-logs docker-build docker-run docker-up docker-down docker-logs docker-stop docker-restart docker-clean docker-ps help: ## Show help information @echo "powermem Project Build Tools" @@ -190,6 +190,259 @@ bump-version: ## Bump version number (usage: make bump-version VERSION=0.2.0) @echo "" @echo "Note: Don't forget to update VERSION_HISTORY in src/powermem/version.py manually!" +# Server management +SERVER_PID_FILE := .server.pid + +# Load server configuration from .env file if it exists +# This allows users to configure POWERMEM_SERVER_PORT, POWERMEM_SERVER_HOST, etc. in .env +# Read from .env file, stripping quotes and whitespace +ENV_SERVER_HOST := $(shell grep -E '^POWERMEM_SERVER_HOST=' .env 2>/dev/null | cut -d '=' -f2- | sed 's/^[[:space:]]*//;s/[[:space:]]*$$//' | sed "s/^['\"]//;s/['\"]$$//") +ENV_SERVER_PORT := $(shell grep -E '^POWERMEM_SERVER_PORT=' .env 2>/dev/null | cut -d '=' -f2- | sed 's/^[[:space:]]*//;s/[[:space:]]*$$//' | sed "s/^['\"]//;s/['\"]$$//") +ENV_SERVER_WORKERS := $(shell grep -E '^POWERMEM_SERVER_WORKERS=' .env 2>/dev/null | cut -d '=' -f2- | sed 's/^[[:space:]]*//;s/[[:space:]]*$$//' | sed "s/^['\"]//;s/['\"]$$//") + +# Use values from .env if they exist and are non-empty, otherwise use defaults +SERVER_HOST := $(or $(ENV_SERVER_HOST),0.0.0.0) +SERVER_PORT := $(or $(ENV_SERVER_PORT),8000) +SERVER_WORKERS := $(or $(ENV_SERVER_WORKERS),4) + +server-start: ## Start the PowerMem API server + @echo "Starting PowerMem API server..." 
+ @if [ -f $(SERVER_PID_FILE) ]; then \ + echo "Server is already running (PID: $$(cat $(SERVER_PID_FILE)))"; \ + echo "Use 'make server-stop' to stop it first, or 'make server-restart' to restart"; \ + exit 1; \ + fi + @powermem-server --host $(SERVER_HOST) --port $(SERVER_PORT) --workers $(SERVER_WORKERS) > /dev/null 2>&1 & \ + echo $$! > $(SERVER_PID_FILE); \ + echo "Server started with PID: $$!"; \ + echo "Server running at http://$(SERVER_HOST):$(SERVER_PORT)"; \ + echo "API docs available at http://$(SERVER_HOST):$(SERVER_PORT)/docs"; \ + echo "Logs are being written to server.log (configured via POWERMEM_SERVER_LOG_FILE)"; \ + echo "Use 'make server-stop' to stop the server" + +server-start-reload: ## Start the PowerMem API server with auto-reload (development mode) + @echo "Starting PowerMem API server with auto-reload..." + @if [ -f $(SERVER_PID_FILE) ]; then \ + echo "Server is already running (PID: $$(cat $(SERVER_PID_FILE)))"; \ + echo "Use 'make server-stop' to stop it first"; \ + exit 1; \ + fi + @powermem-server --host $(SERVER_HOST) --port $(SERVER_PORT) --reload > /dev/null 2>&1 & \ + echo $$! > $(SERVER_PID_FILE); \ + echo "Server started with PID: $$! (auto-reload enabled)"; \ + echo "Server running at http://$(SERVER_HOST):$(SERVER_PORT)"; \ + echo "API docs available at http://$(SERVER_HOST):$(SERVER_PORT)/docs"; \ + echo "Logs are being written to server.log (configured via POWERMEM_SERVER_LOG_FILE)"; \ + echo "Use 'make server-stop' to stop the server" + +server-stop: ## Stop the PowerMem API server + @if [ ! -f $(SERVER_PID_FILE) ]; then \ + echo "Server PID file not found. Checking for running processes..."; \ + PID=$$(lsof -t -i:$(SERVER_PORT) 2>/dev/null || echo ""); \ + if [ -z "$$PID" ]; then \ + echo "No server process found on port $(SERVER_PORT)"; \ + exit 0; \ + else \ + echo "Found process $$PID on port $(SERVER_PORT), stopping..."; \ + kill $$PID 2>/dev/null || kill -9 $$PID 2>/dev/null; \ + echo "Server stopped"; \ + exit 0; \ + fi; \ + fi + @PID=$$(cat $(SERVER_PID_FILE) 2>/dev/null || echo ""); \ + if [ -z "$$PID" ]; then \ + echo "PID file exists but is empty"; \ + rm -f $(SERVER_PID_FILE); \ + exit 0; \ + fi; \ + if ps -p $$PID > /dev/null 2>&1; then \ + echo "Stopping server (PID: $$PID)..."; \ + kill $$PID 2>/dev/null || kill -9 $$PID 2>/dev/null; \ + sleep 1; \ + if ps -p $$PID > /dev/null 2>&1; then \ + echo "Force killing server (PID: $$PID)..."; \ + kill -9 $$PID 2>/dev/null; \ + fi; \ + echo "Server stopped"; \ + else \ + echo "Server process (PID: $$PID) not found, cleaning up PID file"; \ + fi; \ + rm -f $(SERVER_PID_FILE); \ + echo "✓ Server stopped" + +server-restart: server-stop server-start ## Restart the PowerMem API server + @echo "✓ Server restarted" + +server-status: ## Check the status of the PowerMem API server + @if [ -f $(SERVER_PID_FILE) ]; then \ + PID=$$(cat $(SERVER_PID_FILE) 2>/dev/null || echo ""); \ + if [ -z "$$PID" ]; then \ + echo "Server PID file exists but is empty"; \ + rm -f $(SERVER_PID_FILE); \ + exit 1; \ + fi; \ + if ps -p $$PID > /dev/null 2>&1; then \ + echo "✓ Server is running (PID: $$PID)"; \ + echo " URL: http://$(SERVER_HOST):$(SERVER_PORT)"; \ + echo " Docs: http://$(SERVER_HOST):$(SERVER_PORT)/docs"; \ + echo " Health: http://$(SERVER_HOST):$(SERVER_PORT)/api/v1/health"; \ + else \ + echo "✗ Server is not running (stale PID file)"; \ + rm -f $(SERVER_PID_FILE); \ + exit 1; \ + fi; \ + else \ + PID=$$(lsof -t -i:$(SERVER_PORT) 2>/dev/null || echo ""); \ + if [ -z "$$PID" ]; then \ + echo "✗ Server is not 
running"; \ + exit 1; \ + else \ + echo "✓ Server is running on port $(SERVER_PORT) (PID: $$PID)"; \ + echo " URL: http://$(SERVER_HOST):$(SERVER_PORT)"; \ + echo " Docs: http://$(SERVER_HOST):$(SERVER_PORT)/docs"; \ + fi; \ + fi + +server-logs: ## Show server logs (tail -f server.log) + @if [ ! -f server.log ]; then \ + echo "No log file found (server.log)"; \ + exit 1; \ + fi + @tail -f server.log + +server-logs-last: ## Show last 50 lines of server logs + @if [ ! -f server.log ]; then \ + echo "No log file found (server.log)"; \ + exit 1; \ + fi + @tail -n 50 server.log + +# Docker commands +DOCKER_IMAGE := oceanbase/powermem-server +DOCKER_TAG := latest +DOCKER_COMPOSE_FILE := docker/docker-compose.yml + +docker-build: ## Build Docker image + @echo "Building Docker image $(DOCKER_IMAGE):$(DOCKER_TAG)..." + docker build -t $(DOCKER_IMAGE):$(DOCKER_TAG) -f docker/Dockerfile . + @echo "✓ Docker image built successfully" + +docker-build-mirror: ## Build Docker image with pip mirror source (usage: make docker-build-mirror MIRROR=tsinghua) + @if [ -z "$(MIRROR)" ]; then \ + echo "Error: MIRROR is required. Usage: make docker-build-mirror MIRROR=tsinghua"; \ + echo "Available mirrors: tsinghua, aliyun"; \ + exit 1; \ + fi + @case "$(MIRROR)" in \ + tsinghua) \ + PIP_URL="https://pypi.tuna.tsinghua.edu.cn/simple"; \ + PIP_HOST="pypi.tuna.tsinghua.edu.cn"; \ + DEBIAN_MIRROR="mirrors.tuna.tsinghua.edu.cn"; \ + ;; \ + aliyun) \ + PIP_URL="https://mirrors.aliyun.com/pypi/simple"; \ + PIP_HOST="mirrors.aliyun.com"; \ + DEBIAN_MIRROR="mirrors.aliyun.com"; \ + ;; \ + *) \ + echo "Error: Unknown mirror '$(MIRROR)'. Available: tsinghua, aliyun"; \ + exit 1; \ + ;; \ + esac; \ + echo "Building Docker image $(DOCKER_IMAGE):$(DOCKER_TAG) with $(MIRROR) mirror..."; \ + docker build -t $(DOCKER_IMAGE):$(DOCKER_TAG) -f docker/Dockerfile \ + --build-arg PIP_INDEX_URL=$$PIP_URL \ + --build-arg PIP_TRUSTED_HOST=$$PIP_HOST \ + --build-arg DEBIAN_MIRROR=$$DEBIAN_MIRROR . + @echo "✓ Docker image built successfully with $(MIRROR) mirror" + +docker-build-tag: ## Build Docker image with custom tag (usage: make docker-build-tag TAG=v0.2.1) + @if [ -z "$(TAG)" ]; then \ + echo "Error: TAG is required. Usage: make docker-build-tag TAG=v0.2.1"; \ + exit 1; \ + fi + @echo "Building Docker image $(DOCKER_IMAGE):$(TAG)..." + docker build -t $(DOCKER_IMAGE):$(TAG) -f docker/Dockerfile . + @echo "✓ Docker image built successfully with tag $(TAG)" + +docker-run: ## Run Docker container + @echo "Running Docker container..." + @if [ ! -f .env ]; then \ + echo "Warning: .env file not found. Container will use default configuration."; \ + fi + docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + -v $$(pwd)/.env:/app/.env:ro \ + --env-file .env \ + $(DOCKER_IMAGE):$(DOCKER_TAG) || \ + (echo "Container may already exist. Use 'make docker-stop' first or 'make docker-restart'"; exit 1) + @echo "✓ Container started" + @echo "Server running at http://localhost:8000" + @echo "API docs at http://localhost:8000/docs" + +docker-up: ## Start services using docker-compose + @echo "Starting services with docker-compose..." + docker-compose -f $(DOCKER_COMPOSE_FILE) up -d + @echo "✓ Services started" + @echo "Server running at http://localhost:8000" + @echo "API docs at http://localhost:8000/docs" + +docker-down: ## Stop services using docker-compose + @echo "Stopping services with docker-compose..." 
+ docker-compose -f $(DOCKER_COMPOSE_FILE) down + @echo "✓ Services stopped" + +docker-logs: ## Show Docker container logs (docker-compose) + @docker-compose -f $(DOCKER_COMPOSE_FILE) logs -f + +docker-logs-container: ## Show Docker container logs (single container) + @docker logs -f powermem-server 2>/dev/null || echo "Container 'powermem-server' not found. Use 'make docker-run' first." + +docker-stop: ## Stop Docker container + @echo "Stopping Docker container..." + @docker stop powermem-server 2>/dev/null && docker rm powermem-server 2>/dev/null && echo "✓ Container stopped and removed" || echo "Container not found or already stopped" + +docker-restart: docker-stop docker-run ## Restart Docker container + @echo "✓ Container restarted" + +docker-restart-compose: docker-down docker-up ## Restart services using docker-compose + @echo "✓ Services restarted" + +docker-ps: ## Show running Docker containers + @echo "Running containers:" + @docker ps --filter "name=powermem-server" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" + +docker-status: ## Check Docker container status + @if docker ps --filter "name=powermem-server" --format "{{.Names}}" | grep -q powermem-server; then \ + echo "✓ Container is running"; \ + docker ps --filter "name=powermem-server" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"; \ + else \ + echo "✗ Container is not running"; \ + exit 1; \ + fi + +docker-clean: ## Clean Docker resources (containers, images, volumes) + @echo "Cleaning Docker resources..." + @docker stop powermem-server 2>/dev/null || true + @docker rm powermem-server 2>/dev/null || true + @docker-compose -f $(DOCKER_COMPOSE_FILE) down -v 2>/dev/null || true + @echo "✓ Docker resources cleaned" + +docker-clean-all: ## Clean all Docker resources including images + @echo "Cleaning all Docker resources (including images)..." + @docker stop powermem-server 2>/dev/null || true + @docker rm powermem-server 2>/dev/null || true + @docker-compose -f $(DOCKER_COMPOSE_FILE) down -v 2>/dev/null || true + @docker rmi $(DOCKER_IMAGE):$(DOCKER_TAG) 2>/dev/null || true + @echo "✓ All Docker resources cleaned" + +docker-rebuild: docker-clean docker-build ## Rebuild Docker image from scratch + @echo "✓ Docker image rebuilt" + +docker-rebuild-up: docker-rebuild docker-up ## Rebuild and start services + @echo "✓ Docker image rebuilt and services started" + # CI/CD helpers ci-test: install-test test-unit test-integration ## Run tests for CI (unit + integration) @echo "✓ All CI tests passed" diff --git a/README.md b/README.md index 561bad4..c4fce50 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ In AI application development, enabling large language models to persistently "r pip install powermem ``` -### 💡 Basic Usage +### 💡 Basic Usage(SDK) **✨ Simplest Way**: Create memory from `.env` file automatically! [Configuration Reference](.env.example) @@ -99,6 +99,96 @@ for result in results.get('results', []): For more detailed examples and usage patterns, see the [Getting Started Guide](docs/guides/0001-getting_started.md). +### 🌐 HTTP API Server + +PowerMem also provides a production-ready HTTP API server that exposes all core memory management capabilities through RESTful APIs. This enables any application that supports HTTP calls to integrate PowerMem's intelligent memory system, regardless of programming language. + +**Relationship with SDK**: The API server uses the same PowerMem SDK under the hood and shares the same configuration (`.env` file). 
It provides an HTTP interface to the same memory management features available in the Python SDK, making PowerMem accessible to non-Python applications. + +**Starting the API Server**: + +```bash +# Method 1: Using CLI command (after pip install) +powermem-server --host 0.0.0.0 --port 8000 + +# Method 2: Using Docker +# Build and run with Docker +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + --env-file .env \ + oceanbase/powermem-server:latest + +# Or use Docker Compose (recommended) +docker-compose -f docker/docker-compose.yml up -d + +``` + +Once started, the API server provides: +- RESTful API endpoints for all memory operations +- Interactive API documentation at `http://localhost:8000/docs` +- API Key authentication and rate limiting support +- Same configuration as SDK (via `.env` file) + +For complete API documentation and usage examples, see the [API Server Documentation](docs/api/0005-api_server.md). + +### 🔌 MCP Server + +PowerMem also provides a Model Context Protocol (MCP) server that enables integration with MCP-compatible clients such as Claude Desktop. The MCP server exposes PowerMem's memory management capabilities through the MCP protocol, allowing AI assistants to access and manage memories seamlessly. + +**Relationship with SDK**: The MCP server uses the same PowerMem SDK and shares the same configuration (`.env` file). It provides an MCP interface to the same memory management features, making PowerMem accessible to MCP-compatible AI assistants. + +**Installation**: + +```bash +# Install PowerMem (required) +pip install powermem + +# Install uvx (if not already installed) +# On macOS/Linux: +curl -LsSf https://astral.sh/uv/install.sh | sh + +# On Windows: +powershell -c "irm https://astral.sh/uv/install.ps1 | iex" +``` + +**Starting the MCP Server**: + +```bash +# SSE mode (recommended, default port 8000) +uvx powermem-mcp sse + +# SSE mode with custom port +uvx powermem-mcp sse 8001 + +# Stdio mode +uvx powermem-mcp stdio + +# Streamable HTTP mode (default port 8000) +uvx powermem-mcp streamable-http + +# Streamable HTTP mode with custom port +uvx powermem-mcp streamable-http 8001 +``` + +**Integration with Claude Desktop**: + +Add the following configuration to your Claude Desktop config file: + +```json +{ + "mcpServers": { + "powermem": { + "url": "http://localhost:8000/mcp" + } + } +} +``` + +The MCP server provides tools for memory management including adding, searching, updating, and deleting memories. For complete MCP documentation and usage examples, see the [MCP Server Documentation](docs/api/0004-mcp.md). 
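+
+**Quick check (API server)**: once the HTTP API server is running, you can verify it is reachable with a plain HTTP call. A minimal smoke test — `/api/v1/system/health` is the public health endpoint that this repo's Docker health check probes:
+
+```bash
+curl -f http://localhost:8000/api/v1/system/health
+```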
+ ## 🔗 Integrations & Demos - 🔗 **LangChain Integration**: Build medical support chatbot using LangChain + PowerMem + OceanBase, [View Example](examples/langchain/README.md) @@ -118,10 +208,11 @@ For more detailed examples and usage patterns, see the [Getting Started Guide](d ## ⭐ Highlights Release Notes -| Version | Iteration Period | Release Date | Function | -|---------|--------|-------|---------| -| 0.2.0 | 2025.12 | 2025.12.16 | | -| 0.1.0 | 2025.11 | 2025.11.14 | | +| Version | Release Date | Function | +|---------|-------|---------| +| 0.3.0 | TBD | | +| 0.2.0 | 2025.12.16 | | +| 0.1.0 | 2025.11.14 | | ## 💬 Support diff --git a/README_CN.md b/README_CN.md index b26bfa7..02953ee 100644 --- a/README_CN.md +++ b/README_CN.md @@ -78,7 +78,7 @@ pip install powermem ``` -### 💡 基本使用 +### 💡 基本使用(SDK) **✨ 最简单的方式**:从 `.env` 文件读取配置自动创建记忆![配置文件参考](.env.example) @@ -100,6 +100,96 @@ for result in results.get('results', []): 更多详细示例和使用模式,请参阅[入门指南](docs/guides/0001-getting_started.md)。 +### 🌐 HTTP API Server + +PowerMem 还提供了生产就绪的 HTTP API 服务器,通过 RESTful API 暴露所有核心记忆管理功能。这使得任何支持 HTTP 调用的应用程序都能集成 PowerMem 的智能记忆系统,无论使用何种编程语言。 + +**与 SDK 的关系**:API 服务器底层使用相同的 PowerMem SDK,并共享相同的配置(`.env` 文件)。它提供了与 Python SDK 相同的记忆管理功能的 HTTP 接口,使 PowerMem 可供非 Python 应用程序使用。 + +**启动 API 服务器**: + +```bash +# 方法 1:使用 CLI 命令(pip 安装后) +powermem-server --host 0.0.0.0 --port 8000 + +# 方法 2:使用 Docker +# 构建并运行 Docker 容器 +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + --env-file .env \ + oceanbase/powermem-server:latest + +# 或使用 Docker Compose(推荐) +docker-compose -f docker/docker-compose.yml up -d + +``` + +启动后,API 服务器提供: +- 所有记忆操作的 RESTful API 端点 +- 交互式 API 文档,访问 `http://localhost:8000/docs` +- API Key 认证和限流支持 +- 与 SDK 相同的配置(通过 `.env` 文件) + +完整的 API 文档和使用示例,请参阅 [API 服务器文档](docs/api/0005-api_server.md)。 + +### 🔌 MCP Server + +PowerMem 还提供了模型上下文协议(MCP)服务器,支持与 Claude Desktop 等 MCP 兼容客户端集成。MCP 服务器通过 MCP 协议暴露 PowerMem 的记忆管理功能,使 AI 助手能够无缝访问和管理记忆。 + +**与 SDK 的关系**:MCP 服务器使用相同的 PowerMem SDK 并共享相同的配置(`.env` 文件)。它提供了与 Python SDK 相同的记忆管理功能的 MCP 接口,使 PowerMem 可供 MCP 兼容的 AI 助手使用。 + +**安装**: + +```bash +# 安装 PowerMem(必需) +pip install powermem + +# 安装 uvx(如果尚未安装) +# 在 macOS/Linux 上: +curl -LsSf https://astral.sh/uv/install.sh | sh + +# 在 Windows 上: +powershell -c "irm https://astral.sh/uv/install.ps1 | iex" +``` + +**启动 MCP 服务器**: + +```bash +# SSE 模式(推荐,默认端口 8000) +uvx powermem-mcp sse + +# SSE 模式,自定义端口 +uvx powermem-mcp sse 8001 + +# Stdio 模式 +uvx powermem-mcp stdio + +# Streamable HTTP 模式(默认端口 8000) +uvx powermem-mcp streamable-http + +# Streamable HTTP 模式,自定义端口 +uvx powermem-mcp streamable-http 8001 +``` + +**与 Claude Desktop 集成**: + +在 Claude Desktop 配置文件中添加以下配置: + +```json +{ + "mcpServers": { + "powermem": { + "url": "http://localhost:8000/mcp" + } + } +} +``` + +MCP 服务器提供记忆管理工具,包括添加、搜索、更新和删除记忆。完整的 MCP 文档和使用示例,请参阅 [MCP 服务器文档](docs/api/0004-mcp.md)。 + ## 🔗 集成与演示 - 🔗 **LangChain 集成**:基于 LangChain + PowerMem + OceanBase 构建医疗支持机器人,[查看示例](examples/langchain/README.md) @@ -119,10 +209,11 @@ for result in results.get('results', []): ## ⭐ 重点发布说明 -| Version | Iteration Period | Release Date | Function | -|---------|--------|-------|---------| -| 0.2.0 | 2025.12 | 2025.12.16 | | -| 0.1.0 | 2025.11 | 2025.11.14 | | +| Version | Release Date | Function | +|---------|-------|---------| +| 0.3.0 | 2026.01.09 | | +| 0.2.0 | 2025.12.16 | | +| 0.1.0 | 2025.11.14 | | ## 💬 支持 diff --git a/README_JP.md b/README_JP.md index 
5d08a32..ee185ce 100644 --- a/README_JP.md +++ b/README_JP.md @@ -77,7 +77,7 @@ AI アプリケーション開発において、大規模言語モデルが履 pip install powermem ``` -### 💡 基本的な使用方法 +### 💡 基本的な使用方法(SDK) **✨ 最も簡単な方法**:`.env` ファイルから自動的にメモリを作成![設定ファイル参照](.env.example) @@ -99,6 +99,96 @@ for result in results.get('results', []): より詳細な例と使用パターンについては、[はじめにガイド](docs/guides/0001-getting_started.md) を参照してください。 +### 🌐 HTTP API Server + +PowerMem は、すべてのコアメモリ管理機能を RESTful API を通じて公開する本番環境対応の HTTP API サーバーも提供します。これにより、HTTP 呼び出しをサポートする任意のアプリケーションが、プログラミング言語に関係なく PowerMem のインテリジェントメモリシステムを統合できます。 + +**SDK との関係**:API サーバーは、内部で同じ PowerMem SDK を使用し、同じ設定(`.env` ファイル)を共有します。Python SDK で利用可能な同じメモリ管理機能への HTTP インターフェースを提供し、PowerMem を非 Python アプリケーションでも利用可能にします。 + +**API サーバーの起動**: + +```bash +# 方法 1:CLI コマンドを使用(pip インストール後) +powermem-server --host 0.0.0.0 --port 8000 + +# 方法 2:Docker を使用 +# Docker イメージをビルドして実行 +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + --env-file .env \ + oceanbase/powermem-server:latest + +# または Docker Compose を使用(推奨) +docker-compose -f docker/docker-compose.yml up -d + +``` + +起動後、API サーバーは以下を提供します: +- すべてのメモリ操作の RESTful API エンドポイント +- インタラクティブな API ドキュメント、`http://localhost:8000/docs` でアクセス可能 +- API Key 認証とレート制限サポート +- SDK と同じ設定(`.env` ファイル経由) + +完全な API ドキュメントと使用例については、[API サーバードキュメント](docs/api/0005-api_server.md) を参照してください。 + +### 🔌 MCP Server + +PowerMem は、Claude Desktop などの MCP 互換クライアントとの統合を可能にするモデルコンテキストプロトコル(MCP)サーバーも提供します。MCP サーバーは、MCP プロトコルを通じて PowerMem のメモリ管理機能を公開し、AI アシスタントがシームレスにメモリにアクセスして管理できるようにします。 + +**SDK との関係**:MCP サーバーは、同じ PowerMem SDK を使用し、同じ設定(`.env` ファイル)を共有します。Python SDK で利用可能な同じメモリ管理機能への MCP インターフェースを提供し、PowerMem を MCP 互換の AI アシスタントでも利用可能にします。 + +**インストール**: + +```bash +# PowerMem をインストール(必須) +pip install powermem + +# uvx をインストール(まだインストールされていない場合) +# macOS/Linux の場合: +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Windows の場合: +powershell -c "irm https://astral.sh/uv/install.ps1 | iex" +``` + +**MCP サーバーの起動**: + +```bash +# SSE モード(推奨、デフォルトポート 8000) +uvx powermem-mcp sse + +# SSE モード、カスタムポート +uvx powermem-mcp sse 8001 + +# Stdio モード +uvx powermem-mcp stdio + +# Streamable HTTP モード(デフォルトポート 8000) +uvx powermem-mcp streamable-http + +# Streamable HTTP モード、カスタムポート +uvx powermem-mcp streamable-http 8001 +``` + +**Claude Desktop との統合**: + +Claude Desktop 設定ファイルに次の設定を追加します: + +```json +{ + "mcpServers": { + "powermem": { + "url": "http://localhost:8000/mcp" + } + } +} +``` + +MCP サーバーは、メモリの追加、検索、更新、削除を含むメモリ管理ツールを提供します。完全な MCP ドキュメントと使用例については、[MCP サーバードキュメント](docs/api/0004-mcp.md) を参照してください。 + ## 🔗 統合とデモ - 🔗 **LangChain 統合**: LangChain + PowerMem + OceanBase を使用して医療サポートロボットを構築、[例を参照](examples/langchain/README.md) @@ -118,10 +208,11 @@ for result in results.get('results', []): ## ⭐ ハイライト リリースノート -| Version | Iteration Period | Release Date | Function | -|---------|--------|-------|---------| -| 0.2.0 | 2025.12 | 2025.12.16 | | -| 0.1.0 | 2025.11 | 2025.11.14 | | +| Version | Release Date | Function | +|---------|-------|---------| +| 0.3.0 | 2026.01.09 | | +| 0.2.0 | 2025.12.16 | | +| 0.1.0 | 2025.11.14 | | ## 💬 サポート diff --git a/docker/DOCKER.md b/docker/DOCKER.md new file mode 100644 index 0000000..792da63 --- /dev/null +++ b/docker/DOCKER.md @@ -0,0 +1,550 @@ +# Docker Deployment Guide for PowerMem Server + +This guide provides instructions for building and running PowerMem Server using Docker. 
+ +## Table of Contents + +- [Prerequisites](#prerequisites) +- [Quick Start](#quick-start) +- [Building the Docker Image](#building-the-docker-image) +- [Running the Container](#running-the-container) +- [Configuration](#configuration) +- [Environment Variables](#environment-variables) +- [Docker Compose](#docker-compose) +- [Production Deployment](#production-deployment) +- [Troubleshooting](#troubleshooting) + +## Prerequisites + +- Docker 20.10 or later +- Docker Compose 2.0 or later (optional, for docker-compose setup) + +## Quick Start + +### Build and Run + +```bash +# Build the Docker image (from project root) +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . + +# Run the container with shared .env file (recommended) +# This allows both SDK and Server to use the same configuration +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + --env-file .env \ + oceanbase/powermem-server:latest +``` + +The server will be available at `http://localhost:8000`. + +**Note**: If you have a `.env` file that's shared between the SDK and Server, use the first command with volume mount (`-v`) to ensure both components read from the same configuration file. See [Shared .env File](#shared-env-file-for-sdk-and-server) for more details. + +### Using Docker Compose + +The `docker/docker-compose.yml` file is pre-configured to: +- Automatically load environment variables from `.env` file +- Mount the `.env` file as a read-only volume at `/app/.env` +- Enable both SDK and Server to use the same configuration + +```bash +# Start the server (from project root) +docker-compose -f docker/docker-compose.yml up -d + +# View logs +docker-compose -f docker/docker-compose.yml logs -f + +# Stop the server +docker-compose -f docker/docker-compose.yml down +``` + +**Note**: The Docker Compose setup automatically handles the shared `.env` file configuration, so both your local SDK and the containerized Server will use the same configuration values. + +## Building the Docker Image + +### Basic Build + +```bash +# Build from project root directory +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . +``` + +### Build with Custom Tag + +```bash +# Build from project root directory +docker build -t oceanbase/powermem-server:v0.2.1 -f docker/Dockerfile . +``` + +### Build with Mirror Sources (for slow network) + +If you're experiencing slow download speeds or network timeouts, you can use mirror sources for both pip and apt-get: + +```bash +# Using Tsinghua mirror (China) - speeds up both pip and apt-get +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile \ + --build-arg PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \ + --build-arg PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn \ + --build-arg DEBIAN_MIRROR=mirrors.tuna.tsinghua.edu.cn . + +# Using Aliyun mirror (China) - speeds up both pip and apt-get +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile \ + --build-arg PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple \ + --build-arg PIP_TRUSTED_HOST=mirrors.aliyun.com \ + --build-arg DEBIAN_MIRROR=mirrors.aliyun.com . + +# Using only pip mirror (if apt-get is fast enough) +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile \ + --build-arg PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \ + --build-arg PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn . 
+
+# Using only Debian mirror (if pip is fast enough)
+docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile \
+  --build-arg DEBIAN_MIRROR=mirrors.aliyun.com .
+```
+
+**Note**:
+- The Dockerfile is configured with a longer pip timeout (300 seconds) to handle slow network connections.
+- Using a Debian mirror can significantly speed up `apt-get` operations (especially when installing gcc, g++, etc.).
+- The `docker-build-mirror` Makefile target automatically configures both the pip and Debian mirrors.
+
+### Build Stages
+
+Currently, the Dockerfile uses a multi-stage build to optimize image size. The build process:
+
+1. **Builder stage**: Installs all dependencies and builds the package
+2. **Final stage**: Creates a minimal runtime image with only necessary files
+
+## Running the Container
+
+### Basic Run
+
+```bash
+docker run -d \
+  --name powermem-server \
+  -p 8000:8000 \
+  oceanbase/powermem-server:latest
+```
+
+### Run with Environment Variables
+
+```bash
+docker run -d \
+  --name powermem-server \
+  -p 8000:8000 \
+  -e POWERMEM_SERVER_HOST=0.0.0.0 \
+  -e POWERMEM_SERVER_PORT=8000 \
+  -e POWERMEM_SERVER_WORKERS=4 \
+  -e POWERMEM_SERVER_API_KEYS=key1,key2,key3 \
+  -e POWERMEM_SERVER_AUTH_ENABLED=true \
+  -e POWERMEM_SERVER_LOG_LEVEL=INFO \
+  oceanbase/powermem-server:latest
+```
+
+### Run with Environment File
+
+Create a `.env` file:
+
+```env
+POWERMEM_SERVER_HOST=0.0.0.0
+POWERMEM_SERVER_PORT=8000
+POWERMEM_SERVER_WORKERS=4
+POWERMEM_SERVER_API_KEYS=your-api-key-1,your-api-key-2
+POWERMEM_SERVER_AUTH_ENABLED=true
+POWERMEM_SERVER_LOG_LEVEL=INFO
+POWERMEM_SERVER_CORS_ENABLED=true
+POWERMEM_SERVER_CORS_ORIGINS=*
+```
+
+Run with the environment file:
+
+```bash
+docker run -d \
+  --name powermem-server \
+  -p 8000:8000 \
+  --env-file .env \
+  oceanbase/powermem-server:latest
+```
+
+### Run with Shared .env File (SDK and Server)
+
+When both the SDK and Server need to use the same `.env` file, you can mount it as a volume. This allows the Server running in Docker to read the same configuration file that the SDK uses locally:
+
+```bash
+docker run -d \
+  --name powermem-server \
+  -p 8000:8000 \
+  -v $(pwd)/.env:/app/.env:ro \
+  --env-file .env \
+  oceanbase/powermem-server:latest
+```
+
+**Note**: The `--env-file` flag loads environment variables from `.env` into the container's environment, while the volume mount (`-v`) makes the `.env` file accessible inside the container at `/app/.env` so the Server's configuration loader can read it directly. This ensures both SDK and Server use the exact same configuration values.
+
+**Benefits of this approach**:
+- Single source of truth: One `.env` file for both SDK and Server
+- Consistent configuration: Both components read from the same file
+- Easy updates: Modify `.env` once, both components pick up changes (after container restart)
+
+### Run with Volume Mounts (for persistent data)
+
+If you need to mount volumes for logs or configuration (note that `docker run -v` requires absolute host paths, hence `$(pwd)`):
+
+```bash
+docker run -d \
+  --name powermem-server \
+  -p 8000:8000 \
+  -v $(pwd)/logs:/app/logs \
+  -v $(pwd)/config:/app/config \
+  --env-file .env \
+  oceanbase/powermem-server:latest
+```
+
+## Configuration
+
+### Shared .env File for SDK and Server
+
+PowerMem Server and SDK are designed to share the same `.env` file. The `.env` file contains configuration for both:
+
+- **Server configuration**: `POWERMEM_SERVER_*` variables (host, port, workers, authentication, rate limiting, logging, CORS, API metadata)
+- **SDK configuration**: `DATABASE_PROVIDER`, `OCEANBASE_*`, `POSTGRES_*`, `LLM_*`, `EMBEDDING_*`, etc. + +When running the Server in Docker, you have two options: + +#### Option 1: Mount .env File (Recommended) + +Mount the `.env` file as a read-only volume so the Server can read it directly: + +```bash +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + --env-file .env \ + oceanbase/powermem-server:latest +``` + +This approach: +- Allows the Server to read `.env` file directly (same as SDK) +- Ensures both SDK and Server use identical configuration +- Makes it easy to update configuration by editing the `.env` file + +#### Option 2: Environment Variables Only + +If you prefer not to mount the file, you can use `--env-file` to load environment variables: + +```bash +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + --env-file .env \ + oceanbase/powermem-server:latest +``` + +**Note**: With Docker Compose, the `.env` file is automatically mounted and loaded. See the `docker/docker-compose.yml` file for details. + + +## Environment Variables + +The `.env` file contains configuration for both the PowerMem SDK and Server. The following sections describe the variables used by the Server. For SDK configuration variables (database, LLM, embedding providers), refer to the [Configuration Guide](../docs/guides/0003-configuration.md). + +### Server Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `POWERMEM_SERVER_HOST` | `0.0.0.0` | Host to bind the server to | +| `POWERMEM_SERVER_PORT` | `8000` | Port to bind the server to | +| `POWERMEM_SERVER_WORKERS` | `4` | Number of worker processes | +| `POWERMEM_SERVER_RELOAD` | `false` | Enable auto-reload (development only) | + +### Authentication Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `POWERMEM_SERVER_AUTH_ENABLED` | `true` | Enable API key authentication | +| `POWERMEM_SERVER_API_KEYS` | `` | Comma-separated list of API keys | + +### Rate Limiting Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `POWERMEM_SERVER_RATE_LIMIT_ENABLED` | `true` | Enable rate limiting | +| `POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE` | `100` | Requests per minute per IP | + +### Logging Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `POWERMEM_SERVER_LOG_LEVEL` | `INFO` | Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) | +| `POWERMEM_SERVER_LOG_FORMAT` | `json` | Log format (json or text) | + +### CORS Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `POWERMEM_SERVER_CORS_ENABLED` | `true` | Enable CORS | +| `POWERMEM_SERVER_CORS_ORIGINS` | `*` | Comma-separated list of allowed origins | + +### API Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `POWERMEM_SERVER_API_TITLE` | `PowerMem API` | API title | +| `POWERMEM_SERVER_API_VERSION` | `v1` | API version | +| `POWERMEM_SERVER_API_DESCRIPTION` | `PowerMem HTTP API Server - Intelligent Memory System` | API description | + +### SDK Configuration Variables (Shared with Server) + +The following variables are used by the SDK but may also be referenced by the Server for database connections: + +- `DATABASE_PROVIDER`: Database provider (`sqlite`, `oceanbase`, `postgres`) +- `OCEANBASE_*`: OceanBase database configuration +- `POSTGRES_*`: PostgreSQL database configuration +- `SQLITE_*`: SQLite database configuration +- `LLM_*`: 
LLM provider configuration +- `EMBEDDING_*`: Embedding provider configuration + +For complete SDK configuration options, refer to the [Configuration Guide](../docs/guides/0003-configuration.md). + +## Docker Compose + +A `docker/docker-compose.yml` file is provided for easier deployment: + +```yaml +version: '3.8' + +services: + powermem-server: + build: + context: .. + dockerfile: docker/Dockerfile + container_name: powermem-server + ports: + - "8000:8000" + environment: + - POWERMEM_SERVER_HOST=0.0.0.0 + - POWERMEM_SERVER_PORT=8000 + - POWERMEM_SERVER_WORKERS=4 + - POWERMEM_SERVER_API_KEYS=${POWERMEM_SERVER_API_KEYS:-} + - POWERMEM_SERVER_AUTH_ENABLED=${POWERMEM_SERVER_AUTH_ENABLED:-true} + - POWERMEM_SERVER_LOG_LEVEL=${POWERMEM_SERVER_LOG_LEVEL:-INFO} + - POWERMEM_SERVER_CORS_ENABLED=${POWERMEM_SERVER_CORS_ENABLED:-true} + - POWERMEM_DATABASE_URL=${POWERMEM_DATABASE_URL:-} + env_file: + - .env + volumes: + - ./logs:/app/logs + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/system/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s +``` + +### Using Docker Compose + +```bash +# Start services (from project root) +docker-compose -f docker/docker-compose.yml up -d + +# View logs +docker-compose -f docker/docker-compose.yml logs -f powermem-server + +# Stop services +docker-compose -f docker/docker-compose.yml down + +# Rebuild and restart +docker-compose -f docker/docker-compose.yml up -d --build +``` + +## Production Deployment + +### Security Best Practices + +1. **Use Secrets Management**: Never hardcode API keys or passwords. Use Docker secrets or environment variable injection. + +2. **Run as Non-Root User**: The Docker image runs as a non-root user (`powermem`) by default. + +3. **Network Security**: Use Docker networks to isolate containers and restrict access. + +4. **Resource Limits**: Set appropriate resource limits: + +```bash +docker run -d \ + --name powermem-server \ + --memory="2g" \ + --cpus="2" \ + -p 8000:8000 \ + oceanbase/powermem-server:latest +``` + +5. **Health Checks**: The image includes a health check. Monitor container health: + +```bash +docker ps # Check STATUS column +docker inspect --format='{{.State.Health.Status}}' powermem-server +``` + +### Production Docker Compose Example + +```yaml +version: '3.8' + +services: + powermem-server: + build: + context: .. 
+ dockerfile: docker/Dockerfile + image: oceanbase/powermem-server:latest + container_name: powermem-server + restart: always + ports: + - "8000:8000" + environment: + - POWERMEM_SERVER_WORKERS=8 + - POWERMEM_SERVER_LOG_LEVEL=INFO + env_file: + - .env.production + deploy: + resources: + limits: + cpus: '4' + memory: 4G + reservations: + cpus: '2' + memory: 2G + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/system/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" +``` + +### Reverse Proxy Setup + +For production, it's recommended to use a reverse proxy (nginx, traefik, etc.): + +```nginx +# nginx.conf example +upstream powermem { + server powermem-server:8000; +} + +server { + listen 80; + server_name api.powermem.example.com; + + location / { + proxy_pass http://powermem; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} +``` + +## Troubleshooting + +### Container Won't Start + +1. **Check logs**: +```bash +docker logs powermem-server +``` + +2. **Check environment variables**: +```bash +docker exec powermem-server env | grep POWERMEM +``` + +3. **Check if .env file is mounted correctly**: +```bash +# Check if .env file exists in container +docker exec powermem-server ls -la /app/.env + +# View .env file contents (if mounted) +docker exec powermem-server cat /app/.env +``` + +4. **Verify database connection**: +```bash +docker exec powermem-server python -c "import psycopg; psycopg.connect('${POWERMEM_DATABASE_URL}')" +``` + +### Health Check Failing + +1. **Check if server is running**: +```bash +docker exec powermem-server curl -f http://localhost:8000/api/v1/system/health +``` + +2. **Check server logs**: +```bash +docker logs powermem-server --tail 50 +``` + +### Port Already in Use + +If port 8000 is already in use, change the port: + +```bash +docker run -d \ + --name powermem-server \ + -p 8001:8000 \ + -e POWERMEM_SERVER_PORT=8000 \ + oceanbase/powermem-server:latest +``` + +### Database Connection Issues + +The entrypoint script waits up to 60 seconds for the database. If you need more time or want to disable the wait, you can modify the `docker-entrypoint.sh` script. + +### Permission Issues + +The container runs as user `powermem` (UID 1000). If you mount volumes, ensure proper permissions: + +```bash +sudo chown -R 1000:1000 ./logs +``` + +### .env File Not Found or Not Readable + +If the Server can't read the `.env` file: + +1. **Verify the file is mounted**: +```bash +docker exec powermem-server ls -la /app/.env +``` + +2. **Check file permissions**: +```bash +# Ensure .env file is readable +chmod 644 .env +``` + +3. **Verify mount path**: + - The `.env` file should be mounted at `/app/.env` inside the container + - Use `-v $(pwd)/.env:/app/.env:ro` to mount it as read-only + +4. **Alternative: Use environment variables only**: + If mounting the file doesn't work, you can use `--env-file` to load variables: +```bash +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + --env-file .env \ + oceanbase/powermem-server:latest +``` + +**Note**: When using `--env-file`, the Server will read from environment variables, but the SDK running locally will still read from the `.env` file. This is fine as long as both have the same values. 
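+
+To spot-check that parity, you can compare the two sides directly (a minimal sketch assuming plain `KEY=value` entries; quoted values in `.env` will show up as cosmetic differences):
+
+```bash
+# Compare POWERMEM_* settings in the local .env against the container environment
+diff <(grep -E '^POWERMEM_' .env | sort) \
+     <(docker exec powermem-server env | grep '^POWERMEM_' | sort)
+```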
+ diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..6055e61 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,107 @@ +# Multi-stage build for PowerMem Server +FROM python:3.11-slim as builder + +# Build argument for pip index URL (optional, for using mirror sources) +# Usage: docker build --build-arg PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple +ARG PIP_INDEX_URL="" +ARG PIP_TRUSTED_HOST="" + +# Build argument for Debian mirror (optional, for faster apt-get) +# Usage: docker build --build-arg DEBIAN_MIRROR=mirrors.aliyun.com +ARG DEBIAN_MIRROR="" + +# Set working directory +WORKDIR /build + +# Configure Debian mirror if provided, then install build dependencies +RUN if [ -n "$DEBIAN_MIRROR" ]; then \ + sed -i "s|http://deb.debian.org/debian|http://$DEBIAN_MIRROR/debian|g" /etc/apt/sources.list.d/debian.sources || \ + sed -i "s|http://deb.debian.org/debian|http://$DEBIAN_MIRROR/debian|g" /etc/apt/sources.list || true; \ + sed -i "s|http://security.debian.org/debian-security|http://$DEBIAN_MIRROR/debian-security|g" /etc/apt/sources.list.d/debian.sources || \ + sed -i "s|http://security.debian.org/debian-security|http://$DEBIAN_MIRROR/debian-security|g" /etc/apt/sources.list || true; \ + fi && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc \ + g++ \ + postgresql-client \ + && rm -rf /var/lib/apt/lists/* + +# Copy dependency files and source code needed for building +COPY pyproject.toml ./ +COPY README.md ./ +COPY src/ ./src/ + +# Install the package in build stage +# Increase timeout to handle slow network connections +# Configure pip with longer timeout and optional mirror source +RUN pip config set global.timeout 300 && \ + if [ -n "$PIP_INDEX_URL" ]; then \ + pip config set global.index-url "$PIP_INDEX_URL" && \ + if [ -n "$PIP_TRUSTED_HOST" ]; then \ + pip config set global.trusted-host "$PIP_TRUSTED_HOST"; \ + fi; \ + fi && \ + pip install --no-cache-dir --upgrade pip setuptools wheel && \ + pip install --no-cache-dir . 
+ +# Final stage +FROM python:3.11-slim + +# Build argument for Debian mirror (needs to be redeclared in each stage) +ARG DEBIAN_MIRROR="" + +# Set working directory +WORKDIR /app + +# Configure Debian mirror if provided, then install runtime dependencies +RUN if [ -n "$DEBIAN_MIRROR" ]; then \ + sed -i "s|http://deb.debian.org/debian|http://$DEBIAN_MIRROR/debian|g" /etc/apt/sources.list.d/debian.sources || \ + sed -i "s|http://deb.debian.org/debian|http://$DEBIAN_MIRROR/debian|g" /etc/apt/sources.list || true; \ + sed -i "s|http://security.debian.org/debian-security|http://$DEBIAN_MIRROR/debian-security|g" /etc/apt/sources.list.d/debian.sources || \ + sed -i "s|http://security.debian.org/debian-security|http://$DEBIAN_MIRROR/debian-security|g" /etc/apt/sources.list || true; \ + fi && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + postgresql-client \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user +RUN useradd -m -u 1000 powermem && \ + chown -R powermem:powermem /app + +# Copy installed package from builder +COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin + +# Copy application source code +COPY --chown=powermem:powermem src/ ./src/ +COPY --chown=powermem:powermem pyproject.toml ./ + +# Copy entrypoint script +COPY --chown=powermem:powermem docker/docker-entrypoint.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/docker-entrypoint.sh + +# Switch to non-root user +USER powermem + +# Expose default port +EXPOSE 8000 + +# Set environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + POWERMEM_SERVER_HOST=0.0.0.0 \ + POWERMEM_SERVER_PORT=8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ + CMD curl -f http://localhost:8000/api/v1/system/health || exit 1 + +# Use entrypoint script +ENTRYPOINT ["docker-entrypoint.sh"] + +# Default command +CMD ["powermem-server"] + diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..c056cbd --- /dev/null +++ b/docker/README.md @@ -0,0 +1,53 @@ +# Docker Directory + +This directory contains all Docker-related files for PowerMem Server. + +## Files + +- `Dockerfile` - Multi-stage Docker build file for PowerMem Server +- `docker-compose.yml` - Docker Compose configuration file +- `docker-entrypoint.sh` - Container entrypoint script +- `.dockerignore` - Files to exclude from Docker build context +- `DOCKER.md` - Complete Docker deployment documentation + +## Quick Start + +### Build Docker Image + +From the project root directory: + +```bash +docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . +``` + +### Run with Docker Compose + +From the project root directory: + +```bash +docker-compose -f docker/docker-compose.yml up -d +``` + +### Run with Docker + +From the project root directory: + +```bash +docker run -d \ + --name powermem-server \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + --env-file .env \ + oceanbase/powermem-server:latest +``` + +## Documentation + +For detailed documentation, see [DOCKER.md](./DOCKER.md). 
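+
+## Verify
+
+After starting the container by any of the methods above, you can confirm it is healthy (these mirror the health check built into the image):
+
+```bash
+# Hit the health endpoint directly
+curl -f http://localhost:8000/api/v1/system/health
+
+# Or ask Docker for the recorded health status
+docker inspect --format='{{.State.Health.Status}}' powermem-server
+```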
+ +## Notes + +- All Docker commands should be run from the **project root directory**, not from the `docker/` directory +- The build context is the project root, so paths in Dockerfile are relative to the project root +- The `.env` file should be in the project root directory and will be mounted into the container + diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..a747772 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,43 @@ +version: '3.8' + +services: + powermem-server: + build: + context: .. + dockerfile: docker/Dockerfile + image: oceanbase/powermem-server:latest + container_name: powermem-server + ports: + - "${POWERMEM_SERVER_PORT:-8000}:8000" + environment: + - POWERMEM_SERVER_HOST=${POWERMEM_SERVER_HOST:-0.0.0.0} + - POWERMEM_SERVER_PORT=${POWERMEM_SERVER_PORT:-8000} + - POWERMEM_SERVER_WORKERS=${POWERMEM_SERVER_WORKERS:-4} + - POWERMEM_SERVER_API_KEYS=${POWERMEM_SERVER_API_KEYS:-} + - POWERMEM_SERVER_AUTH_ENABLED=${POWERMEM_SERVER_AUTH_ENABLED:-false} + - POWERMEM_SERVER_RATE_LIMIT_ENABLED=${POWERMEM_SERVER_RATE_LIMIT_ENABLED:-true} + - POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE=${POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE:-100} + - POWERMEM_SERVER_LOG_LEVEL=${POWERMEM_SERVER_LOG_LEVEL:-INFO} + - POWERMEM_SERVER_LOG_FORMAT=${POWERMEM_SERVER_LOG_FORMAT:-json} + - POWERMEM_SERVER_CORS_ENABLED=${POWERMEM_SERVER_CORS_ENABLED:-true} + - POWERMEM_SERVER_CORS_ORIGINS=${POWERMEM_SERVER_CORS_ORIGINS:-*} + - POWERMEM_DATABASE_URL=${POWERMEM_DATABASE_URL:-} + env_file: + - .env + volumes: + - ./logs:/app/logs + # Mount .env file so both SDK and Server can use the same configuration + - ./.env:/app/.env:ro + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/system/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh new file mode 100644 index 0000000..eb13df7 --- /dev/null +++ b/docker/docker-entrypoint.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -e + +# Function to print log messages +log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $*" +} + +# Function to validate required environment variables +validate_config() { + log "Validating configuration..." + + # Check if API keys are set when auth is enabled + if [ "${POWERMEM_SERVER_AUTH_ENABLED:-true}" = "true" ] && [ -z "${POWERMEM_SERVER_API_KEYS}" ]; then + log "Warning: POWERMEM_SERVER_AUTH_ENABLED is true but POWERMEM_SERVER_API_KEYS is not set" + log "Server will start but API authentication may fail" + fi + + log "Configuration validation completed" +} + +# Main execution +main() { + log "Starting PowerMem Server..." 
+
+    # Change to app directory to ensure .env file can be found
+    cd /app || exit 1
+
+    # Check if .env file exists (mounted or copied)
+    if [ -f "/app/.env" ]; then
+        log ".env file found at /app/.env"
+    elif [ -f "/app/../.env" ]; then
+        log ".env file found at parent directory"
+    else
+        log "No .env file found, using environment variables only"
+    fi
+
+    # Validate configuration
+    validate_config
+
+    # Log configuration (without sensitive data)
+    log "Server Configuration:"
+    log "  Host: ${POWERMEM_SERVER_HOST:-0.0.0.0}"
+    log "  Port: ${POWERMEM_SERVER_PORT:-8000}"
+    log "  Workers: ${POWERMEM_SERVER_WORKERS:-4}"
+    log "  Log Level: ${POWERMEM_SERVER_LOG_LEVEL:-INFO}"
+    log "  Auth Enabled: ${POWERMEM_SERVER_AUTH_ENABLED:-true}"
+    log "  CORS Enabled: ${POWERMEM_SERVER_CORS_ENABLED:-true}"
+
+    # Execute the command
+    log "Launching server..."
+    exec "$@"
+}
+
+# Run main function
+main "$@"
+
diff --git a/docs/api/0005-api_server.md b/docs/api/0005-api_server.md
new file mode 100644
index 0000000..203df7c
--- /dev/null
+++ b/docs/api/0005-api_server.md
@@ -0,0 +1,1863 @@
+# HTTP API Server
+
+PowerMem HTTP API Server provides a production-ready RESTful API interface for PowerMem, enabling any application that supports HTTP calls to integrate PowerMem's intelligent memory capabilities.
+
+## Overview
+
+The PowerMem HTTP API Server is built with FastAPI and provides:
+
+- **RESTful API endpoints** for all core PowerMem operations
+- **API Key authentication** for secure access
+- **Rate limiting** to protect server resources
+- **Automatic API documentation** (Swagger UI and ReDoc)
+- **Structured logging** with request tracking
+- **CORS support** for web applications
+- **Production-ready** deployment options
+
+### Starting the API Server
+```bash
+# Method 1: Install the powermem package, then use the CLI command
+pip install powermem
+powermem-server --host 0.0.0.0 --port 8000
+
+# Method 2: Using Docker
+# Build and run with Docker
+docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile .
+docker run -d \
+  --name powermem-server \
+  -p 8000:8000 \
+  -v $(pwd)/.env:/app/.env:ro \
+  --env-file .env \
+  oceanbase/powermem-server:latest
+
+# Or use Docker Compose (recommended)
+docker-compose -f docker/docker-compose.yml up -d
+
+# Method 3: From source, use the Makefile
+git clone git@github.com:oceanbase/powermem.git
+cd powermem
+# Start server (production mode)
+make server-start
+
+# Start server with auto-reload (development mode)
+make server-start-reload
+
+# Check server status
+make server-status
+
+# View server logs
+make server-logs
+
+# Stop server
+make server-stop
+
+# Restart server
+make server-restart
+
+```
+
+### PowerMem .env Configuration
+The PowerMem SDK configuration is unchanged from v0.2.0; the only addition is section 12, "PowerMem HTTP API Server Configuration". In most cases, the default configuration can be kept.
+
+```bash
+# =============================================================================
+# 12. PowerMem HTTP API Server Configuration
PowerMem HTTP API Server Configuration
+# =============================================================================
+# Configuration for the PowerMem HTTP API Server
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# Server Settings
+# -----------------------------------------------------------------------------
+# Server host address (0.0.0.0 to listen on all interfaces)
+POWERMEM_SERVER_HOST=0.0.0.0
+
+# Server port number
+POWERMEM_SERVER_PORT=8000
+
+# Number of worker processes (only used when reload=false)
+POWERMEM_SERVER_WORKERS=4
+
+# Enable auto-reload for development (true/false)
+POWERMEM_SERVER_RELOAD=false
+
+# -----------------------------------------------------------------------------
+# Authentication Settings
+# -----------------------------------------------------------------------------
+# Enable API key authentication (true/false)
+POWERMEM_SERVER_AUTH_ENABLED=false
+
+# API keys (comma-separated list)
+# Example: POWERMEM_SERVER_API_KEYS=key1,key2,key3
+POWERMEM_SERVER_API_KEYS=
+
+# -----------------------------------------------------------------------------
+# Rate Limiting Settings
+# -----------------------------------------------------------------------------
+# Enable rate limiting (true/false)
+POWERMEM_SERVER_RATE_LIMIT_ENABLED=true
+
+# Rate limit per minute per IP address
+POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE=100
+
+# -----------------------------------------------------------------------------
+# Logging Settings
+# -----------------------------------------------------------------------------
+POWERMEM_SERVER_LOG_FILE=server.log
+
+# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
+POWERMEM_SERVER_LOG_LEVEL=INFO
+
+# Log format: json or text
+POWERMEM_SERVER_LOG_FORMAT=text
+
+# -----------------------------------------------------------------------------
+# API Settings
+# -----------------------------------------------------------------------------
+# API title (shown in Swagger UI)
+POWERMEM_SERVER_API_TITLE=PowerMem API
+
+# API version
+POWERMEM_SERVER_API_VERSION=v1
+
+# API description (shown in Swagger UI)
+POWERMEM_SERVER_API_DESCRIPTION=PowerMem HTTP API Server - Intelligent Memory System
+
+# -----------------------------------------------------------------------------
+# CORS Settings
+# -----------------------------------------------------------------------------
+# Enable CORS (true/false)
+POWERMEM_SERVER_CORS_ENABLED=true
+
+# CORS allowed origins (comma-separated, use * for all origins)
+# Example: POWERMEM_SERVER_CORS_ORIGINS=http://localhost:3000,https://example.com
+POWERMEM_SERVER_CORS_ORIGINS=*
+
+```
+
+### Available Tools
+You can use the following tools to interact with the API:
+
++ **curl**: Command-line tool
++ **Postman**: GUI tool
++ **Swagger UI**: Access via browser at `http://localhost:8000/docs`
+
+### Base URL
+```plain
+Base URL: http://localhost:8000
+API Base: http://localhost:8000/api/v1
+```
+
+---
+
+## Authentication
+When authentication is enabled, configure the `.env` file:
+
+```bash
+# Enable API key authentication (true/false)
+POWERMEM_SERVER_AUTH_ENABLED=true
+
+# API keys (comma-separated list)
+# Example: POWERMEM_SERVER_API_KEYS=key1,key2,key3
+POWERMEM_SERVER_API_KEYS=test-api-key-123
+```
+
+All endpoints that require authentication must include the API Key in the request header:
+
+```bash
+X-API-Key: test-api-key-123
+```
+
+**Exception**: The `/api/v1/system/health` endpoint is public and does not require authentication.
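+
+For programmatic access, the API key header can be set once and reused. Below is a minimal Python sketch using the `requests` library (an assumption — any HTTP client works); the base URL and key are the placeholder values used throughout this document:
+
+```python
+import requests
+
+BASE_URL = "http://localhost:8000/api/v1"
+
+session = requests.Session()
+# Attach the API key to every request; it is harmless when auth is disabled.
+session.headers.update({"X-API-Key": "test-api-key-123"})
+
+# The health check is public, so no API key is needed.
+resp = requests.get(f"{BASE_URL}/system/health")
+print(resp.json()["data"]["status"])  # "healthy"
+
+# Authenticated endpoints reuse the session with the key already attached.
+resp = session.get(f"{BASE_URL}/system/status")
+resp.raise_for_status()
+print(resp.json()["data"])
+```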
+
+---
+
+## System Endpoints
+### Health Check
+**Endpoint**: `GET /api/v1/system/health`
+
+**Description**: Check the health status of the API server (public endpoint, no authentication required)
+
+**Request Example**:
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/system/health"
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "status": "healthy",
+    "timestamp": "2025-12-24T07:10:06.455901Z"
+  },
+  "message": "Service is healthy",
+  "timestamp": "2025-12-24T07:10:06.456033Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Expected Result |
+| --- | --- |
+| Normal request | Returns 200, status is "healthy" |
+| No authentication | API Key not required |
+
+
+---
+
+### System Status
+**Endpoint**: `GET /api/v1/system/status`
+
+**Description**: Get system status and configuration information
+
+**Request Example**:
+
+```bash
+# Without an API key (returns 401 when authentication is enabled)
+curl -X GET "http://localhost:8000/api/v1/system/status" -i
+
+# With an API key
+curl -X GET "http://localhost:8000/api/v1/system/status" \
+  -H "X-API-Key: test-api-key-123" -i
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "status": "operational",
+    "version": "v1",
+    "storage_type": "oceanbase",
+    "llm_provider": "qwen",
+    "timestamp": "2025-12-24T07:37:20.316941Z"
+  },
+  "message": "System status retrieved successfully",
+  "timestamp": "2025-12-24T07:37:20.317057Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Expected Result |
+| --- | --- |
+| Normal request | Returns 200 with system information |
+| No API Key | Returns 401 Unauthorized |
+| Invalid API Key | Returns 401 Unauthorized |
+
+
+---
+
+### System Metrics
+**Endpoint**: `GET /api/v1/system/metrics`
+
+**Description**: Get metrics in Prometheus exposition format (placeholder implementation)
+
+**Request Example**:
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/system/metrics" \
+  -H "X-API-Key: test-api-key-123"
+```
+
+**Response Example** (Prometheus text format, not JSON):
+
+```text
+# HELP powermem_api_requests_total Total number of API requests
+# TYPE powermem_api_requests_total counter
+powermem_api_requests_total{method="GET",endpoint="/api/v1/system/status",status="200"} 1
+
+# HELP powermem_memory_operations_total Total number of memory operations
+# TYPE powermem_memory_operations_total counter
+
+# HELP powermem_api_request_duration_seconds API request duration in seconds
+# TYPE powermem_api_request_duration_seconds histogram
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="0.01"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="0.05"} 1
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="0.1"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="0.2"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="0.5"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="1.0"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="2.5"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="5.0"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="10.0"} 0
+powermem_api_request_duration_seconds_bucket{method="GET",endpoint="/api/v1/system/status",le="+Inf"} 0
+powermem_api_request_duration_seconds_sum{method="GET",endpoint="/api/v1/system/status"} 0.017825
+powermem_api_request_duration_seconds_count{method="GET",endpoint="/api/v1/system/status"} 1
+
+# HELP powermem_errors_total Total number of errors
+# TYPE powermem_errors_total counter
+```
+
+---
+
+### Delete All Memories
+**Endpoint**: `DELETE /api/v1/system/delete-all-memories`
+
+**Description**: Delete all memories, optionally scoped to a specific user or agent via the `user_id` / `agent_id` query parameters
+
+**Request Example**:
+
+```bash
+# Delete all memories (system level)
+curl -X DELETE "http://localhost:8000/api/v1/system/delete-all-memories" \
+  -H "X-API-Key: test-api-key-123"
+
+# Delete all memories for a specific agent
+curl -X DELETE "http://localhost:8000/api/v1/system/delete-all-memories?agent_id=agent-456" \
+  -H "X-API-Key: test-api-key-123"
+
+# Delete all memories for a specific user
+curl -X DELETE "http://localhost:8000/api/v1/system/delete-all-memories?user_id=user-123" \
+  -H "X-API-Key: test-api-key-123"
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {},
+  "message": "All memories reset successfully",
+  "timestamp": "2025-12-24T08:24:29.170996Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Expected Result |
+| --- | --- |
+| Normal deletion | Returns 200, all memories deleted |
+| Query after deletion | Returns empty list |
+
+---
+
+## Memory Management Endpoints
+### Create Memory
+**Endpoint**: `POST /api/v1/memories`
+
+**Description**: Create a new memory
+
+**Request Example**:
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/memories" \
+  -H "X-API-Key: test-api-key-123" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "content": "User likes coffee and goes to Starbucks every morning",
+    "user_id": "user-123",
+    "agent_id": "agent-456",
+    "run_id": "run-789",
+    "metadata": {
+      "source": "conversation",
+      "importance": "high"
+    },
+    "filters": {
+      "category": "preference",
+      "topic": "beverage"
+    },
+    "scope": "user",
+    "memory_type": "preference",
+    "infer": true
+  }'
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": [
+    {
+      "memory_id": 658954684471443456,
+      "content": "User likes coffee",
+      "user_id": "user-123",
+      "agent_id": "agent-456",
+      "run_id": "run-789",
+      "metadata": {
+        "source": "conversation",
+        "importance": "high"
+      }
+    },
+    {
+      "memory_id": 658954684538552320,
+      "content": "Goes to Starbucks every morning",
+      "user_id": "user-123",
+      "agent_id": "agent-456",
+      "run_id": "run-789",
+      "metadata": {
+        "source": "conversation",
+        "importance": "high"
+      }
+    }
+  ],
+  "message": "Created 2 memories successfully",
+  "timestamp": "2025-12-24T08:50:39.586609Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Request Parameters | Expected Result |
+| --- | --- | --- |
+| Minimum parameters | Only content | Returns 200, creation successful |
+| Full parameters | All fields | Returns 200, all fields saved |
+| Missing content | No content field | Returns 422 Validation Error |
+| Empty content | content is empty string | Returns 422 Validation Error |
+| Invalid metadata | metadata format error | Returns 422 Validation Error |
+
+
+---
+
+### Batch Create Memories
+**Endpoint**: `POST /api/v1/memories/batch`
+
+**Description**: Create multiple memories in batch
+
+**Request Example**:
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/memories/batch" \
+  -H "X-API-Key: test-api-key-123" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "memories": [
+      {
+        "content": "User likes Python programming",
+        "metadata": {"topic": "programming"},
+        "filters": {"category": "skill"},
+        "scope": "user",
+        "memory_type": "skill"
+      },
+      {
+        "content": "User lives in Beijing",
+        "metadata": {"topic": "location"},
+        "filters":
{"category": "personal"}, + "scope": "user", + "memory_type": "fact" + } + ], + "user_id": "user-123", + "agent_id": "agent-456", + "run_id": "run-789", + "infer": true + }' +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "memories": [ + { + "memory_id": 658958021480677376, + "content": "User likes Python programming", + "user_id": "user-123", + "agent_id": "agent-456", + "run_id": "run-789", + "metadata": { + "topic": "programming" + }, + "created_at": "2025-12-24T09:03:55.157320Z", + "updated_at": "2025-12-24T09:03:55.157330Z" + }, + { + "memory_id": 658958031962243072, + "content": "User lives in Beijing", + "user_id": "user-123", + "agent_id": "agent-456", + "run_id": "run-789", + "metadata": { + "topic": "location" + }, + "created_at": "2025-12-24T09:03:57.668669Z", + "updated_at": "2025-12-24T09:03:57.668677Z" + } + ], + "total": 2, + "created_count": 2, + "failed_count": 0 + }, + "message": "Created 2 out of 2 memories", + "timestamp": "2025-12-24T09:03:57.738674Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Normal batch creation | 2-10 memories | Returns 200, all created successfully | +| Partial failure | Contains invalid data | Returns 200, failed_count > 0 | +| Empty list | memories is empty | Returns 422 Validation Error | +| Exceeds limit | > 100 memories | Returns 422 Validation Error | +| Mixed success/failure | Some valid, some invalid | Returns 200, shows success and failure counts | + + +--- + +### List Memories +**Endpoint**: `GET /api/v1/memories` + +**Description**: Get a list of memories with pagination and filtering support + +**Request Example**: + +```bash +# Basic query +curl -X GET "http://localhost:8000/api/v1/memories?limit=10&offset=0" \ + -H "X-API-Key: test-api-key-123" + +# Filter by user +curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&limit=20&offset=0" \ + -H "X-API-Key: test-api-key-123" + +# Filter by agent +curl -X GET "http://localhost:8000/api/v1/memories?agent_id=agent-456&limit=50&offset=0" \ + -H "X-API-Key: test-api-key-123" +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "memories": [ + { + "memory_id": 658958021480677376, + "content": "User likes Python programming", + "user_id": "user-123", + "agent_id": "agent-456", + "run_id": "run-789", + "metadata": { + "topic": "programming" + }, + "created_at": "2025-12-24T09:03:55.157320Z", + "updated_at": "2025-12-24T09:03:57.682036Z" + }, + { + "memory_id": 658958031962243072, + "content": "User lives in Beijing", + "user_id": "user-123", + "agent_id": "agent-456", + "run_id": "run-789", + "metadata": { + "topic": "location" + }, + "created_at": "2025-12-24T09:03:57.668669Z", + "updated_at": "2025-12-24T09:03:57.717598Z" + } + ], + "total": 2, + "limit": 10, + "offset": 0 + }, + "message": "Memories retrieved successfully", + "timestamp": "2025-12-24T09:08:17.639957Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Default pagination | No parameters | Returns limit=100, offset=0 | +| Custom pagination | limit=20, offset=10 | Returns 20 items, skips first 10 | +| Filter by user | user_id=user-123 | Returns only memories for that user | +| Filter by agent | agent_id=agent-456 | Returns only memories for that agent | +| Combined filters | user_id + agent_id | Returns records matching both | +| Limit exceeds max | limit=2000 | Returns 422 Validation Error | +| Negative offset | offset=-1 | Returns 422 
Validation Error |
+| Empty results | No matching records | Returns empty array |
+
+
+---
+
+### Get Memory
+**Endpoint**: `GET /api/v1/memories/{memory_id}`
+
+**Description**: Get a single memory by ID
+
+**Request Example**:
+
+```bash
+# First, list all memories to see available IDs
+curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&agent_id=agent-456" \
+  -H "X-API-Key: test-api-key-123"
+
+# Then query by a specific ID taken from the listing
+curl -X GET "http://localhost:8000/api/v1/memories/658958021480677376?user_id=user-123&agent_id=agent-456" \
+  -H "X-API-Key: test-api-key-123"
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "memories": [
+      {
+        "memory_id": 658958021480677376,
+        "content": "User likes Python programming",
+        "user_id": "user-123",
+        "agent_id": "agent-456",
+        "run_id": "run-789",
+        "metadata": {
+          "topic": "programming"
+        },
+        "created_at": "2025-12-24T09:03:55.157320Z",
+        "updated_at": "2025-12-24T09:25:06.810068Z"
+      }
+    ],
+    "total": 1,
+    "limit": 100,
+    "offset": 0
+  },
+  "message": "Memories retrieved successfully",
+  "timestamp": "2025-12-24T09:25:21.217493Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Request Parameters | Expected Result |
+| --- | --- | --- |
+| Normal retrieval | Existing memory_id | Returns 200 with complete memory information |
+| Non-existent ID | memory_id=99999 | Returns 404 Not Found |
+| Invalid ID | memory_id=abc | Returns 422 Validation Error |
+| Access control | Wrong user_id | Returns 403 or 404 |
+| Access control | Wrong agent_id | Returns 403 or 404 |
+
+
+---
+
+### Update Memory
+**Endpoint**: `PUT /api/v1/memories/{memory_id}`
+
+**Description**: Update an existing memory
+
+**Request Example**:
+
+```bash
+# First, list all memories to see available IDs
+curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&agent_id=agent-456" \
+  -H "X-API-Key: test-api-key-123"
+
+# Update content
+curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \
+  -H "X-API-Key: test-api-key-123" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "content": "User likes latte coffee and goes to Starbucks every morning",
+    "user_id": "user-123",
+    "agent_id": "agent-456"
+  }'
+
+# Update metadata
+curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \
+  -H "X-API-Key: test-api-key-123" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "metadata": {
+      "source": "conversation",
+      "importance": "high",
+      "updated_by": "admin"
+    },
+    "user_id": "user-123",
+    "agent_id": "agent-456"
+  }'
+
+# Update both content and metadata
+curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \
+  -H "X-API-Key: test-api-key-123" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "content": "User likes latte coffee",
+    "metadata": {
+      "importance": "medium"
+    },
+    "user_id": "user-123",
+    "agent_id": "agent-456"
+  }'
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "memory_id": 658958031962243072,
+    "content": "User likes latte coffee",
+    "user_id": "user-123",
+    "agent_id": "agent-456",
+    "run_id": "run-789",
+    "metadata": {
+      "topic": "location",
+      "source": "conversation",
+      "importance": "medium",
+      "updated_by": "admin",
+      "memory_type": "working",
+      "access_count": 0,
+ "intelligence": { + "importance_score": 0, + "memory_type": "working", + "initial_retention": 0, + "decay_rate": 0.2, + "current_retention": 0, + "next_review": "2025-12-24T18:41:21.908824+08:00", + "review_schedule": [ + "2025-12-24T18:41:21.908824+08:00", + "2025-12-24T23:41:21.908824+08:00", + "2025-12-25T17:41:21.908824+08:00", + "2025-12-27T17:41:21.908824+08:00", + "2025-12-31T17:41:21.908824+08:00" + ], + "last_reviewed": "2025-12-24T17:41:21.908824+08:00", + "review_count": 0, + "access_count": 0, + "reinforcement_factor": 0.3 + }, + "importance_score": 0, + "memory_management": { + "should_promote": false, + "should_forget": false, + "should_archive": false, + "is_active": true + }, + "processing_applied": true + }, + "created_at": "2025-12-24T09:03:57.668669Z", + "updated_at": "2025-12-24T09:41:21.908991Z" + }, + "message": "Memory updated successfully", + "timestamp": "2025-12-24T09:41:21.930404Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Update content | Only content | Returns 200, content updated | +| Update metadata | Only metadata | Returns 200, metadata updated | +| Update both | content + metadata | Returns 200, both updated | +| No update fields | Both content and metadata are empty | Returns 400 Error | +| Non-existent ID | memory_id=99999 | Returns 404 Not Found | +| Access control | Wrong user_id | Returns 403 or 404 | + + +--- + +### Batch Update Memories +**Endpoint**: `PUT /api/v1/memories/batch` + +**Description**: Update multiple memories in batch + +**Request Example**: + +```bash +curl -X PUT "http://localhost:8000/api/v1/memories/batch" \ + -H "X-API-Key: test-api-key-123" \ + -H "Content-Type: application/json" \ + -d '{ + "updates": [ + { + "memory_id": 658958021480677376, + "content": "Updated content 1", + "metadata": {"updated": true} + }, + { + "memory_id": 658958031962243072, + "metadata": {"updated": true} + } + ], + "user_id": "user-123", + "agent_id": "agent-456" + }' +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "memories": [ + { + "memory_id": 658958021480677376, + "content": "Updated content 1", + "user_id": "user-123", + "agent_id": "agent-456", + "run_id": "run-789", + "metadata": { + "topic": "programming", + "updated": true, + "memory_type": "working", + "access_count": 0, + "intelligence": { + "decay_rate": 0.2, + "memory_type": "working", + "next_review": "2025-12-24T18:44:57.696937+08:00", + "access_count": 0, + "review_count": 0, + "last_reviewed": "2025-12-24T17:44:57.696937+08:00", + "review_schedule": [ + "2025-12-24T18:44:57.696937+08:00", + "2025-12-24T23:44:57.696937+08:00", + "2025-12-25T17:44:57.696937+08:00", + "2025-12-27T17:44:57.696937+08:00", + "2025-12-31T17:44:57.696937+08:00" + ], + "importance_score": 0, + "current_retention": 0, + "initial_retention": 0, + "reinforcement_factor": 0.3 + }, + "importance_score": 0, + "memory_management": { + "is_active": true, + "should_forget": false, + "should_archive": false, + "should_promote": false + }, + "processing_applied": true + }, + "created_at": "2025-12-24T09:03:55.157320Z", + "updated_at": "2025-12-24T09:44:57.697059Z" + }, + { + "memory_id": 658958031962243072, + "content": "User likes latte coffee", + "user_id": "user-123", + "agent_id": "agent-456", + "run_id": "run-789", + "metadata": { + "topic": "location", + "source": "conversation", + "updated": true, + "importance": "medium", + "updated_by": "admin", + "memory_type": "working", + "access_count": 0, + "intelligence": { + 
"decay_rate": 0.2, + "memory_type": "working", + "next_review": "2025-12-24T18:44:58.114457+08:00", + "access_count": 0, + "review_count": 0, + "last_reviewed": "2025-12-24T17:44:58.114457+08:00", + "review_schedule": [ + "2025-12-24T18:44:58.114457+08:00", + "2025-12-24T23:44:58.114457+08:00", + "2025-12-25T17:44:58.114457+08:00", + "2025-12-27T17:44:58.114457+08:00", + "2025-12-31T17:44:58.114457+08:00" + ], + "importance_score": 0, + "current_retention": 0, + "initial_retention": 0, + "reinforcement_factor": 0.3 + }, + "importance_score": 0, + "memory_management": { + "is_active": true, + "should_forget": false, + "should_archive": false, + "should_promote": false + }, + "processing_applied": true + }, + "created_at": "2025-12-24T09:03:57.668669Z", + "updated_at": "2025-12-24T09:44:58.114565Z" + } + ], + "total": 2, + "updated_count": 2, + "failed_count": 0 + }, + "message": "Updated 2 out of 2 memories", + "timestamp": "2025-12-24T09:44:58.180191Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Normal batch update | 2-10 updates | Returns 200, all updated successfully | +| Partial failure | Contains non-existent IDs | Returns 200, failed_count > 0 | +| Empty list | updates is empty | Returns 422 Validation Error | +| Exceeds limit | > 100 updates | Returns 422 Validation Error | + + +--- + +### Delete Memory +**Endpoint**: `DELETE /api/v1/memories/{memory_id}` + +**Description**: Delete a single memory + +**Request Example**: + +```bash +curl -X DELETE "http://localhost:8000/api/v1/memories/658958021480677376?user_id=user-123&agent_id=agent-456" \ + -H "X-API-Key: test-api-key-123" +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "memory_id": 658958021480677376 + }, + "message": "Memory deleted successfully", + "timestamp": "2025-12-24T09:45:47.174799Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Normal deletion | Existing memory_id | Returns 200, deletion successful | +| Non-existent ID | memory_id=99999 | Returns 404 Not Found | +| Query after deletion | Query same ID again | Returns 404 Not Found | +| Access control | Wrong user_id | Returns 403 or 404 | + + +--- + +### Bulk Delete Memories +**Endpoint**: `DELETE /api/v1/memories/batch` + +**Description**: Delete multiple memories in batch + +**Request Example**: + +```bash +curl -X DELETE "http://localhost:8000/api/v1/memories/batch" \ + -H "X-API-Key: test-api-key-123" \ + -H "Content-Type: application/json" \ + -d '{ + "memory_ids": [658958031962243072, 658968835172335616, 658968835277193216], + "user_id": "user-123", + "agent_id": "agent-456" + }' +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "deleted": [ + 658958031962243072, + 658968835172335616, + 658968835277193216 + ], + "failed": [ + + ], + "total": 3, + "deleted_count": 3, + "failed_count": 0 + }, + "message": "Deleted 3 memories", + "timestamp": "2025-12-24T09:47:56.022512Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Normal bulk deletion | 5-10 IDs | Returns 200, all deleted successfully | +| Partial failure | Contains non-existent IDs | Returns 200, failed_count > 0 | +| Empty list | memory_ids is empty | Returns 422 Validation Error | +| Exceeds limit | > 100 IDs | Returns 422 Validation Error | + + +--- + +## Search Endpoints +### Search Memories (POST) +**Endpoint**: `POST /api/v1/memories/search` + +**Description**: Search 
+## Search Endpoints
+### Search Memories (POST)
+**Endpoint**: `POST /api/v1/memories/search`
+
+**Description**: Search memories using semantic search (supports complex filtering)
+
+**Request Example**:
+
+```bash
+# First, create some data
+curl -X POST "http://localhost:8000/api/v1/memories" -H "X-API-Key: test-api-key-123" -H "Content-Type: application/json" -d '{
+    "content": "User likes coffee and goes to Starbucks every morning",
+    "user_id": "user-123",
+    "agent_id": "agent-456",
+    "run_id": "run-789",
+    "metadata": {
+      "source": "conversation",
+      "importance": "high"
+    },
+    "filters": {
+      "category": "preference",
+      "topic": "beverage"
+    },
+    "scope": "user",
+    "memory_type": "preference",
+    "infer": true
+  }'
+
+# Search
+curl -X POST "http://localhost:8000/api/v1/memories/search" \
+  -H "X-API-Key: test-api-key-123" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "What does the user like to drink",
+    "user_id": "user-123",
+    "agent_id": "agent-456",
+    "run_id": "run-789",
+    "filters": {
+      "category": "preference",
+      "topic": "beverage"
+    },
+    "limit": 10
+  }'
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "results": [
+      {
+        "memory_id": 1,
+        "content": "User likes coffee and goes to Starbucks every morning",
+        "score": 0.95,
+        "metadata": {
+          "source": "conversation",
+          "importance": "high"
+        }
+      },
+      {
+        "memory_id": 5,
+        "content": "User occasionally drinks tea",
+        "score": 0.78,
+        "metadata": {}
+      }
+    ],
+    "total": 2,
+    "query": "What does the user like to drink"
+  },
+  "message": "Search completed successfully",
+  "timestamp": "2024-01-15T11:00:00Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Request Parameters | Expected Result |
+| --- | --- | --- |
+| Basic search | Only query | Returns relevant memories, sorted by relevance |
+| With user filter | query + user_id | Returns only memories for that user |
+| With agent filter | query + agent_id | Returns only memories for that agent |
+| With complex filters | query + filters | Returns memories matching filter conditions |
+| Limit results | limit=5 | Returns at most 5 results |
+| Empty query | query is empty | Returns 422 Validation Error |
+| No results | No matching memories | Returns empty array |
+| Limit exceeds max | limit=200 | Returns 422 Validation Error |
+
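+
+The same request can be issued from Python. A minimal sketch mirroring the curl example above (`requests` assumed):
+
+```python
+import requests
+
+BASE_URL = "http://localhost:8000/api/v1"
+
+resp = requests.post(
+    f"{BASE_URL}/memories/search",
+    headers={"X-API-Key": "test-api-key-123"},
+    json={
+        "query": "What does the user like to drink",
+        "user_id": "user-123",
+        "agent_id": "agent-456",
+        "limit": 10,
+    },
+)
+resp.raise_for_status()
+# Results are sorted by relevance score, highest first.
+for hit in resp.json()["data"]["results"]:
+    print(hit["score"], hit["content"])
+```
+
+---
+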
"2025-12-24T18:23:45.620435+08:00" + } + } + ], + "total": 2, + "query": "What beverages does the user like" + }, + "message": "Search completed successfully", + "timestamp": "2025-12-24T10:23:45.659143Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Basic search | query parameter | Returns relevant memories | +| With filters | query + user_id + agent_id | Returns matching memories | +| Missing query | No query parameter | Returns 422 Validation Error | +| URL encoding | Non-ASCII query | Properly handles URL encoding | + + +--- + +## User Profile Endpoints +### Update User Profile +**Endpoint**: `POST /api/v1/users/{user_id}/profile` + +**Description**: Generate or update a user profile + +**Request Example**: + +```bash +curl -X POST "http://localhost:8000/api/v1/users/user-123/profile" \ + -H "X-API-Key: test-api-key-123" \ + -H "Content-Type: application/json" \ + -d '{ + "profile_content": "User is a senior software engineer, focused on AI and machine learning", + "topics": { + "programming": ["Python", "JavaScript", "Go"], + "interests": ["Machine Learning", "Deep Learning", "NLP"], + "location": "Beijing" + } + }' +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "user_id": "user-123", + "profile_content": "User is a senior software engineer, focused on AI and machine learning", + "topics": { + "location": "Beijing", + "interests": [ + "Machine Learning", + "Deep Learning", + "NLP" + ], + "programming": [ + "Python", + "JavaScript", + "Go" + ] + }, + "updated_at": "2025-12-24T10:31:13.169725Z" + }, + "message": "User profile updated successfully", + "timestamp": "2025-12-24T10:31:13.195518Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Update content | Only profile_content | Returns 200, content updated | +| Update topics | Only topics | Returns 200, topics updated | +| Update both | profile_content + topics | Returns 200, both updated | +| Partial update | Only update some topics | Returns 200, merged update | +| Empty content | profile_content is empty | Returns 200, content cleared | + + +--- + +### Get User Profile +**Endpoint**: `GET /api/v1/users/{user_id}/profile` + +**Description**: Get the profile of a specific user + +**Request Example**: + +```bash +curl -X GET "http://localhost:8000/api/v1/users/user-123/profile" \ + -H "X-API-Key: test-api-key-123" +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "user_id": "user-123", + "profile_content": "User is a senior software engineer, focused on AI and machine learning", + "topics": { + "location": "Beijing", + "interests": [ + "Machine Learning", + "Deep Learning", + "NLP" + ], + "programming": [ + "Python", + "JavaScript", + "Go" + ] + }, + "updated_at": "2025-12-24T10:31:13.169725Z" + }, + "message": "User profile retrieved successfully", + "timestamp": "2025-12-24T10:32:53.838365Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Normal retrieval | Existing user_id | Returns 200 with user profile | +| Non-existent user | user_id=unknown | Returns 404 or empty profile | +| User without profile | New user | Returns empty profile_content | + + +--- + +### Delete User Profile +**Endpoint**: `DELETE /api/v1/users/{user_id}/profile` + +**Description**: Delete the user profile for a specific user + +**Request Example**: + +```bash +curl -X DELETE "http://localhost:8000/api/v1/users/user-123/profile" 
+### Delete User Profile
+**Endpoint**: `DELETE /api/v1/users/{user_id}/profile`
+
+**Description**: Delete the user profile for a specific user
+
+**Request Example**:
+
+```bash
+curl -X DELETE "http://localhost:8000/api/v1/users/user-123/profile" \
+  -H "X-API-Key: test-api-key-123"
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "user_id": "user-123",
+    "deleted": true
+  },
+  "message": "User profile for user-123 deleted successfully",
+  "timestamp": "2025-12-24T10:45:30.123456Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Request Parameters | Expected Result |
+| --- | --- | --- |
+| Normal deletion | Existing user_id with profile | Returns 200, profile deleted successfully |
+| Non-existent user | user_id=unknown | Returns 404 Not Found |
+| User without profile | User has no profile | Returns 404 Not Found |
+| Query after deletion | Query same user again | Returns 404 Not Found |
+
+
+---
+
+### Get User Memories
+**Endpoint**: `GET /api/v1/users/{user_id}/memories`
+
+**Description**: Get all memories for a specific user
+
+**Request Example**:
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/users/user-123/memories?limit=20&offset=0" \
+  -H "X-API-Key: test-api-key-123"
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "memories": [
+      {
+        "memory_id": 658969617326145536,
+        "content": "Likes coffee",
+        "user_id": "user-123",
+        "agent_id": "agent-456",
+        "run_id": "run-789",
+        "metadata": {
+          "source": "conversation",
+          "importance": "high",
+          "_fusion_info": {
+            "fts_rank": 1,
+            "rrf_score": 0.01639344262295082,
+            "fts_weight": 0.5,
+            "vector_rank": 1,
+            "fusion_method": "rrf",
+            "vector_weight": 0.5
+          },
+          "search_count": 2,
+          "last_searched_at": "2025-12-24T18:23:45.620404+08:00"
+        },
+        "created_at": "2025-12-24T09:49:59.822334Z",
+        "updated_at": "2025-12-24T10:23:45.620371Z"
+      },
+      {
+        "memory_id": 658969617401643008,
+        "content": "Goes to Starbucks every morning",
+        "user_id": "user-123",
+        "agent_id": "agent-456",
+        "run_id": "run-789",
+        "metadata": {
+          "source": "conversation",
+          "importance": "high",
+          "_fusion_info": {
+            "fts_rank": null,
+            "rrf_score": 0.008064516129032258,
+            "fts_weight": 0.5,
+            "vector_rank": 2,
+            "fusion_method": "rrf",
+            "vector_weight": 0.5
+          },
+          "search_count": 2,
+          "last_searched_at": "2025-12-24T18:23:45.620435+08:00"
+        },
+        "created_at": "2025-12-24T09:49:59.852494Z",
+        "updated_at": "2025-12-24T10:23:45.620411Z"
+      }
+    ],
+    "total": 2,
+    "limit": 20,
+    "offset": 0
+  },
+  "message": "User memories retrieved successfully",
+  "timestamp": "2025-12-24T10:35:40.820301Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Request Parameters | Expected Result |
+| --- | --- | --- |
+| Normal retrieval | Existing user_id | Returns all memories for that user |
+| Paginated query | limit=10, offset=10 | Returns paginated results |
+| User without memories | New user | Returns empty array |
+| Non-existent user | user_id=unknown | Returns empty array or 404 |
+
+
+---
+
+### Delete User Memories
+**Endpoint**: `DELETE /api/v1/users/{user_id}/memories`
+
+**Description**: Delete all memories for a specific user
+
+**Request Example**:
+
+```bash
+curl -X DELETE "http://localhost:8000/api/v1/users/user-123/memories" \
+  -H "X-API-Key: test-api-key-123"
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "user_id": "user-123",
+    "deleted_count": 2,
+    "failed_count": 0,
+    "total": 2
+  },
+  "message": "Deleted 2 memories for user user-123",
+  "timestamp": "2025-12-24T10:39:15.125245Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Request Parameters | Expected Result |
+| --- | --- | --- |
+| Normal deletion | Existing user_id | Returns 200, all memories deleted |
+| User without memories | New user | Returns deleted_count=0 |
+| Query after deletion | Query same user again | Returns empty array |
+
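+
+To remove everything stored for a user, the two delete endpoints above can be combined. A hedged sketch (`requests` assumed; the 404-tolerant handling reflects the behavior tables above):
+
+```python
+import requests
+
+BASE_URL = "http://localhost:8000/api/v1"
+HEADERS = {"X-API-Key": "test-api-key-123"}
+
+def forget_user(user_id: str) -> int:
+    """Delete a user's memories and profile; return how many memories were removed."""
+    resp = requests.delete(f"{BASE_URL}/users/{user_id}/memories", headers=HEADERS)
+    resp.raise_for_status()
+    deleted = resp.json()["data"]["deleted_count"]
+
+    # Profile deletion returns 404 when no profile exists, so tolerate that case.
+    resp = requests.delete(f"{BASE_URL}/users/{user_id}/profile", headers=HEADERS)
+    if resp.status_code not in (200, 404):
+        resp.raise_for_status()
+    return deleted
+
+print(forget_user("user-123"))
+```
+
+---
+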
+## Agent Management Endpoints
+
+### Create Agent Memory
+**Endpoint**: `POST /api/v1/agents/{agent_id}/memories`
+
+**Description**: Create a memory for a specific agent
+
+**Request Example**:
+
+```bash
+curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories" -H "X-API-Key: test-api-key-123" -H "Content-Type: application/json" -d '{
+    "content": "Agent learned new conversation techniques",
+    "user_id": "user-123",
+    "run_id": "run-789"
+  }'
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "memory_id": 659015038446600192,
+    "content": "Agent learned new conversation techniques",
+    "user_id": null,
+    "agent_id": "agent-456",
+    "run_id": null,
+    "metadata": {
+      "run_id": "run-789",
+      "agent": {
+        "agent_id": "agent-456",
+        "mode": "multi_agent",
+        "scope": "private",
+        "collaboration": {
+          "is_collaborating": false,
+          "collaboration_type": null,
+          "collaboration_status": null,
+          "participants": [],
+          "collaboration_level": "low"
+        },
+        "permissions": {
+          "scope_permissions": {
+            "read": true,
+            "write": true,
+            "delete": true,
+            "admin": false
+          },
+          "scope_type": "private",
+          "access_level": "owner"
+        },
+        "sharing": {
+          "is_shared": false,
+          "shared_with": [],
+          "sharing_level": "private",
+          "can_share": true
+        }
+      },
+      "intelligence": {
+        "importance_score": 0.65,
+        "memory_type": "short_term",
+        "initial_retention": 0.65,
+        "decay_rate": 0.15000000000000002,
+        "current_retention": 0.65,
+        "next_review": "2025-12-24T21:38:46.649257+08:00",
+        "review_schedule": [
+          "2025-12-24T21:38:46.649257+08:00",
+          "2025-12-25T01:40:16.649257+08:00",
+          "2025-12-25T16:09:40.649257+08:00",
+          "2025-12-27T06:48:04.649257+08:00",
+          "2025-12-30T12:04:52.649257+08:00"
+        ],
+        "last_reviewed": "2025-12-24T20:50:28.649257+08:00",
+        "review_count": 0,
+        "access_count": 0,
+        "reinforcement_factor": 0.3
+      },
+      "memory_management": {
+        "should_promote": false,
+        "should_forget": false,
+        "should_archive": false,
+        "is_active": true
+      },
+      "created_at": "2025-12-24T20:50:28.649257+08:00",
+      "updated_at": "2025-12-24T20:50:28.649257+08:00"
+    },
+    "created_at": "2025-12-24T20:50:29.556144Z",
+    "updated_at": null
+  },
+  "message": "Agent memory created successfully",
+  "timestamp": "2025-12-24T12:50:29.556662Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario | Request Parameters | Expected Result |
+| --- | --- | --- |
+| Normal creation | content parameter | Returns 200, creation successful |
+| With user ID | content + user_id | Returns 200, associated with user |
+| With run ID | content + run_id | Returns 200, associated with run |
+| Missing content | No content | Returns 422 Validation Error |
+
+
+---
+
+### Get Agent Memories
+**Endpoint**: `GET /api/v1/agents/{agent_id}/memories`
+
+**Description**: Get all memories for a specific agent
+
+**Request Example**:
+
+```bash
+curl -X GET "http://localhost:8000/api/v1/agents/agent-456/memories?limit=20&offset=0" \
+  -H "X-API-Key: test-api-key-123"
+```
+
+**Response Example**:
+
+```json
+{
+  "success": true,
+  "data": {
+    "memories": [
+      {
+        "memory_id": 2,
+        "content": "Agent learned to handle user preferences",
+        "user_id": "user-123",
+        "agent_id": "agent-456",
+        "metadata": {},
+        "created_at": "2024-01-15T10:30:00Z"
+      }
+    ],
+    "total": 1,
+    "limit": 20,
+    "offset": 0
+  },
+  "message": "Agent memories retrieved successfully",
+  "timestamp": "2024-01-15T11:30:00Z"
+}
+```
+
+**Usage Notes**:
+
+| Scenario |
Request Parameters | Expected Result | +| --- | --- | --- | +| Normal retrieval | Existing agent_id | Returns all memories for that agent | +| Paginated query | limit=10, offset=10 | Returns paginated results | +| Agent without memories | New agent | Returns empty array | + + +--- + +### Share Agent Memories +**Endpoint**: `POST /api/v1/agents/{agent_id}/memories/share` + +**Description**: Share agent memories with another agent + +**Request Example**: + +```bash +# Share all memories +curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories/share" \ + -H "X-API-Key: test-api-key-123" \ + -H "Content-Type: application/json" \ + -d '{ + "target_agent_id": "agent-789" + }' + +# Share specific memories +curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories/share" \ + -H "X-API-Key: test-api-key-123" \ + -H "Content-Type: application/json" \ + -d '{ + "target_agent_id": "agent-789", + "memory_ids": [1, 2, 3] + }' +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "shared_count": 3, + "source_agent_id": "agent-456", + "target_agent_id": "agent-789" + }, + "message": "Shared 3 memories successfully", + "timestamp": "2024-01-15T11:40:00Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Share all memories | Only target_agent_id | Returns 200, all memories shared | +| Share specific memories | target_agent_id + memory_ids | Returns 200, specified memories shared | +| Non-existent memory IDs | Invalid memory_ids | Returns 200, shared_count less than requested | +| Non-existent target agent | target_agent_id=unknown | Returns 200, creates new agent record | + + +--- + +### Get Shared Memories +**Endpoint**: `GET /api/v1/agents/{agent_id}/memories/share` + +**Description**: Get shared memories received by an agent + +**Request Example**: + +```bash +curl -X GET "http://localhost:8000/api/v1/agents/agent-789/memories/share?limit=20&offset=0" \ + -H "X-API-Key: test-api-key-123" +``` + +**Response Example**: + +```json +{ + "success": true, + "data": { + "memories": [ + { + "memory_id": 1, + "content": "Shared memory content", + "user_id": "user-123", + "agent_id": "agent-456", + "metadata": { + "shared_from": "agent-456" + }, + "created_at": "2024-01-15T10:30:00Z" + } + ], + "total": 1, + "limit": 20, + "offset": 0 + }, + "message": "Shared memories retrieved successfully", + "timestamp": "2024-01-15T11:45:00Z" +} +``` + +**Usage Notes**: + +| Scenario | Request Parameters | Expected Result | +| --- | --- | --- | +| Normal retrieval | Existing agent_id | Returns list of shared memories | +| No shared memories | New agent | Returns empty array | +| Paginated query | limit=10, offset=10 | Returns paginated results | + + +--- + +## Error Scenarios +### Authentication Errors +**Error Cases**: + +| Scenario | Request | Expected Result | +| --- | --- | --- | +| No API Key | Missing X-API-Key header | Returns 401 Unauthorized | +| Invalid API Key | X-API-Key: invalid-key | Returns 401 Unauthorized | +| Empty API Key | X-API-Key: (empty) | Returns 401 Unauthorized | + + +**Example**: + +```bash +# No API Key +curl -X GET "http://localhost:8000/api/v1/memories" + +# Response +{ + "success": false, + "error": { + "code": "UNAUTHORIZED", + "message": "API key is required" + } +} +``` + +--- + +### Rate Limiting +**Error Cases**: + +| Scenario | Request | Expected Result | +| --- | --- | --- | +| Normal request | Single request | Returns 200 | +| Rate limit exceeded | Many requests in short time | 
Returns 429 Too Many Requests | + + +**Example**: + +```bash +# Send 200 requests quickly +for i in {1..200}; do + curl -X GET "http://localhost:8000/api/v1/memories" \ + -H "X-API-Key: test-api-key-123" & +done + +# Response (when rate limit exceeded) +{ + "success": false, + "error": { + "code": "RATE_LIMIT_EXCEEDED", + "message": "Rate limit exceeded: 100 per minute" + } +} +``` + +--- + +### Validation Errors +**Error Cases**: + +| Scenario | Request | Expected Result | +| --- | --- | --- | +| Missing required field | Missing content | Returns 422 Validation Error | +| Type error | memory_id="abc" | Returns 422 Validation Error | +| Range error | limit=2000 | Returns 422 Validation Error | +| Format error | Invalid JSON format | Returns 422 Validation Error | + + +**Example**: + +```bash +# Missing required field +curl -X POST "http://localhost:8000/api/v1/memories" \ + -H "X-API-Key: test-api-key-123" \ + -H "Content-Type: application/json" \ + -d '{ + "user_id": "user-123" + }' + +# Response +{ + "success": false, + "error": { + "code": "VALIDATION_ERROR", + "message": "Field 'content' is required" + } +} +``` + +--- + +### Resource Not Found +**Error Cases**: + +| Scenario | Request | Expected Result | +| --- | --- | --- | +| Non-existent memory | GET /memories/99999 | Returns 404 Not Found | +| Non-existent user | GET /users/unknown/profile | Returns 404 or empty data | +| Non-existent agent | GET /agents/unknown/memories | Returns empty array | + + +--- + +### Server Errors +**Error Cases**: + +| Scenario | Request | Expected Result | +| --- | --- | --- | +| Database connection failure | Any request | Returns 500 Internal Server Error | +| Service unavailable | Any request | Returns 503 Service Unavailable | + + +--- + +## Performance Testing +### Response Time Testing +Use tools to measure endpoint response times: + +```bash +# Using curl to measure response time +time curl -X GET "http://localhost:8000/api/v1/memories" \ + -H "X-API-Key: test-api-key-123" + +# Using httpie +http --timeout=5 GET "http://localhost:8000/api/v1/memories" \ + X-API-Key:test-api-key-123 +``` + + +--- + +### Concurrent Testing +Use tools for concurrent load testing: + +```bash +# Using Apache Bench +ab -n 1000 -c 10 -H "X-API-Key: test-api-key-123" \ + http://localhost:8000/api/v1/memories + +# Using wrk +wrk -t4 -c100 -d30s -H "X-API-Key: test-api-key-123" \ + http://localhost:8000/api/v1/memories +``` + +--- diff --git a/docs/api/overview.md b/docs/api/overview.md index 42547bc..1be3995 100644 --- a/docs/api/overview.md +++ b/docs/api/overview.md @@ -10,3 +10,4 @@ This section contains comprehensive API documentation for powermem. 
- **[AsyncMemory API](./0002-async_memory.md)** - Asynchronous memory operations - **[Agent APIs](./0003-agents.md)** - Multi-agent memory management - **[MCP API](./0004-mcp.md)** - Model Context Protocol server for AI assistants +- **[HTTP API Server](./0005-api_server.md)** - RESTful HTTP API server for PowerMem diff --git a/examples/langchain/requirements.txt b/examples/langchain/requirements.txt index b6f2199..fa16b5a 100644 --- a/examples/langchain/requirements.txt +++ b/examples/langchain/requirements.txt @@ -2,7 +2,7 @@ # Install with: pip install -r requirements.txt # Core dependencies -powermem>=0.2.1 +powermem>=0.3.0 python-dotenv>=1.0.0 openai>=1.109.1,<3.0.0 diff --git a/examples/langgraph/__init__.py b/examples/langgraph/__init__.py index 21136fe..dbdc495 100644 --- a/examples/langgraph/__init__.py +++ b/examples/langgraph/__init__.py @@ -5,5 +5,5 @@ for building stateful AI applications with intelligent memory management. """ -__version__ = "0.2.1" +__version__ = "0.3.0" diff --git a/examples/langgraph/requirements.txt b/examples/langgraph/requirements.txt index 786cc33..e3a8422 100644 --- a/examples/langgraph/requirements.txt +++ b/examples/langgraph/requirements.txt @@ -2,7 +2,7 @@ # Install with: pip install -r requirements.txt # Core dependencies -powermem>=0.2.1 +powermem>=0.3.0 python-dotenv>=1.0.0 # LangGraph and LangChain dependencies diff --git a/pyproject.toml b/pyproject.toml index 9246d37..7353298 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "powermem" -version = "0.2.1" +version = "0.3.0" description = "Intelligent Memory System - Persistent memory layer for LLM applications" readme = "README.md" license = {text = "Apache-2.0"} @@ -25,6 +25,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "pydantic>=2.0.0", + "pydantic-settings>=2.0.0", "httpx>=0.24.0", "sqlalchemy>=2.0.0", "numpy>=1.21.0", @@ -32,6 +33,8 @@ dependencies = [ "python-dotenv>=1.0.0", "fastapi>=0.100.0", "uvicorn>=0.23.0", + "slowapi>=0.1.9", + "click>=8.0.0", "rank-bm25>=0.2.2", "pyobvector>=0.2.0,<0.3.0", "jieba>=0.42.1", @@ -48,6 +51,7 @@ dependencies = [ "google-generativeai>=0.3.0", "google-genai>=1.0.0", "dashscope>=1.14.0", + "psutil>=5.9.0", ] [project.optional-dependencies] @@ -86,6 +90,9 @@ where = ["src"] [tool.setuptools.package-dir] "" = "src" +[project.scripts] +powermem-server = "server.cli.server:server" + [tool.black] line-length = 88 target-version = ['py310'] diff --git a/src/powermem/agent/implementations/multi_agent.py b/src/powermem/agent/implementations/multi_agent.py index 3943d73..7038526 100644 --- a/src/powermem/agent/implementations/multi_agent.py +++ b/src/powermem/agent/implementations/multi_agent.py @@ -302,6 +302,7 @@ def process_memory( 'memory_type': memory_type.value, 'agent_id': agent_id, 'created_at': memory_data['created_at'], + 'updated_at': memory_data['updated_at'], 'metadata': memory_data['metadata'], } @@ -335,17 +336,20 @@ def _persist_memory_to_storage(self, memory_data: Dict[str, Any]) -> int: # Use the existing Memory.add() method # Get the Snowflake ID returned from database to ensure consistency + # Use infer=False to use simple mode since intelligent processing is already done at agent layer add_result = self._memory_instance.add( messages=memory_data['content'], user_id=memory_data.get('user_id'), agent_id=memory_data.get('agent_id'), + run_id=memory_data.get('run_id'), metadata={ 'scope': memory_data.get('scope').value if memory_data.get('scope') else 
None, 'memory_type': memory_data.get('memory_type').value if memory_data.get('memory_type') else None, 'retention_score': memory_data.get('retention_score'), 'importance_level': memory_data.get('importance_level'), **memory_data.get('metadata', {}) - } + }, + infer=False # Use simple mode to avoid intelligent processing returning empty results ) # Get the Snowflake ID from database @@ -482,15 +486,129 @@ def get_memories( List of memory dictionaries """ try: + # First, try to get memories from database + # Initialize Memory instance if not exists + if not hasattr(self, '_memory_instance'): + from powermem.core.memory import Memory + if hasattr(self.config, '_data'): + config_dict = self.config._data + elif hasattr(self.config, 'to_dict'): + config_dict = self.config.to_dict() + else: + config_dict = self.config + self._memory_instance = Memory(config_dict) + + # Query memories from database by agent_id + user_id = filters.get('user_id') if filters else None + db_result = self._memory_instance.get_all( + user_id=user_id, + agent_id=agent_id, + limit=1000 # Get a large number to ensure we get all memories + ) + + # Extract results from database response + db_memories = db_result.get('results', []) if isinstance(db_result, dict) else db_result + + # Convert database format to agent memory format and load into memory cache accessible_memories = [] - total_memories = 0 + total_memories = len(db_memories) scope_access_passed = 0 permission_passed = 0 - # Get accessible memory IDs based on scope and permissions + for db_memory in db_memories: + memory_id = db_memory.get('id') + if not memory_id: + continue + + # Convert database format to agent memory format + # Storage adapter.get_all_memories() returns 'memory' field (mapped from payload.data) + # Keep 'document' as fallback for database raw field name compatibility + content = db_memory.get('memory') or db_memory.get('document', '') + + memory_data = { + 'id': memory_id, + 'content': content, + 'agent_id': db_memory.get('agent_id', agent_id), + 'user_id': db_memory.get('user_id'), + 'run_id': db_memory.get('run_id'), + 'metadata': db_memory.get('metadata', {}), + 'created_at': db_memory.get('created_at'), + 'updated_at': db_memory.get('updated_at'), + 'access_count': 0, + 'last_accessed': None, + } + + # Extract scope and memory_type from metadata + metadata = memory_data.get('metadata', {}) + scope_str = metadata.get('scope') or metadata.get('agent', {}).get('scope', 'agent') + memory_type_str = metadata.get('memory_type') or metadata.get('agent', {}).get('memory_type', 'working') + + try: + scope = MemoryScope(scope_str) if isinstance(scope_str, str) else scope_str + except (ValueError, TypeError): + scope = MemoryScope.AGENT # Default scope + + try: + memory_type = MemoryType(memory_type_str) if isinstance(memory_type_str, str) else memory_type_str + except (ValueError, TypeError): + memory_type = MemoryType.WORKING # Default type + + memory_data['scope'] = scope + memory_data['memory_type'] = memory_type + + # Load into memory cache for future fast access + if memory_id not in self.scope_memories[scope][memory_type]: + self.scope_memories[scope][memory_type][memory_id] = memory_data + + # Also load into scope controller's storage for access control + if self.scope_controller and memory_id not in self.scope_controller.scope_storage[scope][memory_type]: + self.scope_controller.scope_storage[scope][memory_type][memory_id] = memory_data + + # Restore permissions from metadata if available + # Check if this memory was created by the current 
agent + memory_agent_id = memory_data.get('agent_id') or metadata.get('agent', {}).get('agent_id') + if memory_agent_id == agent_id: + # This memory belongs to the agent, grant owner permissions + if memory_id not in self.permission_controller.memory_permissions: + self.permission_controller.memory_permissions[memory_id] = {} + if agent_id not in self.permission_controller.memory_permissions[memory_id]: + # Grant owner permissions + owner_permissions = self.multi_agent_config.default_permissions.get("owner", []) + owner_permissions_enum = [] + for perm in owner_permissions: + try: + if isinstance(perm, AccessPermission): + owner_permissions_enum.append(perm) + else: + owner_permissions_enum.append(AccessPermission(perm.lower())) + except ValueError: + pass + self.permission_controller.memory_permissions[memory_id][agent_id] = owner_permissions_enum + + # Check if agent has access to this memory + scope_access = self.scope_controller.check_scope_access(agent_id, memory_id) + if scope_access: + scope_access_passed += 1 + + permission_check = self.permission_controller.check_permission( + agent_id, memory_id, AccessPermission.READ + ) + if permission_check: + permission_passed += 1 + accessible_memories.append(memory_data) + else: + logger.debug(f"Permission denied for agent {agent_id} on memory {memory_id}") + else: + logger.debug(f"Scope access denied for agent {agent_id} on memory {memory_id}") + + # Also check in-memory cache for any memories not in database (for backward compatibility) for scope in MemoryScope: for memory_type in MemoryType: for memory_id, memory_data in self.scope_memories[scope][memory_type].items(): + # Skip if already processed from database + if any(m.get('id') == memory_id for m in accessible_memories): + continue + total_memories += 1 # Check if agent has access to this memory @@ -513,20 +631,22 @@ def get_memories( if query: accessible_memories = [ memory for memory in accessible_memories - if query.lower() in memory['content'].lower() + if query.lower() in memory.get('content', '').lower() ] # Apply additional filters if provided if filters: for key, value in filters.items(): - accessible_memories = [ - memory for memory in accessible_memories - if memory.get(key) == value - ] + if key != 'user_id': # user_id already used for database query + accessible_memories = [ + memory for memory in accessible_memories + if memory.get(key) == value + ] # Update access statistics for memory in accessible_memories: - memory['access_count'] += 1 + if 'access_count' in memory: + memory['access_count'] = memory.get('access_count', 0) + 1 memory['last_accessed'] = datetime.now().isoformat() logger.info(f"Retrieved {len(accessible_memories)} memories for agent {agent_id} " @@ -534,7 +654,7 @@ def get_memories( return accessible_memories except Exception as e: - logger.error(f"Failed to get memories for agent {agent_id}: {e}") + logger.error(f"Failed to get memories for agent {agent_id}: {e}", exc_info=True) raise def update_memory( diff --git a/src/powermem/agent/implementations/multi_user.py b/src/powermem/agent/implementations/multi_user.py index 2d324a1..a3a3701 100644 --- a/src/powermem/agent/implementations/multi_user.py +++ b/src/powermem/agent/implementations/multi_user.py @@ -153,10 +153,14 @@ def process_memory( # Persist to database first to get Snowflake ID # Use temporary memory data for database insertion + # Extract run_id from metadata + run_id = enhanced_metadata.get('run_id') if enhanced_metadata else None + temp_memory_data = { 'content': content, 'user_id': 
user_id, 'agent_id': agent_id, + 'run_id': run_id, 'scope': scope, 'memory_type': memory_type, 'metadata': enhanced_metadata, @@ -224,6 +228,7 @@ def process_memory( 'user_id': user_id, 'agent_id': agent_id, 'created_at': memory_data['created_at'], + 'updated_at': memory_data['updated_at'], 'metadata': memory_data['metadata'], } @@ -251,10 +256,12 @@ def _persist_memory_to_storage(self, memory_data: Dict[str, Any]) -> int: # Use the existing Memory.add() method # Get the Snowflake ID returned from database to ensure consistency + # Use infer=False to use simple mode since intelligent processing is already done at agent layer add_result = self._memory_instance.add( messages=memory_data['content'], user_id=memory_data.get('user_id'), agent_id=memory_data.get('agent_id'), + run_id=memory_data.get('run_id'), metadata={ 'scope': memory_data.get('scope').value if memory_data.get('scope') else None, 'memory_type': memory_data.get('memory_type').value if memory_data.get('memory_type') else None, @@ -262,19 +269,30 @@ def _persist_memory_to_storage(self, memory_data: Dict[str, Any]) -> int: 'importance_level': memory_data.get('importance_level'), 'privacy_level': memory_data.get('privacy_level').value if memory_data.get('privacy_level') else None, **memory_data.get('metadata', {}) - } + }, + infer=False # Use simple mode to avoid intelligent processing returning empty results ) # Get the Snowflake ID from database - if add_result and 'results' in add_result and len(add_result['results']) > 0: - db_memory_id = add_result['results'][0].get('id') - if db_memory_id: - logger.info(f"Persisted memory {db_memory_id} to storage") - return db_memory_id - else: - raise ValueError("Failed to get memory ID from database") + if not add_result: + logger.error("Memory.add() returned None or empty result") + raise ValueError("Failed to persist memory to database: Memory.add() returned None") + + if 'results' not in add_result: + logger.error(f"Memory.add() returned unexpected structure: {add_result}") + raise ValueError(f"Failed to persist memory to database: Missing 'results' key in response. 
Got keys: {list(add_result.keys())}") + + if not add_result['results'] or len(add_result['results']) == 0: + logger.error(f"Memory.add() returned empty results list: {add_result}") + raise ValueError("Failed to persist memory to database: Empty results list") + + db_memory_id = add_result['results'][0].get('id') + if db_memory_id: + logger.info(f"Persisted memory {db_memory_id} to storage") + return db_memory_id else: - raise ValueError("Failed to persist memory to database") + logger.error(f"Memory.add() result missing 'id' field: {add_result['results'][0]}") + raise ValueError("Failed to get memory ID from database: Missing 'id' in result") except Exception as e: logger.error(f"Failed to persist memory to storage: {e}") @@ -511,31 +529,121 @@ def get_memories( user_id = self._extract_user_id(agent_id, None, filters) accessible_memories = [] - # Get user's own memories + # First, try to get memories from database + # Initialize Memory instance if not exists + if not hasattr(self, '_memory_instance'): + from powermem.core.memory import Memory + config_dict = self.config._data if hasattr(self.config, '_data') else self.config + self._memory_instance = Memory(config_dict) + + # Query memories from database by user_id and agent_id + db_result = self._memory_instance.get_all( + user_id=user_id, + agent_id=agent_id, + limit=1000 # Get a large number to ensure we get all memories + ) + + # Extract results from database response + db_memories = db_result.get('results', []) if isinstance(db_result, dict) else db_result + + # Convert database format to user memory format and load into memory cache + processed_memory_ids = set() + + for db_memory in db_memories: + memory_id = db_memory.get('id') + if not memory_id: + continue + + processed_memory_ids.add(memory_id) + + # Convert database format to user memory format + # Storage adapter.get_all_memories() returns 'memory' field (mapped from payload.data) + # Keep 'document' as fallback for database raw field name compatibility + content = db_memory.get('memory') or db_memory.get('document', '') + + memory_data = { + 'id': memory_id, + 'content': content, + 'user_id': db_memory.get('user_id', user_id), + 'agent_id': db_memory.get('agent_id', agent_id), + 'run_id': db_memory.get('run_id'), + 'metadata': db_memory.get('metadata', {}), + 'created_at': db_memory.get('created_at'), + 'updated_at': db_memory.get('updated_at'), + 'access_count': 0, + 'last_accessed': None, + } + + # Extract memory_type from metadata + metadata = memory_data.get('metadata', {}) + memory_type_str = metadata.get('memory_type') or metadata.get('agent', {}).get('memory_type', 'working') + + try: + memory_type = MemoryType(memory_type_str) if isinstance(memory_type_str, str) else memory_type_str + except (ValueError, TypeError): + memory_type = MemoryType.WORKING # Default type + + memory_data['memory_type'] = memory_type + memory_data['scope'] = metadata.get('scope') or 'private' + + # Extract additional fields from metadata + if 'intelligence' in metadata: + memory_data['retention_score'] = metadata['intelligence'].get('current_retention', 1.0) + memory_data['importance_level'] = metadata['intelligence'].get('importance_score') + + # Load into memory cache for future fast access + if user_id not in self.user_memories: + self.user_memories[user_id] = { + MemoryType.WORKING: {}, + MemoryType.SHORT_TERM: {}, + MemoryType.LONG_TERM: {}, + MemoryType.SEMANTIC: {}, + MemoryType.EPISODIC: {}, + MemoryType.PROCEDURAL: {}, + MemoryType.PUBLIC_SHARED: {}, + MemoryType.PRIVATE_AGENT: {}, + 
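+                # Pre-creating a bucket for every MemoryType keeps the cache
+                # lookups below free of KeyError for any memory type value.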
MemoryType.COLLABORATIVE: {}, + MemoryType.GROUP_CONSENSUS: {}, + } + + if memory_id not in self.user_memories[user_id][memory_type]: + self.user_memories[user_id][memory_type][memory_id] = memory_data + + # Check if this memory belongs to the user + if memory_data['user_id'] == user_id: + accessible_memories.append(memory_data) + + # Also check in-memory cache for any memories not in database (for backward compatibility) if user_id in self.user_memories: for memory_type in MemoryType: - for memory_data in self.user_memories[user_id][memory_type].values(): - accessible_memories.append(memory_data) + for memory_id, memory_data in self.user_memories[user_id][memory_type].items(): + # Skip if already processed from database + if memory_id in processed_memory_ids: + continue + + # Check if memory belongs to user + if memory_data.get('user_id') == user_id: + accessible_memories.append(memory_data) # Get shared memories for memory_id, sharing_data in self.shared_memories.items(): if user_id in sharing_data['shared_with']: # Find the original memory memory_data = self._find_memory(memory_id) - if memory_data: + if memory_data and memory_data not in accessible_memories: accessible_memories.append(memory_data) # Apply query filtering if provided if query: accessible_memories = [ memory for memory in accessible_memories - if query.lower() in memory['content'].lower() + if query.lower() in memory.get('content', '').lower() ] # Apply additional filters if provided if filters: for key, value in filters.items(): - if key != 'user_id': # Skip user_id filter as it's already applied + if key != 'user_id': # user_id already used for database query accessible_memories = [ memory for memory in accessible_memories if memory.get(key) == value @@ -543,14 +651,15 @@ def get_memories( # Update access statistics for memory in accessible_memories: - memory['access_count'] += 1 + if 'access_count' in memory: + memory['access_count'] = memory.get('access_count', 0) + 1 memory['last_accessed'] = datetime.now().isoformat() logger.info(f"Retrieved {len(accessible_memories)} memories for user {user_id}") return accessible_memories except Exception as e: - logger.error(f"Failed to get memories for user {agent_id}: {e}") + logger.error(f"Failed to get memories for user {agent_id}: {e}", exc_info=True) raise def update_memory( diff --git a/src/powermem/core/audit.py b/src/powermem/core/audit.py index bb60997..41c9d84 100644 --- a/src/powermem/core/audit.py +++ b/src/powermem/core/audit.py @@ -73,7 +73,7 @@ def log_event( "user_id": user_id, "agent_id": agent_id, "details": details, - "version": "0.2.1", + "version": "0.3.0", } # Log to file diff --git a/src/powermem/core/memory.py b/src/powermem/core/memory.py index 5a5db4f..e9bf2d4 100644 --- a/src/powermem/core/memory.py +++ b/src/powermem/core/memory.py @@ -1113,6 +1113,10 @@ def search( for key in ["id", "created_at", "updated_at", "user_id", "agent_id", "run_id"]: if key in result: transformed_result[key] = result[key] + + # Ensure memory_id field exists (for API compatibility) + if "id" in transformed_result and "memory_id" not in transformed_result: + transformed_result["memory_id"] = transformed_result["id"] transformed_results.append(transformed_result) # Log audit event diff --git a/src/powermem/core/telemetry.py b/src/powermem/core/telemetry.py index 493aa69..d02b60b 100644 --- a/src/powermem/core/telemetry.py +++ b/src/powermem/core/telemetry.py @@ -65,7 +65,7 @@ def capture_event( "user_id": user_id, "agent_id": agent_id, "timestamp": 
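+            # Stamping the package version on every event lets downstream
+            # analytics segment telemetry by release.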
get_current_datetime().isoformat(), - "version": "0.2.1", + "version": "0.3.0", } self.events.append(event) @@ -165,7 +165,7 @@ def set_user_properties(self, user_id: str, properties: Dict[str, Any]) -> None: "properties": properties, "user_id": user_id, "timestamp": get_current_datetime().isoformat(), - "version": "0.2.1", + "version": "0.3.0", } self.events.append(event) diff --git a/src/powermem/version.py b/src/powermem/version.py index f2a9b8b..1260acb 100644 --- a/src/powermem/version.py +++ b/src/powermem/version.py @@ -2,11 +2,12 @@ Version information management """ -__version__ = "0.2.1" +__version__ = "0.3.0" __version_info__ = tuple(map(int, __version__.split("."))) # Version history VERSION_HISTORY = { + "0.3.0": "2026-01-09 - Version 0.3.0 release", "0.2.1": "2025-12-19 - Version 0.2.1 release", "0.2.0": "2025-12-16 - Version 0.2.0 release", "0.1.0": "2025-10-16 - Initial version release", diff --git a/src/server/__init__.py b/src/server/__init__.py new file mode 100644 index 0000000..34924e6 --- /dev/null +++ b/src/server/__init__.py @@ -0,0 +1,8 @@ +""" +PowerMem HTTP API Server + +A production-ready HTTP API server for PowerMem that provides RESTful endpoints +for memory management, search, user profiles, and multi-agent support. +""" + +__version__ = "0.1.0" diff --git a/src/server/api/__init__.py b/src/server/api/__init__.py new file mode 100644 index 0000000..86a5d1a --- /dev/null +++ b/src/server/api/__init__.py @@ -0,0 +1,3 @@ +""" +API routes for PowerMem API Server +""" diff --git a/src/server/api/v1/__init__.py b/src/server/api/v1/__init__.py new file mode 100644 index 0000000..037fb93 --- /dev/null +++ b/src/server/api/v1/__init__.py @@ -0,0 +1,20 @@ +""" +API v1 routes +""" + +from fastapi import APIRouter +from .memories import router as memories_router +from .search import router as search_router +from .users import router as users_router +from .agents import router as agents_router +from .system import router as system_router + +# Create main v1 router +router = APIRouter(prefix="/api/v1", tags=["v1"]) + +# Include sub-routers +router.include_router(memories_router) +router.include_router(search_router) +router.include_router(users_router) +router.include_router(agents_router) +router.include_router(system_router) diff --git a/src/server/api/v1/agents.py b/src/server/api/v1/agents.py new file mode 100644 index 0000000..aaf94e5 --- /dev/null +++ b/src/server/api/v1/agents.py @@ -0,0 +1,156 @@ +""" +Agent memory API routes +""" + +from typing import Optional +from fastapi import APIRouter, Depends, Query, Request +from slowapi import Limiter + +from ...models.request import AgentMemoryCreateRequest, AgentMemoryShareRequest +from ...models.response import APIResponse, MemoryListResponse +from ...services.agent_service import AgentService +from ...middleware.auth import verify_api_key +from ...middleware.rate_limit import limiter, get_rate_limit_string +from ...utils.converters import memory_dict_to_response + +router = APIRouter(prefix="/agents", tags=["agents"]) + + +def get_agent_service() -> AgentService: + """Dependency to get agent service""" + return AgentService() + + +@router.get( + "/{agent_id}/memories", + response_model=APIResponse, + summary="Get agent memories", + description="Get all memories for a specific agent", +) +@limiter.limit(get_rate_limit_string()) +async def get_agent_memories( + request: Request, + agent_id: str, + limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"), + offset: int = Query(0, ge=0, 
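+    # Note: the list endpoints below report total=len(page), i.e. the size of
+    # the returned page, not the overall number of matching rows.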
description="Number of results to skip"), + api_key: str = Depends(verify_api_key), + service: AgentService = Depends(get_agent_service), +): + """Get all memories for an agent""" + memories = service.get_agent_memories( + agent_id=agent_id, + limit=limit, + offset=offset, + ) + + memory_responses = [memory_dict_to_response(m) for m in memories] + + response_data = MemoryListResponse( + memories=memory_responses, + total=len(memory_responses), + limit=limit, + offset=offset, + ) + + return APIResponse( + success=True, + data=response_data.model_dump(mode='json'), + message="Agent memories retrieved successfully", + ) + + +@router.post( + "/{agent_id}/memories", + response_model=APIResponse, + summary="Create agent memory", + description="Create a new memory for a specific agent", +) +@limiter.limit(get_rate_limit_string()) +async def create_agent_memory( + request: Request, + agent_id: str, + body: AgentMemoryCreateRequest, + api_key: str = Depends(verify_api_key), + service: AgentService = Depends(get_agent_service), +): + """Create a memory for an agent""" + result = service.create_agent_memory( + agent_id=agent_id, + content=body.content, + user_id=body.user_id, + run_id=body.run_id, + ) + + memory_response = memory_dict_to_response(result) + + return APIResponse( + success=True, + data=memory_response.model_dump(mode='json'), + message="Agent memory created successfully", + ) + + +@router.post( + "/{agent_id}/memories/share", + response_model=APIResponse, + summary="Share agent memories", + description="Share memories from one agent to another", +) +@limiter.limit(get_rate_limit_string()) +async def share_agent_memories( + request: Request, + agent_id: str, + body: AgentMemoryShareRequest, + api_key: str = Depends(verify_api_key), + service: AgentService = Depends(get_agent_service), +): + """Share memories between agents""" + result = service.share_memories( + agent_id=agent_id, + target_agent_id=body.target_agent_id, + memory_ids=body.memory_ids, + ) + + return APIResponse( + success=True, + data=result, + message=f"Shared {result['shared_count']} memories successfully", + ) + + +@router.get( + "/{agent_id}/memories/share", + response_model=APIResponse, + summary="Get shared memories", + description="Get shared memories for an agent", +) +@limiter.limit(get_rate_limit_string()) +async def get_shared_memories( + request: Request, + agent_id: str, + limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"), + offset: int = Query(0, ge=0, description="Number of results to skip"), + api_key: str = Depends(verify_api_key), + service: AgentService = Depends(get_agent_service), +): + """Get shared memories for an agent""" + memories = service.get_shared_memories( + agent_id=agent_id, + limit=limit, + offset=offset, + ) + + memory_responses = [memory_dict_to_response(m) for m in memories] + + response_data = MemoryListResponse( + memories=memory_responses, + total=len(memory_responses), + limit=limit, + offset=offset, + ) + + return APIResponse( + success=True, + data=response_data.model_dump(mode='json'), + message="Shared memories retrieved successfully", + ) diff --git a/src/server/api/v1/memories.py b/src/server/api/v1/memories.py new file mode 100644 index 0000000..5091bab --- /dev/null +++ b/src/server/api/v1/memories.py @@ -0,0 +1,381 @@ +""" +Memory management API routes +""" + +import logging +from typing import List, Optional +from fastapi import APIRouter, Depends, Query, Request +from slowapi import Limiter +from slowapi.util import get_remote_address + 
+from ...models.request import ( + MemoryCreateRequest, + MemoryBatchCreateRequest, + MemoryUpdateRequest, + MemoryBatchUpdateRequest, + BulkDeleteRequest, +) +from ...models.response import ( + APIResponse, + MemoryResponse, + MemoryListResponse, +) +from ...services.memory_service import MemoryService +from ...middleware.auth import verify_api_key +from ...middleware.rate_limit import limiter, get_rate_limit_string +from ...utils.converters import memory_dict_to_response + +logger = logging.getLogger("server") + +router = APIRouter(prefix="/memories", tags=["memories"]) + + +def get_memory_service() -> MemoryService: + """Dependency to get memory service""" + return MemoryService() + + +@router.post( + "", + response_model=APIResponse, + summary="Create a memory", + description="Create a new memory with optional user_id, agent_id, and metadata", +) +@limiter.limit(get_rate_limit_string()) +async def create_memory( + request: Request, + body: MemoryCreateRequest, + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """Create a new memory""" + results = service.create_memory( + content=body.content, + user_id=body.user_id, + agent_id=body.agent_id, + run_id=body.run_id, + metadata=body.metadata, + filters=body.filters, + scope=body.scope, + memory_type=body.memory_type, + infer=body.infer, + ) + + # Convert all created memories to response format + # results is now a list of memory dictionaries + memory_responses = [memory_dict_to_response(m) for m in results] + + # Always return array of memories + # Exclude None values to avoid returning null fields + message = "Memory created successfully" if len(memory_responses) == 1 else f"Created {len(memory_responses)} memories successfully" + + return APIResponse( + success=True, + data=[m.model_dump(mode='json', exclude_none=True) for m in memory_responses], + message=message, + ) + + +@router.post( + "/batch", + response_model=APIResponse, + summary="Create multiple memories", + description="Create multiple memories in a single request (batch operation)", +) +@limiter.limit(get_rate_limit_string()) +async def batch_create_memories( + request: Request, + body: MemoryBatchCreateRequest, + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """Create multiple memories in batch""" + # Convert MemoryItem objects to dictionaries + memories_data = [ + { + "content": item.content, + "metadata": item.metadata, + "filters": item.filters, + "scope": item.scope, + "memory_type": item.memory_type, + } + for item in body.memories + ] + + result = service.batch_create_memories( + memories=memories_data, + user_id=body.user_id, + agent_id=body.agent_id, + run_id=body.run_id, + infer=body.infer, + ) + + # Convert created memories to response format + created_memories = [] + for item in result["created"]: + try: + memory = service.get_memory( + memory_id=item["memory_id"], + user_id=body.user_id, + agent_id=body.agent_id, + ) + created_memories.append(memory_dict_to_response(memory).model_dump(mode='json')) + except Exception as e: + logger.warning(f"Failed to retrieve created memory {item['memory_id']}: {e}") + # Include basic info even if full retrieval fails + created_memories.append({ + "memory_id": item["memory_id"], + "content": item["content"], + }) + + response_data = { + "memories": created_memories, + "total": result["total"], + "created_count": result["created_count"], + "failed_count": result["failed_count"], + } + + # Only include failed items if 
there are any + if result["failed_count"] > 0: + response_data["failed"] = result["failed"] + + return APIResponse( + success=True, + data=response_data, + message=f"Created {result['created_count']} out of {result['total']} memories", + ) + + +@router.get( + "", + response_model=APIResponse, + summary="List memories", + description="Get a list of memories with optional filtering and pagination", +) +@limiter.limit(get_rate_limit_string()) +async def list_memories( + request: Request, + user_id: Optional[str] = Query(None, description="Filter by user ID"), + agent_id: Optional[str] = Query(None, description="Filter by agent ID"), + limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"), + offset: int = Query(0, ge=0, description="Number of results to skip"), + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """List memories with pagination""" + memories = service.list_memories( + user_id=user_id, + agent_id=agent_id, + limit=limit, + offset=offset, + ) + + memory_responses = [memory_dict_to_response(m) for m in memories] + + response_data = MemoryListResponse( + memories=memory_responses, + total=len(memory_responses), + limit=limit, + offset=offset, + ) + + return APIResponse( + success=True, + data=response_data.model_dump(mode='json'), + message="Memories retrieved successfully", + ) + + +@router.get( + "/{memory_id}", + response_model=APIResponse, + summary="Get a memory", + description="Get a specific memory by ID", +) +@limiter.limit(get_rate_limit_string()) +async def get_memory( + request: Request, + memory_id: int, + user_id: Optional[str] = Query(None, description="User ID for access control"), + agent_id: Optional[str] = Query(None, description="Agent ID for access control"), + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """Get a memory by ID""" + memory = service.get_memory( + memory_id=memory_id, + user_id=user_id, + agent_id=agent_id, + ) + + memory_response = memory_dict_to_response(memory) + + return APIResponse( + success=True, + data=memory_response.model_dump(mode='json'), + message="Memory retrieved successfully", + ) + + +@router.put( + "/batch", + response_model=APIResponse, + summary="Batch update memories", + description="Update multiple memories in a single request (batch operation)", +) +@limiter.limit(get_rate_limit_string()) +async def batch_update_memories( + request: Request, + body: MemoryBatchUpdateRequest, + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """Update multiple memories in batch""" + # Convert MemoryUpdateItem objects to dictionaries + updates_data = [ + { + "memory_id": item.memory_id, + "content": item.content, + "metadata": item.metadata, + } + for item in body.updates + ] + + result = service.batch_update_memories( + updates=updates_data, + user_id=body.user_id, + agent_id=body.agent_id, + ) + + # Convert updated memories to response format + updated_memories = [] + for item in result["updated"]: + try: + memory = service.get_memory( + memory_id=item["memory_id"], + user_id=body.user_id, + agent_id=body.agent_id, + ) + updated_memories.append(memory_dict_to_response(memory).model_dump(mode='json')) + except Exception as e: + logger.warning(f"Failed to retrieve updated memory {item['memory_id']}: {e}") + # Include basic info even if full retrieval fails + updated_memories.append({ + "memory_id": item["memory_id"], + }) + + response_data = { + "memories": 
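+        # Batch updates report partial success (HTTP 200 with updated_count /
+        # failed_count) rather than failing the whole request on one error.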
updated_memories, + "total": result["total"], + "updated_count": result["updated_count"], + "failed_count": result["failed_count"], + } + + # Only include failed items if there are any + if result["failed_count"] > 0: + response_data["failed"] = result["failed"] + + return APIResponse( + success=True, + data=response_data, + message=f"Updated {result['updated_count']} out of {result['total']} memories", + ) + + +@router.put( + "/{memory_id}", + response_model=APIResponse, + summary="Update a memory", + description="Update an existing memory", +) +@limiter.limit(get_rate_limit_string()) +async def update_memory( + request: Request, + memory_id: int, + body: MemoryUpdateRequest, + user_id: Optional[str] = Query(None, description="User ID for access control"), + agent_id: Optional[str] = Query(None, description="Agent ID for access control"), + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """Update a memory""" + # At least one of content or metadata must be provided + if body.content is None and body.metadata is None: + from ...models.errors import ErrorCode, APIError + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="At least one of content or metadata must be provided", + status_code=400, + ) + + result = service.update_memory( + memory_id=memory_id, + content=body.content, + user_id=user_id, + agent_id=agent_id, + metadata=body.metadata, + ) + + memory_response = memory_dict_to_response(result) + + return APIResponse( + success=True, + data=memory_response.model_dump(mode='json'), + message="Memory updated successfully", + ) + + +@router.delete( + "/batch", + response_model=APIResponse, + summary="Bulk delete memories", + description="Delete multiple memories at once", +) +@limiter.limit(get_rate_limit_string()) +async def bulk_delete_memories( + request: Request, + body: BulkDeleteRequest, + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """Bulk delete memories""" + result = service.bulk_delete_memories( + memory_ids=body.memory_ids, + user_id=body.user_id, + agent_id=body.agent_id, + ) + + return APIResponse( + success=True, + data=result, + message=f"Deleted {result['deleted_count']} memories", + ) + + +@router.delete( + "/{memory_id}", + response_model=APIResponse, + summary="Delete a memory", + description="Delete a specific memory by ID", +) +@limiter.limit(get_rate_limit_string()) +async def delete_memory( + request: Request, + memory_id: int, + user_id: Optional[str] = Query(None, description="User ID for access control"), + agent_id: Optional[str] = Query(None, description="Agent ID for access control"), + api_key: str = Depends(verify_api_key), + service: MemoryService = Depends(get_memory_service), +): + """Delete a memory""" + service.delete_memory( + memory_id=memory_id, + user_id=user_id, + agent_id=agent_id, + ) + + return APIResponse( + success=True, + data={"memory_id": memory_id}, + message="Memory deleted successfully", + ) diff --git a/src/server/api/v1/search.py b/src/server/api/v1/search.py new file mode 100644 index 0000000..47cc38d --- /dev/null +++ b/src/server/api/v1/search.py @@ -0,0 +1,106 @@ +""" +Memory search API routes +""" + +from typing import Optional +from fastapi import APIRouter, Depends, Query, Request +from slowapi import Limiter +from slowapi.util import get_remote_address + +from ...models.request import SearchRequest +from ...models.response import APIResponse, SearchResponse, SearchResult +from ...services.search_service 
import SearchService +from ...middleware.auth import verify_api_key +from ...middleware.rate_limit import limiter, get_rate_limit_string +from ...utils.converters import search_result_to_response + +router = APIRouter(prefix="/memories", tags=["search"]) + + +def get_search_service() -> SearchService: + """Dependency to get search service""" + return SearchService() + + +@router.post( + "/search", + response_model=APIResponse, + summary="Search memories", + description="Search memories using semantic search with optional filters", +) +@limiter.limit(get_rate_limit_string()) +async def search_memories_post( + request: Request, + body: SearchRequest, + api_key: str = Depends(verify_api_key), + service: SearchService = Depends(get_search_service), +): + """Search memories (POST method)""" + results = service.search_memories( + query=body.query, + user_id=body.user_id, + agent_id=body.agent_id, + run_id=body.run_id, + filters=body.filters, + limit=body.limit, + ) + + search_results = [ + search_result_to_response(r) for r in results.get("results", []) + ] + + response_data = SearchResponse( + results=search_results, + total=len(search_results), + query=body.query, + ) + + return APIResponse( + success=True, + data=response_data.model_dump(mode='json'), + message="Search completed successfully", + ) + + +@router.get( + "/search", + response_model=APIResponse, + summary="Search memories (GET)", + description="Search memories using query parameters", +) +@limiter.limit(get_rate_limit_string()) +async def search_memories_get( + request: Request, + query: str = Query(..., description="Search query"), + user_id: Optional[str] = Query(None, description="Filter by user ID"), + agent_id: Optional[str] = Query(None, description="Filter by agent ID"), + run_id: Optional[str] = Query(None, description="Filter by run ID"), + limit: int = Query(30, ge=1, le=100, description="Maximum number of results"), + api_key: str = Depends(verify_api_key), + service: SearchService = Depends(get_search_service), +): + """Search memories (GET method)""" + results = service.search_memories( + query=query, + user_id=user_id, + agent_id=agent_id, + run_id=run_id, + filters=None, # GET method doesn't support complex filters + limit=limit, + ) + + search_results = [ + search_result_to_response(r) for r in results.get("results", []) + ] + + response_data = SearchResponse( + results=search_results, + total=len(search_results), + query=query, + ) + + return APIResponse( + success=True, + data=response_data.model_dump(mode='json'), + message="Search completed successfully", + ) diff --git a/src/server/api/v1/system.py b/src/server/api/v1/system.py new file mode 100644 index 0000000..49570b7 --- /dev/null +++ b/src/server/api/v1/system.py @@ -0,0 +1,155 @@ +""" +System management API routes +""" + +from fastapi import APIRouter, Depends, Request, Response, Query +from slowapi import Limiter +from typing import Optional + +from ...models.response import APIResponse, HealthResponse, StatusResponse +from ...middleware.auth import verify_api_key +from ...middleware.rate_limit import limiter, get_rate_limit_string +from ...config import config +from ...utils.metrics import get_metrics_collector +from powermem import auto_config + +router = APIRouter(prefix="/system", tags=["system"]) + + +@router.get( + "/health", + response_model=APIResponse, + summary="Health check", + description="Check if the API server is healthy (public endpoint, no authentication required)", +) +async def health_check(): + """Health check endpoint""" + health = 
HealthResponse(status="healthy") + + return APIResponse( + success=True, + data=health.model_dump(mode='json'), + message="Service is healthy", + ) + + +@router.get( + "/status", + response_model=APIResponse, + summary="System status", + description="Get system status and configuration information", +) +@limiter.limit(get_rate_limit_string()) +async def get_status( + request: Request, + api_key: str = Depends(verify_api_key), +): + """Get system status""" + # Get PowerMem config + powermem_config = auto_config() + + storage_type = None + llm_provider = None + + if isinstance(powermem_config, dict): + # Extract from dict config + vector_store = powermem_config.get("vector_store") or powermem_config.get("database", {}) + storage_type = vector_store.get("provider") if isinstance(vector_store, dict) else None + + llm = powermem_config.get("llm", {}) + llm_provider = llm.get("provider") if isinstance(llm, dict) else None + else: + # Extract from config object + if hasattr(powermem_config, "vector_store") and powermem_config.vector_store: + storage_type = powermem_config.vector_store.provider + if hasattr(powermem_config, "llm") and powermem_config.llm: + llm_provider = powermem_config.llm.provider + + status_data = StatusResponse( + status="operational", + version=config.api_version, + storage_type=storage_type, + llm_provider=llm_provider, + ) + + return APIResponse( + success=True, + data=status_data.model_dump(mode='json'), + message="System status retrieved successfully", + ) + + +@router.get( + "/metrics", + summary="Prometheus metrics", + description="Get Prometheus format metrics", +) +@limiter.limit(get_rate_limit_string()) +async def get_metrics( + request: Request, + api_key: str = Depends(verify_api_key), +): + """Get Prometheus format metrics""" + metrics_collector = get_metrics_collector() + metrics_text = metrics_collector.get_metrics() + + return Response( + content=metrics_text, + media_type="text/plain; version=0.0.4; charset=utf-8" + ) + + +@router.delete( + "/delete-all-memories", + response_model=APIResponse, + summary="Delete all memories", + description="Delete all memories matching the provided filters (requires admin permissions). " + "This endpoint uses Memory.delete_all() to match the powermem SDK API. " + "If no filters provided, deletes all memories.", +) +@limiter.limit(get_rate_limit_string()) +async def delete_all_memories( + request: Request, + user_id: Optional[str] = Query(None, description="Filter by user ID"), + agent_id: Optional[str] = Query(None, description="Filter by agent ID"), + run_id: Optional[str] = Query(None, description="Filter by run ID"), + api_key: str = Depends(verify_api_key), +): + """ + Delete all memories matching the provided filters. + + This endpoint uses Memory.delete_all() to match the powermem SDK API. + If no filters are provided, all memories will be deleted. 
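+
+    Example (illustrative):
+        DELETE /api/v1/system/delete-all-memories?user_id=alice
+        removes every memory belonging to user "alice" and nothing else.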
+ """ + from powermem import Memory + from ...models.errors import ErrorCode, APIError + + try: + memory = Memory(config=auto_config()) + result = memory.delete_all( + user_id=user_id, + agent_id=agent_id, + run_id=run_id, + ) + + filters = {} + if user_id: + filters["user_id"] = user_id + if agent_id: + filters["agent_id"] = agent_id + if run_id: + filters["run_id"] = run_id + + filter_desc = f" with filters: {filters}" if filters else "" + + return APIResponse( + success=True, + data={"deleted": result, "filters": filters}, + message=f"All memories{filter_desc} deleted successfully", + ) + except Exception as e: + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to delete all memories: {str(e)}", + status_code=500, + ) diff --git a/src/server/api/v1/users.py b/src/server/api/v1/users.py new file mode 100644 index 0000000..e0d3dd6 --- /dev/null +++ b/src/server/api/v1/users.py @@ -0,0 +1,160 @@ +""" +User profile API routes +""" + +from typing import Optional +from fastapi import APIRouter, Depends, Query, Request +from slowapi import Limiter + +from ...models.request import UserProfileUpdateRequest +from ...models.response import APIResponse, UserProfileResponse, MemoryListResponse +from ...services.user_service import UserService +from ...middleware.auth import verify_api_key +from ...middleware.rate_limit import limiter, get_rate_limit_string +from ...utils.converters import user_profile_to_response, memory_dict_to_response + +router = APIRouter(prefix="/users", tags=["users"]) + + +def get_user_service() -> UserService: + """Dependency to get user service""" + return UserService() + + +@router.get( + "/{user_id}/profile", + response_model=APIResponse, + summary="Get user profile", + description="Get the user profile for a specific user", +) +@limiter.limit(get_rate_limit_string()) +async def get_user_profile( + request: Request, + user_id: str, + api_key: str = Depends(verify_api_key), + service: UserService = Depends(get_user_service), +): + """Get user profile""" + profile = service.get_user_profile(user_id) + + profile_response = user_profile_to_response(user_id, profile) + + return APIResponse( + success=True, + data=profile_response.model_dump(mode='json'), + message="User profile retrieved successfully", + ) + + +@router.post( + "/{user_id}/profile", + response_model=APIResponse, + summary="Update user profile", + description="Update the user profile for a specific user", +) +@limiter.limit(get_rate_limit_string()) +async def update_user_profile( + request: Request, + user_id: str, + body: UserProfileUpdateRequest, + api_key: str = Depends(verify_api_key), + service: UserService = Depends(get_user_service), +): + """Update user profile""" + profile = service.update_user_profile( + user_id=user_id, + profile_content=body.profile_content, + topics=body.topics, + ) + + profile_response = user_profile_to_response(user_id, profile) + + return APIResponse( + success=True, + data=profile_response.model_dump(mode='json'), + message="User profile updated successfully", + ) + + +@router.get( + "/{user_id}/memories", + response_model=APIResponse, + summary="Get user memories", + description="Get all memories for a specific user", +) +@limiter.limit(get_rate_limit_string()) +async def get_user_memories( + request: Request, + user_id: str, + limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"), + offset: int = Query(0, ge=0, description="Number of results to skip"), + api_key: str = Depends(verify_api_key), + service: UserService = 
Depends(get_user_service), +): + """Get all memories for a user""" + memories = service.get_user_memories( + user_id=user_id, + limit=limit, + offset=offset, + ) + + memory_responses = [memory_dict_to_response(m) for m in memories] + + response_data = MemoryListResponse( + memories=memory_responses, + total=len(memory_responses), + limit=limit, + offset=offset, + ) + + return APIResponse( + success=True, + data=response_data.model_dump(mode='json'), + message="User memories retrieved successfully", + ) + + +@router.delete( + "/{user_id}/profile", + response_model=APIResponse, + summary="Delete user profile", + description="Delete the user profile for a specific user", +) +@limiter.limit(get_rate_limit_string()) +async def delete_user_profile( + request: Request, + user_id: str, + api_key: str = Depends(verify_api_key), + service: UserService = Depends(get_user_service), +): + """Delete user profile""" + result = service.delete_user_profile(user_id=user_id) + + return APIResponse( + success=True, + data=result, + message=f"User profile for {user_id} deleted successfully", + ) + + +@router.delete( + "/{user_id}/memories", + response_model=APIResponse, + summary="Delete user memories", + description="Delete all memories for a specific user (user profile deletion)", +) +@limiter.limit(get_rate_limit_string()) +async def delete_user_memories( + request: Request, + user_id: str, + api_key: str = Depends(verify_api_key), + service: UserService = Depends(get_user_service), +): + """Delete all memories for a user""" + result = service.delete_user_memories(user_id=user_id) + + return APIResponse( + success=True, + data=result, + message=f"Deleted {result['deleted_count']} memories for user {user_id}", + ) diff --git a/src/server/cli/__init__.py b/src/server/cli/__init__.py new file mode 100644 index 0000000..efba03e --- /dev/null +++ b/src/server/cli/__init__.py @@ -0,0 +1,3 @@ +""" +CLI tools for PowerMem API Server +""" diff --git a/src/server/cli/server.py b/src/server/cli/server.py new file mode 100644 index 0000000..7ad2481 --- /dev/null +++ b/src/server/cli/server.py @@ -0,0 +1,55 @@ +""" +CLI command for starting the PowerMem API server +""" + +import click +import uvicorn +from ..config import config +from ..middleware.logging import setup_logging + + +@click.command() +@click.option("--host", default=None, help="Host to bind to") +@click.option("--port", default=None, type=int, help="Port to bind to") +@click.option("--workers", default=None, type=int, help="Number of worker processes") +@click.option("--reload", is_flag=True, help="Enable auto-reload") +@click.option("--log-level", default=None, help="Log level") +def server(host, port, workers, reload, log_level): + """ + Start the PowerMem API server. 
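+
+    CLI flags take precedence over matching values loaded from .env or the
+    environment.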
+ + Example: + powermem-server --host 0.0.0.0 --port 8000 --reload + """ + # Override config with CLI options + if host: + config.host = host + if port: + config.port = port + if workers: + config.workers = workers + if reload: + config.reload = True + if log_level: + config.log_level = log_level + + # Debug: Print current log format (can be removed later) + import sys + print(f"[DEBUG] Current log_format: {config.log_format}", file=sys.stderr) + + # Setup logging BEFORE starting uvicorn to ensure all logs have timestamps + setup_logging() + + # Start server + uvicorn.run( + "server.main:app", + host=config.host, + port=config.port, + reload=config.reload, + workers=config.workers if not config.reload else 1, + log_level=config.log_level.lower(), + ) + + +if __name__ == "__main__": + server() diff --git a/src/server/config.py b/src/server/config.py new file mode 100644 index 0000000..c6b60c9 --- /dev/null +++ b/src/server/config.py @@ -0,0 +1,221 @@ +""" +Configuration management for PowerMem API Server +""" + +import os +from typing import List, Optional +from pydantic_settings import BaseSettings +from pydantic import Field, ConfigDict, field_validator, model_validator + + +class ServerConfig(BaseSettings): + """Server configuration settings""" + + model_config = ConfigDict( + env_file=".env", + env_file_encoding="utf-8", + case_sensitive=False, + extra="ignore", # Ignore extra fields from .env that are not in this model + ) + + # Server settings + host: str = Field(default="0.0.0.0", env="POWERMEM_SERVER_HOST") + port: int = Field(default=8000, env="POWERMEM_SERVER_PORT") + workers: int = Field(default=4, env="POWERMEM_SERVER_WORKERS") + reload: bool = Field(default=False, env="POWERMEM_SERVER_RELOAD") + + # Authentication settings + auth_enabled: bool = Field(default=True, env="POWERMEM_SERVER_AUTH_ENABLED") + api_keys: str = Field(default="", env="POWERMEM_SERVER_API_KEYS") + + @model_validator(mode='after') + def parse_auth_enabled_from_env(self): + """Parse auth_enabled from environment variable, handling string 'false'""" + # Read directly from environment to bypass Pydantic's bool parsing + env_value = os.getenv('POWERMEM_SERVER_AUTH_ENABLED', '').strip().lower() + if env_value: + # Only update if explicitly set in environment + self.auth_enabled = env_value in ('true', '1', 'yes', 'on', 'enabled') + return self + + # Rate limiting settings + rate_limit_enabled: bool = Field(default=True, env="POWERMEM_SERVER_RATE_LIMIT_ENABLED") + rate_limit_per_minute: int = Field(default=100, env="POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE") + + # Logging settings + log_level: str = Field(default="INFO", env="POWERMEM_SERVER_LOG_LEVEL") + log_format: str = Field(default="json", env="POWERMEM_SERVER_LOG_FORMAT") # json or text + log_file: Optional[str] = Field(default="server.log", env="POWERMEM_SERVER_LOG_FILE") # Log file path, None to disable file logging + + # API settings + api_title: str = Field(default="PowerMem API", env="POWERMEM_SERVER_API_TITLE") + api_version: str = Field(default="v1", env="POWERMEM_SERVER_API_VERSION") + api_description: str = Field( + default="PowerMem HTTP API Server - Intelligent Memory System", + env="POWERMEM_SERVER_API_DESCRIPTION" + ) + + # CORS settings + cors_enabled: bool = Field(default=True, env="POWERMEM_SERVER_CORS_ENABLED") + cors_origins: str = Field(default="*", env="POWERMEM_SERVER_CORS_ORIGINS") + + def get_api_keys_list(self) -> List[str]: + """Get list of valid API keys""" + if not self.api_keys: + return [] + return [key.strip() for key in 
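+            # e.g. "key1, key2 ,key3" -> ["key1", "key2", "key3"]; empty
+            # entries from stray commas are dropped by the strip() check.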
self.api_keys.split(",") if key.strip()] + + def get_cors_origins_list(self) -> List[str]: + """Get list of CORS origins""" + if self.cors_origins == "*": + return ["*"] + return [origin.strip() for origin in self.cors_origins.split(",") if origin.strip()] + + +# Global config instance +config = ServerConfig() + +# Manually override config values from .env file if set +# This handles the case where Pydantic doesn't properly parse certain values +try: + env_file_path = os.path.join(os.getcwd(), '.env') + if os.path.exists(env_file_path): + with open(env_file_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + key = key.strip() + value = value.strip().strip('"').strip("'") + key_upper = key.upper() + + # Handle server settings + if key_upper == 'POWERMEM_SERVER_HOST': + config.host = value + elif key_upper == 'POWERMEM_SERVER_PORT': + try: + config.port = int(value) + except ValueError: + pass + elif key_upper == 'POWERMEM_SERVER_WORKERS': + try: + config.workers = int(value) + except ValueError: + pass + elif key_upper == 'POWERMEM_SERVER_RELOAD': + config.reload = value.lower() in ('true', '1', 'yes', 'on', 'enabled') + # Handle auth_enabled + elif key_upper == 'POWERMEM_SERVER_AUTH_ENABLED': + config.auth_enabled = value.lower() in ('true', '1', 'yes', 'on', 'enabled') + # Handle api_keys + elif key_upper == 'POWERMEM_SERVER_API_KEYS': + config.api_keys = value + # Handle rate limiting settings + elif key_upper == 'POWERMEM_SERVER_RATE_LIMIT_ENABLED': + config.rate_limit_enabled = value.lower() in ('true', '1', 'yes', 'on', 'enabled') + elif key_upper == 'POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE': + try: + config.rate_limit_per_minute = int(value) + except ValueError: + pass + # Handle log_format + elif key_upper == 'POWERMEM_SERVER_LOG_FORMAT': + config.log_format = value.lower() + # Handle log_level + elif key_upper == 'POWERMEM_SERVER_LOG_LEVEL': + config.log_level = value.upper() + # Handle log_file + elif key_upper == 'POWERMEM_SERVER_LOG_FILE': + config.log_file = value if value else None + # Handle API settings + elif key_upper == 'POWERMEM_SERVER_API_TITLE': + config.api_title = value + elif key_upper == 'POWERMEM_SERVER_API_VERSION': + config.api_version = value + elif key_upper == 'POWERMEM_SERVER_API_DESCRIPTION': + config.api_description = value + # Handle CORS settings + elif key_upper == 'POWERMEM_SERVER_CORS_ENABLED': + config.cors_enabled = value.lower() in ('true', '1', 'yes', 'on', 'enabled') + elif key_upper == 'POWERMEM_SERVER_CORS_ORIGINS': + config.cors_origins = value +except Exception: + # Fallback to environment variables + # Server settings + _host_env = os.getenv('POWERMEM_SERVER_HOST', '').strip() + if _host_env: + config.host = _host_env + + _port_env = os.getenv('POWERMEM_SERVER_PORT', '').strip() + if _port_env: + try: + config.port = int(_port_env) + except ValueError: + pass + + _workers_env = os.getenv('POWERMEM_SERVER_WORKERS', '').strip() + if _workers_env: + try: + config.workers = int(_workers_env) + except ValueError: + pass + + _reload_env = os.getenv('POWERMEM_SERVER_RELOAD', '').strip().lower() + if _reload_env: + config.reload = _reload_env in ('true', '1', 'yes', 'on', 'enabled') + + # Authentication settings + _auth_env = os.getenv('POWERMEM_SERVER_AUTH_ENABLED', '').strip().lower() + if _auth_env: + config.auth_enabled = _auth_env in ('true', '1', 'yes', 'on', 'enabled') + + _api_keys_env = os.getenv('POWERMEM_SERVER_API_KEYS', 
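+    # (This fallback mirrors the .env parsing above; it seems to exist because
+    # the v1-style Field(env=...) hints are not honored by pydantic-settings
+    # v2, so the values are re-read by hand here.)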
'').strip() + if _api_keys_env: + config.api_keys = _api_keys_env + + # Rate limiting settings + _rate_limit_enabled_env = os.getenv('POWERMEM_SERVER_RATE_LIMIT_ENABLED', '').strip().lower() + if _rate_limit_enabled_env: + config.rate_limit_enabled = _rate_limit_enabled_env in ('true', '1', 'yes', 'on', 'enabled') + + _rate_limit_per_minute_env = os.getenv('POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE', '').strip() + if _rate_limit_per_minute_env: + try: + config.rate_limit_per_minute = int(_rate_limit_per_minute_env) + except ValueError: + pass + + # Logging settings + _log_format_env = os.getenv('POWERMEM_SERVER_LOG_FORMAT', '').strip().lower() + if _log_format_env: + config.log_format = _log_format_env + + _log_level_env = os.getenv('POWERMEM_SERVER_LOG_LEVEL', '').strip().upper() + if _log_level_env: + config.log_level = _log_level_env + + _log_file_env = os.getenv('POWERMEM_SERVER_LOG_FILE', '').strip() + if _log_file_env: + config.log_file = _log_file_env if _log_file_env else None + + # API settings + _api_title_env = os.getenv('POWERMEM_SERVER_API_TITLE', '').strip() + if _api_title_env: + config.api_title = _api_title_env + + _api_version_env = os.getenv('POWERMEM_SERVER_API_VERSION', '').strip() + if _api_version_env: + config.api_version = _api_version_env + + _api_description_env = os.getenv('POWERMEM_SERVER_API_DESCRIPTION', '').strip() + if _api_description_env: + config.api_description = _api_description_env + + # CORS settings + _cors_enabled_env = os.getenv('POWERMEM_SERVER_CORS_ENABLED', '').strip().lower() + if _cors_enabled_env: + config.cors_enabled = _cors_enabled_env in ('true', '1', 'yes', 'on', 'enabled') + + _cors_origins_env = os.getenv('POWERMEM_SERVER_CORS_ORIGINS', '').strip() + if _cors_origins_env: + config.cors_origins = _cors_origins_env diff --git a/src/server/main.py b/src/server/main.py new file mode 100644 index 0000000..b94ef45 --- /dev/null +++ b/src/server/main.py @@ -0,0 +1,91 @@ +""" +Main FastAPI application for PowerMem API Server +""" + +from fastapi import FastAPI, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse +from starlette.exceptions import HTTPException as StarletteHTTPException +from slowapi.errors import RateLimitExceeded +from slowapi import _rate_limit_exceeded_handler + +from .config import config +from .api.v1 import router as v1_router +from .middleware.logging import setup_logging, LoggingMiddleware +from .middleware.rate_limit import rate_limit_middleware +from .middleware.error_handler import error_handler +from .middleware.auth import verify_api_key + +# Setup logging +setup_logging() + +# Create FastAPI app +app = FastAPI( + title=config.api_title, + version=config.api_version, + description=config.api_description, + docs_url="/docs", + redoc_url="/redoc", + openapi_url="/openapi.json", +) + +# Setup CORS +if config.cors_enabled: + app.add_middleware( + CORSMiddleware, + allow_origins=config.get_cors_origins_list(), + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + +# Setup logging middleware +app.add_middleware(LoggingMiddleware) + +# Setup rate limiting +rate_limit_middleware(app) + +# Include API routers +app.include_router(v1_router) + +# Add exception handlers +app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) +app.add_exception_handler(StarletteHTTPException, error_handler) +app.add_exception_handler(Exception, error_handler) + + +@app.get("/", tags=["root"]) +async def root(): + """Root endpoint""" + return { + "name": 
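+        # Root is a public discovery endpoint: no verify_api_key dependency,
+        # so it stays reachable even when authentication is enabled.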
"PowerMem API Server", + "version": config.api_version, + "docs": "/docs", + "health": "/api/v1/system/health", + } + + +@app.get("/api", tags=["root"]) +async def api_root(): + """API root endpoint""" + return { + "version": config.api_version, + "endpoints": { + "v1": "/api/v1", + "docs": "/docs", + "health": "/api/v1/health", + "status": "/api/v1/status", + } + } + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run( + "server.main:app", + host=config.host, + port=config.port, + reload=config.reload, + workers=config.workers if not config.reload else 1, + ) diff --git a/src/server/middleware/__init__.py b/src/server/middleware/__init__.py new file mode 100644 index 0000000..36b02fe --- /dev/null +++ b/src/server/middleware/__init__.py @@ -0,0 +1,17 @@ +""" +Middleware for PowerMem API Server +""" + +from .auth import get_api_key, verify_api_key +from .rate_limit import rate_limit_middleware +from .logging import setup_logging, log_request +from .error_handler import error_handler + +__all__ = [ + "get_api_key", + "verify_api_key", + "rate_limit_middleware", + "setup_logging", + "log_request", + "error_handler", +] diff --git a/src/server/middleware/auth.py b/src/server/middleware/auth.py new file mode 100644 index 0000000..060d1f6 --- /dev/null +++ b/src/server/middleware/auth.py @@ -0,0 +1,70 @@ +""" +Authentication middleware for PowerMem API +""" + +from typing import Optional +from fastapi import Header, HTTPException, Query, Security +from fastapi.security import APIKeyHeader, APIKeyQuery +from ..config import config +from ..models.errors import ErrorCode, APIError + +# API Key security schemes +api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False) +api_key_query = APIKeyQuery(name="api_key", auto_error=False) + + +def get_api_key( + x_api_key: Optional[str] = Security(api_key_header), + api_key: Optional[str] = Security(api_key_query), +) -> Optional[str]: + """ + Extract API key from header or query parameter. + + Args: + x_api_key: API key from X-API-Key header + api_key: API key from query parameter + + Returns: + API key string or None + """ + return x_api_key or api_key + + +def verify_api_key(api_key: Optional[str] = Security(get_api_key)) -> str: + """ + Verify API key and return it if valid. 
+ + Args: + api_key: API key to verify + + Returns: + Verified API key + + Raises: + HTTPException: If authentication is required but key is invalid + """ + if not config.auth_enabled: + return "anonymous" + + if not api_key: + raise HTTPException( + status_code=401, + detail={ + "code": ErrorCode.UNAUTHORIZED.value, + "message": "API key required", + "details": {} + } + ) + + valid_keys = config.get_api_keys_list() + if api_key not in valid_keys: + raise HTTPException( + status_code=401, + detail={ + "code": ErrorCode.UNAUTHORIZED.value, + "message": "Invalid API key", + "details": {} + } + ) + + return api_key diff --git a/src/server/middleware/error_handler.py b/src/server/middleware/error_handler.py new file mode 100644 index 0000000..0c1dcc7 --- /dev/null +++ b/src/server/middleware/error_handler.py @@ -0,0 +1,114 @@ +""" +Error handling middleware for PowerMem API +""" + +import logging +from typing import Union +from fastapi import Request, status +from fastapi.responses import JSONResponse +from fastapi.exceptions import RequestValidationError +from starlette.exceptions import HTTPException as StarletteHTTPException +from ..models.errors import ErrorCode, APIError +from ..models.response import ErrorResponse +from ..utils.metrics import get_metrics_collector +from datetime import datetime, timezone + +try: + from powermem.utils.utils import get_current_datetime +except ImportError: + # Fallback if powermem utils not available + def get_current_datetime(): + return datetime.now(timezone.utc) + +logger = logging.getLogger("server") + + +async def error_handler(request: Request, exc: Exception) -> JSONResponse: + """ + Global error handler for FastAPI application. + + Args: + request: FastAPI request object + exc: Exception that was raised + + Returns: + JSONResponse with error details + """ + # Record error metrics + metrics_collector = get_metrics_collector() + endpoint = metrics_collector.normalize_endpoint(request.url.path) + + # Handle APIError + if isinstance(exc, APIError): + error_type = exc.code.value + metrics_collector.record_error(error_type, endpoint) + + error_response = ErrorResponse( + error=exc.to_dict(), + timestamp=get_current_datetime(), + ) + return JSONResponse( + status_code=exc.status_code, + content=error_response.model_dump(mode='json'), + ) + + # Handle HTTPException + if isinstance(exc, StarletteHTTPException): + error_detail = exc.detail + if isinstance(error_detail, dict): + error_code = error_detail.get("code", ErrorCode.INTERNAL_ERROR.value) + error_message = error_detail.get("message", str(exc)) + else: + error_code = ErrorCode.INTERNAL_ERROR.value + error_message = str(error_detail) + + metrics_collector.record_error(error_code, endpoint) + + error_response = ErrorResponse( + error={ + "code": error_code, + "message": error_message, + "details": {}, + }, + timestamp=get_current_datetime(), + ) + return JSONResponse( + status_code=exc.status_code, + content=error_response.model_dump(mode='json'), + ) + + # Handle validation errors + if isinstance(exc, RequestValidationError): + metrics_collector.record_error("VALIDATION_ERROR", endpoint) + + error_response = ErrorResponse( + error={ + "code": ErrorCode.INVALID_REQUEST.value, + "message": "Request validation failed", + "details": { + "errors": exc.errors(), + }, + }, + timestamp=get_current_datetime(), + ) + return JSONResponse( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + content=error_response.model_dump(mode='json'), + ) + + # Handle unexpected errors + logger.exception(f"Unhandled error: 
{exc}") + metrics_collector.record_error(ErrorCode.INTERNAL_ERROR.value, endpoint) + + error_response = ErrorResponse( + error={ + "code": ErrorCode.INTERNAL_ERROR.value, + "message": "Internal server error", + "details": {}, + }, + timestamp=datetime.utcnow(), + ) + return JSONResponse( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=error_response.model_dump(mode='json'), + ) diff --git a/src/server/middleware/logging.py b/src/server/middleware/logging.py new file mode 100644 index 0000000..1498bc6 --- /dev/null +++ b/src/server/middleware/logging.py @@ -0,0 +1,288 @@ +""" +Logging middleware for PowerMem API +""" + +import logging +import os +import sys +import json +import time +import uuid +from logging.handlers import RotatingFileHandler +from typing import Callable +from fastapi import Request, Response +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.types import ASGIApp +from ..config import config +from ..utils.metrics import get_metrics_collector +from ..models.errors import APIError + +# Setup logger +logger = logging.getLogger("server") + + +def setup_logging(): + """Setup logging configuration + + This function can be safely called multiple times. + It will reconfigure loggers if called again. + """ + log_level = getattr(logging, config.log_level.upper(), logging.INFO) + + # Create formatter + if config.log_format == "json": + formatter = JsonFormatter() + text_formatter = None # JSON format doesn't need text formatter + else: + # Improved text format with timestamp + # Use right-aligned 7-character width for level name to accommodate WARNING/CRITICAL + formatter = logging.Formatter( + "%(asctime)s %(levelname)7s %(name)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + text_formatter = formatter + + # Setup file handler if log_file is configured + file_handler = None + if config.log_file: + try: + # Create log file directory if it doesn't exist + log_file_path = os.path.abspath(config.log_file) + log_dir = os.path.dirname(log_file_path) + if log_dir and not os.path.exists(log_dir): + os.makedirs(log_dir, exist_ok=True) + + # Use RotatingFileHandler with append mode to preserve history + # Max file size: 10MB, keep 5 backup files + file_handler = RotatingFileHandler( + log_file_path, + mode='a', # Append mode to preserve history + maxBytes=10 * 1024 * 1024, # 10MB + backupCount=5, + encoding='utf-8' + ) + file_handler.setLevel(log_level) + if config.log_format == "json": + file_handler.setFormatter(JsonFormatter()) + else: + file_handler.setFormatter(text_formatter) + except Exception as e: + # If file logging fails, log to stderr and continue with console logging only + print(f"Warning: Failed to setup file logging: {e}", file=sys.stderr) + file_handler = None + + # Configure Uvicorn loggers FIRST (before they start logging) + # This ensures the initial startup messages have timestamps + uvicorn_loggers = [ + logging.getLogger("uvicorn"), + logging.getLogger("uvicorn.error"), + logging.getLogger("uvicorn.access"), + ] + + for uvicorn_logger in uvicorn_loggers: + uvicorn_logger.setLevel(log_level) + # Remove existing handlers to avoid duplicates + uvicorn_logger.handlers.clear() + + # Create console handler for uvicorn + uvicorn_console_handler = logging.StreamHandler(sys.stdout) + if config.log_format == "json": + uvicorn_console_handler.setFormatter(JsonFormatter()) + else: + # Use the same text formatter for consistency + uvicorn_console_handler.setFormatter(text_formatter) + uvicorn_logger.addHandler(uvicorn_console_handler) + + # 
Add file handler if configured + if file_handler: + # Create a new file handler for each logger (they share the same file) + uvicorn_file_handler = RotatingFileHandler( + os.path.abspath(config.log_file), + mode='a', + maxBytes=10 * 1024 * 1024, + backupCount=5, + encoding='utf-8' + ) + uvicorn_file_handler.setLevel(log_level) + if config.log_format == "json": + uvicorn_file_handler.setFormatter(JsonFormatter()) + else: + uvicorn_file_handler.setFormatter(text_formatter) + uvicorn_logger.addHandler(uvicorn_file_handler) + + uvicorn_logger.propagate = False + + # Setup handler for application logger + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setFormatter(formatter) + + # Configure application logger + logger.setLevel(log_level) + # Remove existing handlers to avoid duplicates + logger.handlers.clear() + logger.addHandler(console_handler) + + # Add file handler if configured + if file_handler: + logger.addHandler(file_handler) + + # Prevent duplicate logs + logger.propagate = False + + +class JsonFormatter(logging.Formatter): + """JSON formatter for structured logging""" + + def __init__(self, datefmt=None): + super().__init__(datefmt=datefmt or "%Y-%m-%d %H:%M:%S") + + def format(self, record): + log_data = { + "timestamp": self.formatTime(record, self.datefmt), + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + } + + # Add extra fields + if hasattr(record, "request_id"): + log_data["request_id"] = record.request_id + if hasattr(record, "user_id"): + log_data["user_id"] = record.user_id + if hasattr(record, "agent_id"): + log_data["agent_id"] = record.agent_id + if hasattr(record, "method"): + log_data["method"] = record.method + if hasattr(record, "path"): + log_data["path"] = record.path + if hasattr(record, "status_code"): + log_data["status_code"] = record.status_code + if hasattr(record, "duration_ms"): + log_data["duration_ms"] = round(record.duration_ms, 2) + if hasattr(record, "client"): + log_data["client"] = record.client + + # Add exception info if present + if record.exc_info: + log_data["exception"] = self.formatException(record.exc_info) + + return json.dumps(log_data, ensure_ascii=False) + + +class LoggingMiddleware(BaseHTTPMiddleware): + """Middleware for request/response logging""" + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + # Generate request ID + request_id = str(uuid.uuid4()) + request.state.request_id = request_id + + # Start time + start_time = time.time() + + # Log request + logger.info( + f"{request.method} {request.url.path}", + extra={ + "request_id": request_id, + "method": request.method, + "path": request.url.path, + "client": request.client.host if request.client else None, + } + ) + + try: + # Process request + response = await call_next(request) + + # Calculate duration + duration = time.time() - start_time + + # Record metrics + metrics_collector = get_metrics_collector() + # Normalize path to endpoint + endpoint = metrics_collector.normalize_endpoint(request.url.path) + metrics_collector.record_api_request( + method=request.method, + endpoint=endpoint, + status_code=response.status_code, + duration=duration + ) + + # Log response + logger.info( + f"{request.method} {request.url.path} - {response.status_code}", + extra={ + "request_id": request_id, + "status_code": response.status_code, + "duration_ms": duration * 1000, + } + ) + + # Add request ID to response header + response.headers["X-Request-ID"] = request_id + + return response + + except 
Exception as e: + duration = time.time() - start_time + + # Determine status code and whether this is an expected error + status_code = 500 + is_expected_error = False + + if isinstance(e, APIError): + status_code = e.status_code + # Client errors (4xx) are expected, server errors (5xx) are unexpected + is_expected_error = status_code < 500 + + # Record metrics for error + metrics_collector = get_metrics_collector() + endpoint = metrics_collector.normalize_endpoint(request.url.path) + metrics_collector.record_api_request( + method=request.method, + endpoint=endpoint, + status_code=status_code, + duration=duration + ) + + # For expected errors (4xx), log without stack trace + # For unexpected errors (5xx), log with full stack trace + if is_expected_error: + logger.warning( + f"{request.method} {request.url.path} - {status_code}: {str(e)}", + extra={ + "request_id": request_id, + "status_code": status_code, + "error": str(e), + "duration_ms": duration * 1000, + }, + ) + else: + logger.error( + f"Error processing {request.method} {request.url.path}", + extra={ + "request_id": request_id, + "status_code": status_code, + "error": str(e), + "duration_ms": duration * 1000, + }, + exc_info=True, + ) + raise + + +def log_request(request: Request, message: str, **kwargs): + """ + Log a request with additional context. + + Args: + request: FastAPI request object + message: Log message + **kwargs: Additional context + """ + extra = { + "request_id": getattr(request.state, "request_id", None), + **kwargs + } + logger.info(message, extra=extra) diff --git a/src/server/middleware/rate_limit.py b/src/server/middleware/rate_limit.py new file mode 100644 index 0000000..0758f38 --- /dev/null +++ b/src/server/middleware/rate_limit.py @@ -0,0 +1,39 @@ +""" +Rate limiting middleware for PowerMem API +""" + +from typing import Callable +from fastapi import Request, HTTPException +from slowapi import Limiter, _rate_limit_exceeded_handler +from slowapi.util import get_remote_address +from slowapi.errors import RateLimitExceeded +from ..config import config +from ..models.errors import ErrorCode + +# Initialize rate limiter with Redis or in-memory storage +# For now, use in-memory storage (can be upgraded to Redis later) +limiter = Limiter(key_func=get_remote_address, storage_uri="memory://") + + +def rate_limit_middleware(app): + """ + Setup rate limiting middleware for FastAPI app. + + Args: + app: FastAPI application instance + """ + if not config.rate_limit_enabled: + return + + app.state.limiter = limiter + app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + + +def get_rate_limit_string() -> str: + """ + Get rate limit string from config. 
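+
+    Example (a sketch; assumes config.rate_limit_per_minute is 100):
+        >>> get_rate_limit_string()
+        '100/minute'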
+ + Returns: + Rate limit string (e.g., "100/minute") + """ + return f"{config.rate_limit_per_minute}/minute" diff --git a/src/server/models/__init__.py b/src/server/models/__init__.py new file mode 100644 index 0000000..aca843d --- /dev/null +++ b/src/server/models/__init__.py @@ -0,0 +1,46 @@ +""" +Request and response models for PowerMem API +""" + +from .request import ( + MemoryCreateRequest, + MemoryBatchCreateRequest, + MemoryItem, + MemoryUpdateRequest, + SearchRequest, + UserProfileUpdateRequest, + AgentMemoryShareRequest, + BulkDeleteRequest, +) +from .response import ( + APIResponse, + MemoryResponse, + MemoryListResponse, + SearchResponse, + UserProfileResponse, + HealthResponse, + StatusResponse, + ErrorResponse, +) +from .errors import ErrorCode, APIError + +__all__ = [ + "MemoryCreateRequest", + "MemoryBatchCreateRequest", + "MemoryItem", + "MemoryUpdateRequest", + "SearchRequest", + "UserProfileUpdateRequest", + "AgentMemoryShareRequest", + "BulkDeleteRequest", + "APIResponse", + "MemoryResponse", + "MemoryListResponse", + "SearchResponse", + "UserProfileResponse", + "HealthResponse", + "StatusResponse", + "ErrorResponse", + "ErrorCode", + "APIError", +] diff --git a/src/server/models/errors.py b/src/server/models/errors.py new file mode 100644 index 0000000..2bc1008 --- /dev/null +++ b/src/server/models/errors.py @@ -0,0 +1,78 @@ +""" +Error codes and exception classes for PowerMem API +""" + +from enum import Enum +from typing import Any, Dict, Optional + + +class ErrorCode(str, Enum): + """Error codes for API responses""" + + # General errors + INTERNAL_ERROR = "INTERNAL_ERROR" + INVALID_REQUEST = "INVALID_REQUEST" + UNAUTHORIZED = "UNAUTHORIZED" + FORBIDDEN = "FORBIDDEN" + NOT_FOUND = "NOT_FOUND" + RATE_LIMIT_EXCEEDED = "RATE_LIMIT_EXCEEDED" + SERVICE_UNAVAILABLE = "SERVICE_UNAVAILABLE" + + # Memory errors + MEMORY_NOT_FOUND = "MEMORY_NOT_FOUND" + MEMORY_CREATE_FAILED = "MEMORY_CREATE_FAILED" + MEMORY_UPDATE_FAILED = "MEMORY_UPDATE_FAILED" + MEMORY_DELETE_FAILED = "MEMORY_DELETE_FAILED" + MEMORY_SEARCH_FAILED = "MEMORY_SEARCH_FAILED" + MEMORY_VALIDATION_ERROR = "MEMORY_VALIDATION_ERROR" + MEMORY_DUPLICATE = "MEMORY_DUPLICATE" + MEMORY_BATCH_LIMIT_EXCEEDED = "MEMORY_BATCH_LIMIT_EXCEEDED" + + # Search errors + SEARCH_FAILED = "SEARCH_FAILED" + INVALID_SEARCH_PARAMS = "INVALID_SEARCH_PARAMS" + + # User errors + USER_NOT_FOUND = "USER_NOT_FOUND" + USER_PROFILE_NOT_FOUND = "USER_PROFILE_NOT_FOUND" + USER_PROFILE_UPDATE_FAILED = "USER_PROFILE_UPDATE_FAILED" + PROFILE_UPDATE_FAILED = "PROFILE_UPDATE_FAILED" # Keep for backward compatibility + + # Agent errors + AGENT_NOT_FOUND = "AGENT_NOT_FOUND" + AGENT_MEMORY_ACCESS_DENIED = "AGENT_MEMORY_ACCESS_DENIED" + AGENT_MEMORY_SHARE_FAILED = "AGENT_MEMORY_SHARE_FAILED" + + # System errors + SYSTEM_STORAGE_ERROR = "SYSTEM_STORAGE_ERROR" + SYSTEM_LLM_ERROR = "SYSTEM_LLM_ERROR" + SYSTEM_CONFIG_ERROR = "SYSTEM_CONFIG_ERROR" + + # Configuration errors (deprecated, use SYSTEM_*) + CONFIG_ERROR = "CONFIG_ERROR" + STORAGE_ERROR = "STORAGE_ERROR" + + +class APIError(Exception): + """Base exception for API errors""" + + def __init__( + self, + code: ErrorCode, + message: str, + details: Optional[Dict[str, Any]] = None, + status_code: int = 500, + ): + self.code = code + self.message = message + self.details = details or {} + self.status_code = status_code + super().__init__(self.message) + + def to_dict(self) -> Dict[str, Any]: + """Convert error to dictionary""" + return { + "code": self.code.value, + "message": self.message, + "details": 
self.details, + } diff --git a/src/server/models/request.py b/src/server/models/request.py new file mode 100644 index 0000000..f196f06 --- /dev/null +++ b/src/server/models/request.py @@ -0,0 +1,104 @@ +""" +Request models for PowerMem API +""" + +from typing import Any, Dict, List, Optional +from pydantic import BaseModel, Field + + +class MemoryCreateRequest(BaseModel): + """Request model for creating a memory""" + + content: str = Field(..., description="Memory content (string, dict, or list of dicts)") + user_id: Optional[str] = Field(None, description="User identifier") + agent_id: Optional[str] = Field(None, description="Agent identifier") + run_id: Optional[str] = Field(None, description="Run/conversation identifier") + metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata") + filters: Optional[Dict[str, Any]] = Field(None, description="Filter metadata for advanced filtering") + scope: Optional[str] = Field(None, description="Memory scope (e.g., 'user', 'agent', 'session')") + memory_type: Optional[str] = Field(None, description="Memory type classification") + infer: bool = Field(True, description="Enable intelligent memory processing") + + +class MemoryItem(BaseModel): + """Single memory item for batch creation""" + + content: str = Field(..., description="Memory content") + metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata for this memory") + filters: Optional[Dict[str, Any]] = Field(None, description="Filter metadata for this memory") + scope: Optional[str] = Field(None, description="Memory scope") + memory_type: Optional[str] = Field(None, description="Memory type classification") + + +class MemoryBatchCreateRequest(BaseModel): + """Request model for creating multiple memories in batch""" + + memories: List[MemoryItem] = Field(..., description="List of memories to create", min_length=1, max_length=100) + user_id: Optional[str] = Field(None, description="User identifier (applied to all memories)") + agent_id: Optional[str] = Field(None, description="Agent identifier (applied to all memories)") + run_id: Optional[str] = Field(None, description="Run/conversation identifier (applied to all memories)") + infer: bool = Field(True, description="Enable intelligent memory processing") + + +class MemoryUpdateRequest(BaseModel): + """Request model for updating a memory""" + + content: Optional[str] = Field(None, description="New content for the memory") + metadata: Optional[Dict[str, Any]] = Field(None, description="Updated metadata") + + +class MemoryUpdateItem(BaseModel): + """Single memory update item for batch update""" + + memory_id: int = Field(..., description="Memory ID to update") + content: Optional[str] = Field(None, description="New content for the memory (optional)") + metadata: Optional[Dict[str, Any]] = Field(None, description="Updated metadata (optional)") + + +class MemoryBatchUpdateRequest(BaseModel): + """Request model for updating multiple memories in batch""" + + updates: List[MemoryUpdateItem] = Field(..., description="List of memory updates", min_length=1, max_length=100) + user_id: Optional[str] = Field(None, description="User ID for access control") + agent_id: Optional[str] = Field(None, description="Agent ID for access control") + + +class SearchRequest(BaseModel): + """Request model for searching memories""" + + query: str = Field(..., description="Search query") + user_id: Optional[str] = Field(None, description="Filter by user ID") + agent_id: Optional[str] = Field(None, description="Filter by agent 
ID") + run_id: Optional[str] = Field(None, description="Filter by run ID") + filters: Optional[Dict[str, Any]] = Field(None, description="Additional filters") + limit: int = Field(default=30, ge=1, le=100, description="Maximum number of results") + + +class UserProfileUpdateRequest(BaseModel): + """Request model for updating user profile""" + + profile_content: Optional[str] = Field(None, description="Profile content text") + topics: Optional[Dict[str, Any]] = Field(None, description="Structured topics dictionary") + + +class AgentMemoryCreateRequest(BaseModel): + """Request model for creating agent memory""" + + content: str = Field(..., description="Memory content") + user_id: Optional[str] = Field(None, description="User ID") + run_id: Optional[str] = Field(None, description="Run ID") + + +class AgentMemoryShareRequest(BaseModel): + """Request model for sharing memories between agents""" + + target_agent_id: str = Field(..., description="Target agent ID to share with") + memory_ids: Optional[List[int]] = Field(None, description="Specific memory IDs to share (None for all)") + + +class BulkDeleteRequest(BaseModel): + """Request model for bulk deleting memories""" + + memory_ids: List[int] = Field(..., description="List of memory IDs to delete", min_length=1, max_length=100) + user_id: Optional[str] = Field(None, description="User ID for access control") + agent_id: Optional[str] = Field(None, description="Agent ID for access control") diff --git a/src/server/models/response.py b/src/server/models/response.py new file mode 100644 index 0000000..e31c49f --- /dev/null +++ b/src/server/models/response.py @@ -0,0 +1,174 @@ +""" +Response models for PowerMem API +""" + +from typing import Any, Dict, List, Optional +from datetime import datetime, timezone +from pydantic import BaseModel, Field, field_serializer + +try: + from powermem.utils.utils import get_current_datetime +except ImportError: + # Fallback if powermem utils not available + def get_current_datetime(): + return datetime.now(timezone.utc) + + +class APIResponse(BaseModel): + """Standard API response wrapper""" + + success: bool = Field(..., description="Whether the operation was successful") + data: Optional[Any] = Field(None, description="Response data") + message: Optional[str] = Field(None, description="Response message") + timestamp: datetime = Field(default_factory=get_current_datetime, description="Response timestamp") + + @field_serializer('timestamp') + def serialize_timestamp(self, value: datetime, _info): + """Serialize datetime to ISO format string with Z suffix (UTC)""" + if value is None: + return None + # Convert to UTC if timezone-aware, otherwise assume UTC + if value.tzinfo is not None: + utc_value = value.astimezone(timezone.utc) + else: + utc_value = value + # Format as ISO 8601 with Z suffix + return utc_value.replace(tzinfo=None).isoformat() + "Z" + + +class MemoryResponse(BaseModel): + """Response model for a single memory""" + + memory_id: int = Field(..., description="Memory ID") + content: str = Field(..., description="Memory content") + user_id: Optional[str] = Field(None, description="User ID") + agent_id: Optional[str] = Field(None, description="Agent ID") + run_id: Optional[str] = Field(None, description="Run ID") + metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadata") + created_at: Optional[datetime] = Field(None, description="Creation timestamp") + updated_at: Optional[datetime] = Field(None, description="Update timestamp") + + @field_serializer('created_at', 'updated_at') + 
def serialize_datetime(self, value: Optional[datetime], _info): + """Serialize datetime to ISO format string with Z suffix (UTC)""" + if value is None: + return None + # Convert to UTC if timezone-aware, otherwise assume UTC + if value.tzinfo is not None: + utc_value = value.astimezone(timezone.utc) + else: + utc_value = value + # Format as ISO 8601 with Z suffix + return utc_value.replace(tzinfo=None).isoformat() + "Z" + + +class MemoryListResponse(BaseModel): + """Response model for a list of memories""" + + memories: List[MemoryResponse] = Field(default_factory=list, description="List of memories") + total: int = Field(0, description="Total number of memories") + limit: int = Field(0, description="Limit applied") + offset: int = Field(0, description="Offset applied") + + +class SearchResult(BaseModel): + """Single search result""" + + memory_id: int = Field(..., description="Memory ID") + content: str = Field(..., description="Memory content") + score: Optional[float] = Field(None, description="Relevance score") + metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadata") + + +class SearchResponse(BaseModel): + """Response model for search results""" + + results: List[SearchResult] = Field(default_factory=list, description="Search results") + total: int = Field(0, description="Total number of results") + query: str = Field(..., description="Search query") + + +class UserProfileResponse(BaseModel): + """Response model for user profile""" + + user_id: str = Field(..., description="User ID") + profile_content: Optional[str] = Field(None, description="Profile content text") + topics: Optional[Dict[str, Any]] = Field(None, description="Structured topics") + updated_at: Optional[datetime] = Field(None, description="Last update timestamp") + + @field_serializer('updated_at') + def serialize_datetime(self, value: Optional[datetime], _info): + """Serialize datetime to ISO format string with Z suffix (UTC)""" + if value is None: + return None + # Convert to UTC if timezone-aware, otherwise assume UTC + if value.tzinfo is not None: + utc_value = value.astimezone(timezone.utc) + else: + utc_value = value + # Format as ISO 8601 with Z suffix + return utc_value.replace(tzinfo=None).isoformat() + "Z" + + +class HealthResponse(BaseModel): + """Response model for health check""" + + status: str = Field(..., description="Health status") + timestamp: datetime = Field(default_factory=get_current_datetime, description="Check timestamp") + + @field_serializer('timestamp') + def serialize_datetime(self, value: datetime, _info): + """Serialize datetime to ISO format string with Z suffix (UTC)""" + if value is None: + return None + # Convert to UTC if timezone-aware, otherwise assume UTC + if value.tzinfo is not None: + utc_value = value.astimezone(timezone.utc) + else: + utc_value = value + # Format as ISO 8601 with Z suffix + return utc_value.replace(tzinfo=None).isoformat() + "Z" + + +class StatusResponse(BaseModel): + """Response model for system status""" + + status: str = Field(..., description="System status") + version: str = Field(..., description="API version") + storage_type: Optional[str] = Field(None, description="Storage backend type") + llm_provider: Optional[str] = Field(None, description="LLM provider") + timestamp: datetime = Field(default_factory=get_current_datetime, description="Status timestamp") + + @field_serializer('timestamp') + def serialize_datetime(self, value: datetime, _info): + """Serialize datetime to ISO format string with Z suffix (UTC)""" + if value is 
None: + return None + # Convert to UTC if timezone-aware, otherwise assume UTC + if value.tzinfo is not None: + utc_value = value.astimezone(timezone.utc) + else: + utc_value = value + # Format as ISO 8601 with Z suffix + return utc_value.replace(tzinfo=None).isoformat() + "Z" + + +class ErrorResponse(BaseModel): + """Error response model""" + + success: bool = Field(False, description="Always false for errors") + error: Dict[str, Any] = Field(..., description="Error details") + timestamp: datetime = Field(default_factory=get_current_datetime, description="Error timestamp") + + @field_serializer('timestamp') + def serialize_datetime(self, value: datetime, _info): + """Serialize datetime to ISO format string with Z suffix (UTC)""" + if value is None: + return None + # Convert to UTC if timezone-aware, otherwise assume UTC + if value.tzinfo is not None: + utc_value = value.astimezone(timezone.utc) + else: + utc_value = value + # Format as ISO 8601 with Z suffix + return utc_value.replace(tzinfo=None).isoformat() + "Z" diff --git a/src/server/services/__init__.py b/src/server/services/__init__.py new file mode 100644 index 0000000..ae158d5 --- /dev/null +++ b/src/server/services/__init__.py @@ -0,0 +1,15 @@ +""" +Service layer for PowerMem API Server +""" + +from .memory_service import MemoryService +from .agent_service import AgentService +from .user_service import UserService +from .search_service import SearchService + +__all__ = [ + "MemoryService", + "AgentService", + "UserService", + "SearchService", +] diff --git a/src/server/services/agent_service.py b/src/server/services/agent_service.py new file mode 100644 index 0000000..5c2403f --- /dev/null +++ b/src/server/services/agent_service.py @@ -0,0 +1,421 @@ +""" +Agent service for PowerMem API +""" + +import logging +from typing import Any, Dict, List, Optional +from powermem import auto_config +from powermem.agent import AgentMemory +from ..models.errors import ErrorCode, APIError + +logger = logging.getLogger("server") + + +class AgentService: + """Service for agent memory operations""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + """ + Initialize agent service. + + Args: + config: PowerMem configuration (uses auto_config if None) + """ + if config is None: + config = auto_config() + + self.agent_memory = AgentMemory(config=config) + logger.info("AgentService initialized") + + def get_agent_memories( + self, + agent_id: str, + limit: int = 100, + offset: int = 0, + ) -> List[Dict[str, Any]]: + """ + Get all memories for an agent. 
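+
+        Example (a sketch; "agent-1" is a placeholder ID and a configured
+        PowerMem backend is assumed). Offset paging is emulated on top of
+        AgentMemory.get_all(), so consecutive pages look like this:
+            >>> service = AgentService()
+            >>> first_page = service.get_agent_memories("agent-1", limit=10)
+            >>> next_page = service.get_agent_memories("agent-1", limit=10, offset=10)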
+ + Args: + agent_id: Agent ID + limit: Maximum number of results + offset: Number of results to skip + + Returns: + List of memories + + Raises: + APIError: If retrieval fails + """ + try: + if not agent_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="agent_id is required", + status_code=400, + ) + + # AgentMemory.get_all() doesn't support offset, so we need to handle it manually + all_memories = self.agent_memory.get_all( + agent_id=agent_id, + limit=limit + offset, # Get more results to account for offset + ) + + # Apply offset manually + if offset > 0 and len(all_memories) > offset: + memories = all_memories[offset:offset + limit] + elif offset > 0: + memories = [] + else: + memories = all_memories[:limit] + + return memories + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to get agent memories {agent_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to get agent memories: {str(e)}", + status_code=500, + ) + + def create_agent_memory( + self, + agent_id: str, + content: str, + user_id: Optional[str] = None, + run_id: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + filters: Optional[Dict[str, Any]] = None, + scope: Optional[str] = None, + memory_type: Optional[str] = None, + infer: bool = True, + ) -> Dict[str, Any]: + """ + Create a memory for an agent. + + Uses AgentMemory system for intelligent memory management with + multi-agent collaboration, permissions, and scope support. + + Args: + agent_id: Agent ID + content: Memory content + user_id: User ID + run_id: Run ID (stored in metadata) + metadata: Metadata + filters: Filters (stored in metadata) + scope: Memory scope (e.g., 'AGENT', 'USER_GROUP', 'PUBLIC') + memory_type: Memory type (stored in metadata) + infer: Deprecated - AgentMemory handles intelligent processing internally + + Returns: + Created memory data with memory_id field + + Raises: + APIError: If creation fails + """ + try: + if not agent_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="agent_id is required", + status_code=400, + ) + + # Prepare metadata with run_id and other fields if provided + enhanced_metadata = metadata or {} + if run_id: + enhanced_metadata["run_id"] = run_id + if filters: + enhanced_metadata["filters"] = filters + if memory_type: + enhanced_metadata["memory_type"] = memory_type + + # AgentMemory.add() returns a dict with memory information + result = self.agent_memory.add( + content=content, + user_id=user_id, + agent_id=agent_id, + metadata=enhanced_metadata, + scope=scope, + ) + + # Ensure memory_id field exists (use "id" from result) + if isinstance(result, dict): + if "id" in result and "memory_id" not in result: + result["memory_id"] = result["id"] + logger.info(f"Agent memory created: {result.get('memory_id')} for agent {agent_id}") + return result + else: + logger.error(f"Failed to create memory for agent {agent_id}: unexpected result type={type(result)}") + raise APIError( + code=ErrorCode.MEMORY_CREATE_FAILED, + message="No memory was created. 
Unexpected result format.", + status_code=500, + ) + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to create agent memory: {e}", exc_info=True) + raise APIError( + code=ErrorCode.MEMORY_CREATE_FAILED, + message=f"Failed to create agent memory: {str(e)}", + status_code=500, + ) + + def share_memories( + self, + agent_id: str, + target_agent_id: str, + memory_ids: Optional[List[int]] = None, + ) -> Dict[str, Any]: + """ + Share memories between agents. + + Uses AgentMemory's share_memory method for proper memory sharing + between agents with permission and collaboration support. + + Args: + agent_id: Source agent ID + target_agent_id: Target agent ID + memory_ids: Specific memory IDs to share (None for all) + + Returns: + Sharing result + + Raises: + APIError: If sharing fails + """ + try: + if not agent_id or not target_agent_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="agent_id and target_agent_id are required", + status_code=400, + ) + + # Get memories to share + if memory_ids: + # Get specific memories by ID + memories = [] + all_memories = self.agent_memory.get_all(agent_id=agent_id) + logger.info(f"Found {len(all_memories)} total memories for agent {agent_id}, filtering by {len(memory_ids)} specific IDs: {memory_ids}") + + # Convert memory_ids to set, handling both int and str types + memory_id_set = set(memory_ids) + # Also create a set with string versions for type compatibility + memory_id_set_str = {str(mid) for mid in memory_ids} + + # Debug: log first memory's ID to understand the format + if all_memories: + first_mem_id = all_memories[0].get("id") or all_memories[0].get("memory_id") + logger.debug(f"Sample memory ID from get_all: {first_mem_id} (type: {type(first_mem_id).__name__}), requested IDs: {memory_ids} (types: {[type(mid).__name__ for mid in memory_ids]})") + + for memory in all_memories: + mem_id = memory.get("id") or memory.get("memory_id") + # Check both original type and string type for compatibility + if mem_id in memory_id_set or str(mem_id) in memory_id_set_str: + memories.append(memory) + logger.debug(f"Matched memory ID {mem_id} (type: {type(mem_id).__name__})") + else: + logger.debug(f"Memory ID {mem_id} (type: {type(mem_id).__name__}) not in requested IDs {memory_ids}. memory_id_set={memory_id_set}, memory_id_set_str={memory_id_set_str}") + + logger.info(f"After filtering, found {len(memories)} matching memories") + else: + memories = self.agent_memory.get_all(agent_id=agent_id) + logger.info(f"Found {len(memories)} memories for agent {agent_id} to share") + + if not memories: + logger.warning(f"No memories found for agent {agent_id}. 
Cannot share memories.") + return { + "shared_count": 0, + "source_agent_id": agent_id, + "target_agent_id": target_agent_id, + } + + # Use AgentMemory's share_memory method for proper sharing + shared_count = 0 + for memory in memories: + try: + mem_id = memory.get("id") or memory.get("memory_id") + if not mem_id: + logger.warning(f"Memory missing ID, skipping: {memory}") + continue + + # Try to use the share_memory method if available and supported + use_share_method = False + if hasattr(self.agent_memory, 'share_memory'): + # Check if the mode supports share_memory + current_mode = self.agent_memory.get_mode() if hasattr(self.agent_memory, 'get_mode') else None + if current_mode in ['multi_agent', 'hybrid']: + use_share_method = True + + if use_share_method: + try: + share_result = self.agent_memory.share_memory( + memory_id=str(mem_id), + from_agent=agent_id, + to_agents=[target_agent_id], + ) + if share_result.get("success", False): + shared_count += 1 + logger.debug(f"Successfully shared memory {mem_id} using share_memory method") + else: + logger.warning(f"share_memory returned unsuccessful result for memory {mem_id}, falling back to copy") + # Fallback to copy + try: + self._copy_memory_to_agent(memory, target_agent_id) + shared_count += 1 + except ValueError as e: + logger.warning(f"Skipping memory {mem_id} due to empty content: {e}") + except Exception as e: + logger.warning(f"Failed to copy memory {mem_id} to agent {target_agent_id}: {e}") + except (RuntimeError, ValueError, PermissionError) as e: + # share_memory not supported or failed, fallback to copy + logger.info(f"share_memory not supported or failed for memory {mem_id}: {e}. Using fallback copy method.") + try: + self._copy_memory_to_agent(memory, target_agent_id) + shared_count += 1 + except ValueError as ve: + logger.warning(f"Skipping memory {mem_id} due to empty content: {ve}") + except Exception as copy_e: + logger.warning(f"Failed to copy memory {mem_id} to agent {target_agent_id}: {copy_e}") + else: + # Fallback: copy memory to target agent + logger.debug(f"Using fallback copy method for memory {mem_id}") + try: + self._copy_memory_to_agent(memory, target_agent_id) + shared_count += 1 + except ValueError as e: + # Skip memories with empty content, but don't fail the entire operation + logger.warning(f"Skipping memory {mem_id} due to empty content: {e}") + except Exception as e: + # Log other errors but continue with other memories + logger.warning(f"Failed to copy memory {mem_id} to agent {target_agent_id}: {e}") + except Exception as e: + logger.warning(f"Failed to share memory {memory.get('id') or memory.get('memory_id')}: {e}", exc_info=True) + + logger.info(f"Shared {shared_count} memories from {agent_id} to {target_agent_id}") + + return { + "shared_count": shared_count, + "source_agent_id": agent_id, + "target_agent_id": target_agent_id, + } + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to share memories: {e}", exc_info=True) + raise APIError( + code=ErrorCode.AGENT_MEMORY_SHARE_FAILED, + message=f"Failed to share memories: {str(e)}", + status_code=500, + ) + + def _copy_memory_to_agent( + self, + memory: Dict[str, Any], + target_agent_id: str + ) -> None: + """ + Copy a memory to another agent (fallback method when share_memory is not supported). 
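+
+        Example (a sketch; the memory dict and agent ID are placeholders).
+        A memory with usable content is copied to the target agent; one
+        with empty content raises ValueError, which callers treat as "skip":
+            >>> service = AgentService()
+            >>> service._copy_memory_to_agent(
+            ...     {"id": 1, "content": "prefers dark mode", "user_id": "u1"},
+            ...     "agent-2",
+            ... )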
+ + Args: + memory: Memory dictionary to copy + target_agent_id: Target agent ID + + Raises: + ValueError: If memory content is empty or missing + """ + # Extract content from various possible field names + # Storage adapter returns "memory" field, get_memories returns "content" field + memory_id = memory.get("id") or memory.get("memory_id") + + # Debug: log memory structure to understand the issue + logger.debug(f"Copying memory {memory_id} to agent {target_agent_id}. Memory keys: {list(memory.keys())}") + + # AgentMemory.get_all() returns memories with 'content' field (from get_memories()) + # The agent layer standardizes to 'content' field, but keep 'memory' as fallback + # for compatibility with direct storage access + content = memory.get("content") or memory.get("memory", "") + + # Strip whitespace and check if content is actually empty + if isinstance(content, str): + content = content.strip() + elif content: + content = str(content).strip() + else: + content = "" + + if not content: + # Try to get content from metadata if available + metadata = memory.get("metadata", {}) + if isinstance(metadata, dict): + content = ( + metadata.get("content") or + metadata.get("memory") or + metadata.get("data") or + "" + ) + if isinstance(content, str): + content = content.strip() + elif content: + content = str(content).strip() + else: + content = "" + + if not content: + logger.warning( + f"Memory {memory_id} has empty content, skipping copy to agent {target_agent_id}. " + f"Memory fields: content={memory.get('content')}, memory={memory.get('memory')}, " + f"data={memory.get('data')}, document={memory.get('document')}" + ) + raise ValueError(f"Cannot copy memory {memory_id} with empty content") + + try: + # Extract scope - handle both enum and string + scope = memory.get("scope") + if scope and hasattr(scope, 'value'): + scope = scope.value + elif not isinstance(scope, str): + scope = None + + self.agent_memory.add( + content=content, + user_id=memory.get("user_id"), + agent_id=target_agent_id, + metadata=memory.get("metadata", {}), + scope=scope, + ) + logger.debug(f"Successfully copied memory {memory.get('id') or memory.get('memory_id')} to agent {target_agent_id}") + except Exception as e: + logger.error(f"Failed to copy memory {memory.get('id') or memory.get('memory_id')} to agent {target_agent_id}: {e}") + raise + + def get_shared_memories( + self, + agent_id: str, + limit: int = 100, + offset: int = 0, + ) -> List[Dict[str, Any]]: + """ + Get shared memories for an agent. + + Note: This is a simplified implementation. Full implementation would + track sharing relationships. 
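+
+        Example (a sketch; "agent-1" is a placeholder ID):
+            >>> service = AgentService()
+            >>> shared = service.get_shared_memories("agent-1", limit=20)
+            >>> isinstance(shared, list)
+            True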
+ + Args: + agent_id: Agent ID + limit: Maximum number of results + offset: Number of results to skip + + Returns: + List of shared memories + """ + # For now, return all memories for the agent + # In a full implementation, this would filter for shared memories only + return self.get_agent_memories(agent_id, limit, offset) diff --git a/src/server/services/memory_service.py b/src/server/services/memory_service.py new file mode 100644 index 0000000..da0e8c5 --- /dev/null +++ b/src/server/services/memory_service.py @@ -0,0 +1,593 @@ +""" +Memory service for PowerMem API +""" + +import logging +from typing import Any, Dict, List, Optional +from powermem import Memory, auto_config +from ..models.errors import ErrorCode, APIError +from ..utils.converters import memory_dict_to_response +from ..utils.metrics import get_metrics_collector + +logger = logging.getLogger("server") + + +class MemoryService: + """Service for memory management operations""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + """ + Initialize memory service. + + Args: + config: PowerMem configuration (uses auto_config if None) + """ + if config is None: + config = auto_config() + + self.memory = Memory(config=config) + logger.info("MemoryService initialized") + + def create_memory( + self, + content: str, + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + run_id: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + filters: Optional[Dict[str, Any]] = None, + scope: Optional[str] = None, + memory_type: Optional[str] = None, + infer: bool = True, + ) -> List[Dict[str, Any]]: + """ + Create a new memory. + + Args: + content: Memory content + user_id: User ID + agent_id: Agent ID + run_id: Run ID + metadata: Metadata + filters: Filters + scope: Scope + memory_type: Memory type + infer: Enable intelligent processing (may create multiple memories) + + Returns: + List of created memory data (may contain multiple memories if infer=True) + + Raises: + APIError: If creation fails + """ + try: + result = self.memory.add( + messages=content, + user_id=user_id, + agent_id=agent_id, + run_id=run_id, + metadata=metadata, + filters=filters, + scope=scope, + memory_type=memory_type, + infer=infer, + ) + + # Extract all created memories from result + # Result format: {"results": [{"id": memory_id, ...}], ...} + all_results = result.get("results", []) + + if not all_results: + raise APIError( + code=ErrorCode.MEMORY_CREATE_FAILED, + message="No memories were created", + status_code=500, + ) + + logger.info(f"Created {len(all_results)} memory/memories") + + # Normalize all results to include memory_id and other fields at top level + # Fetch full memory info from database to get timestamps (consistent with batch_create_memories) + normalized_memories = [] + + for result_item in all_results: + memory_id = result_item.get("id") + if memory_id is None: + continue + + # Fetch full memory info from database to get complete data including timestamps + try: + full_memory = self.get_memory(memory_id, user_id, agent_id) + if full_memory: + normalized_memories.append(full_memory) + continue + except Exception as e: + logger.warning(f"Failed to fetch full memory info for {memory_id}: {e}, using result_item data") + + # Fallback to result_item if get_memory fails + # Ensure metadata is always a dict, never None + result_metadata = result_item.get("metadata") + if result_metadata is None: + result_metadata = metadata or {} + + # Extract fields with fallback: use result_item value if present and not None, 
otherwise use input parameter + def get_field(result_key: str, param_value): + """Get field from result_item if available and not None, otherwise use param_value""" + if result_key in result_item: + result_value = result_item.get(result_key) + # Use result value if it's not None, otherwise fall back to param + return result_value if result_value is not None else param_value + return param_value + + normalized_memory = { + "id": memory_id, + "memory_id": memory_id, + "content": get_field("memory", content), + "user_id": get_field("user_id", user_id), + "agent_id": get_field("agent_id", agent_id), + "run_id": get_field("run_id", run_id), + "metadata": result_metadata if isinstance(result_metadata, dict) else {}, + } + + # Add timestamps only if they exist and are not None + if "created_at" in result_item and result_item["created_at"] is not None: + normalized_memory["created_at"] = result_item["created_at"] + if "updated_at" in result_item and result_item["updated_at"] is not None: + normalized_memory["updated_at"] = result_item["updated_at"] + + normalized_memories.append(normalized_memory) + + # Record successful memory operation + metrics_collector = get_metrics_collector() + metrics_collector.record_memory_operation("create", "success") + + # Return array of all created memories + return normalized_memories + + except Exception as e: + logger.error(f"Failed to create memory: {e}", exc_info=True) + + # Record failed memory operation + metrics_collector = get_metrics_collector() + metrics_collector.record_memory_operation("create", "failed") + + raise APIError( + code=ErrorCode.MEMORY_CREATE_FAILED, + message=f"Failed to create memory: {str(e)}", + status_code=500, + ) + + def get_memory( + self, + memory_id: int, + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + Get a memory by ID. + + Args: + memory_id: Memory ID + user_id: User ID for access control + agent_id: Agent ID for access control + + Returns: + Memory data + + Raises: + APIError: If memory not found + """ + try: + memory = self.memory.get( + memory_id=memory_id, + user_id=user_id, + agent_id=agent_id, + ) + + if memory is None: + raise APIError( + code=ErrorCode.MEMORY_NOT_FOUND, + message=f"Memory {memory_id} not found", + status_code=404, + ) + + # Handle field name mismatch: storage uses "data" but get_memory returns "content" + # If content is empty, try to get it from the underlying storage payload + if not memory.get("content") or memory.get("content") == "": + try: + # Access underlying storage to get raw payload with "data" field + storage_adapter = self.memory.storage + result = storage_adapter.vector_store.get(memory_id) + if result and result.payload: + data_content = result.payload.get("data", "") + if data_content: + memory["content"] = data_content + except Exception as e: + logger.warning(f"Failed to get content from storage payload for memory {memory_id}: {e}") + + return memory + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to get memory {memory_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to get memory: {str(e)}", + status_code=500, + ) + + def list_memories( + self, + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + limit: int = 100, + offset: int = 0, + ) -> List[Dict[str, Any]]: + """ + List memories with pagination. 
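+
+        Example (a sketch; "u1" is a placeholder user ID and a configured
+        backend is assumed). Non-dict items are filtered out, so every
+        element of the returned page is a dictionary:
+            >>> service = MemoryService()
+            >>> page = service.list_memories(user_id="u1", limit=20, offset=0)
+            >>> all(isinstance(m, dict) for m in page)
+            True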
+
+        Args:
+            user_id: Filter by user ID
+            agent_id: Filter by agent ID
+            limit: Maximum number of results
+            offset: Number of results to skip
+
+        Returns:
+            List of memories
+        """
+        try:
+            result = self.memory.get_all(
+                user_id=user_id,
+                agent_id=agent_id,
+                limit=limit,
+                offset=offset,
+            )
+
+            # Extract results from the dictionary response
+            # get_all returns {"results": [...], "relations": [...]}
+            memories_list = result.get("results", [])
+
+            # Filter out non-dict items and ensure all items are dictionaries
+            filtered_memories = []
+            for item in memories_list:
+                if isinstance(item, dict):
+                    filtered_memories.append(item)
+                else:
+                    logger.warning(f"Skipping non-dict item in memories list: {type(item)} - {item}")
+
+            return filtered_memories
+
+        except Exception as e:
+            logger.error(f"Failed to list memories: {e}", exc_info=True)
+            raise APIError(
+                code=ErrorCode.INTERNAL_ERROR,
+                message=f"Failed to list memories: {str(e)}",
+                status_code=500,
+            )
+
+    def update_memory(
+        self,
+        memory_id: int,
+        content: Optional[str] = None,
+        user_id: Optional[str] = None,
+        agent_id: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Update a memory.
+
+        Args:
+            memory_id: Memory ID
+            content: New content (optional)
+            user_id: User ID for access control
+            agent_id: Agent ID for access control
+            metadata: Updated metadata (optional)
+
+        Returns:
+            Updated memory data
+
+        Raises:
+            APIError: If update fails
+        """
+        try:
+            # First check if memory exists
+            existing = self.get_memory(memory_id, user_id, agent_id)
+
+            # At least one of content or metadata must be provided;
+            # surface this as a 400 client error rather than a generic 500
+            if content is None and metadata is None:
+                raise APIError(
+                    code=ErrorCode.INVALID_REQUEST,
+                    message="At least one of content or metadata must be provided",
+                    status_code=400,
+                )
+
+            # Use existing content if new content not provided
+            final_content = content if content is not None else existing.get("content", "")
+
+            # Merge metadata if both existing and new metadata exist
+            final_metadata = metadata
+            if metadata is not None and existing.get("metadata"):
+                final_metadata = {**existing.get("metadata", {}), **metadata}
+            elif existing.get("metadata"):
+                final_metadata = existing.get("metadata")
+
+            result = self.memory.update(
+                memory_id=memory_id,
+                content=final_content,
+                user_id=user_id,
+                agent_id=agent_id,
+                metadata=final_metadata,
+            )
+
+            # Ensure result contains id field (storage.update_memory returns payload without id)
+            if result and "id" not in result:
+                result["id"] = memory_id
+                result["memory_id"] = memory_id
+
+            logger.info(f"Memory updated: {memory_id}")
+            return result
+
+        except APIError:
+            raise
+        except Exception as e:
+            logger.error(f"Failed to update memory {memory_id}: {e}", exc_info=True)
+            raise APIError(
+                code=ErrorCode.MEMORY_UPDATE_FAILED,
+                message=f"Failed to update memory: {str(e)}",
+                status_code=500,
+            )
+
+    def delete_memory(
+        self,
+        memory_id: int,
+        user_id: Optional[str] = None,
+        agent_id: Optional[str] = None,
+    ) -> bool:
+        """
+        Delete a memory. 
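+
+        Example (a sketch; memory ID 42 and user "u1" are placeholders;
+        a missing memory raises APIError with status 404):
+            >>> service = MemoryService()
+            >>> service.delete_memory(42, user_id="u1")
+            True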
+ + Args: + memory_id: Memory ID + user_id: User ID for access control + agent_id: Agent ID for access control + + Returns: + True if deleted successfully + + Raises: + APIError: If deletion fails + """ + try: + # First check if memory exists + self.get_memory(memory_id, user_id, agent_id) + + success = self.memory.delete( + memory_id=memory_id, + user_id=user_id, + agent_id=agent_id, + ) + + if not success: + raise APIError( + code=ErrorCode.MEMORY_DELETE_FAILED, + message=f"Failed to delete memory {memory_id}", + status_code=500, + ) + + logger.info(f"Memory deleted: {memory_id}") + return True + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to delete memory {memory_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.MEMORY_DELETE_FAILED, + message=f"Failed to delete memory: {str(e)}", + status_code=500, + ) + + def bulk_delete_memories( + self, + memory_ids: List[int], + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + Delete multiple memories. + + Args: + memory_ids: List of memory IDs + user_id: User ID for access control + agent_id: Agent ID for access control + + Returns: + Dictionary with deletion results + """ + deleted = [] + failed = [] + + for memory_id in memory_ids: + try: + self.delete_memory(memory_id, user_id, agent_id) + deleted.append(memory_id) + except APIError as e: + failed.append({"memory_id": memory_id, "error": e.message}) + + return { + "deleted": deleted, + "failed": failed, + "total": len(memory_ids), + "deleted_count": len(deleted), + "failed_count": len(failed), + } + + def batch_create_memories( + self, + memories: List[Dict[str, Any]], + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + run_id: Optional[str] = None, + infer: bool = True, + ) -> Dict[str, Any]: + """ + Create multiple memories in batch. 
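+
+        Example (a sketch; the contents are placeholders). The report
+        separates per-item failures from successes instead of failing
+        the whole batch:
+            >>> service = MemoryService()
+            >>> report = service.batch_create_memories(
+            ...     memories=[{"content": "likes green tea"},
+            ...               {"content": "lives in Paris", "memory_type": "fact"}],
+            ...     user_id="u1",
+            ... )
+            >>> report["created_count"] + report["failed_count"] == report["total"]
+            True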
+ + Args: + memories: List of memory items, each containing: + - content: Memory content + - metadata: Optional metadata (overrides common metadata) + - filters: Optional filters (overrides common filters) + - scope: Optional scope + - memory_type: Optional memory type + user_id: Common user ID for all memories + agent_id: Common agent ID for all memories + run_id: Common run ID for all memories + infer: Enable intelligent processing + + Returns: + Dictionary with creation results + """ + created = [] + failed = [] + + for idx, memory_item in enumerate(memories): + try: + content = memory_item.get("content") + if not content: + raise ValueError("Memory content is required") + + # Use item-specific metadata/filters if provided, otherwise use common ones + metadata = memory_item.get("metadata") + filters = memory_item.get("filters") + scope = memory_item.get("scope") + memory_type = memory_item.get("memory_type") + + result = self.memory.add( + messages=content, + user_id=user_id, + agent_id=agent_id, + run_id=run_id, + metadata=metadata, + filters=filters, + scope=scope, + memory_type=memory_type, + infer=infer, + ) + + # Extract memory_id from result + # Result format: {"results": [{"id": memory_id, ...}], ...} + memory_id = None + if "results" in result and len(result["results"]) > 0: + memory_id = result["results"][0].get("id") + elif "memory_id" in result: + memory_id = result["memory_id"] + elif "id" in result: + memory_id = result["id"] + + if memory_id is None: + raise ValueError("Failed to extract memory_id from result") + + created.append({ + "index": idx, + "memory_id": memory_id, + "content": content, + }) + + except Exception as e: + logger.error(f"Failed to create memory at index {idx}: {e}", exc_info=True) + failed.append({ + "index": idx, + "content": memory_item.get("content", "N/A"), + "error": str(e), + }) + + return { + "created": created, + "failed": failed, + "total": len(memories), + "created_count": len(created), + "failed_count": len(failed), + } + + def batch_update_memories( + self, + updates: List[Dict[str, Any]], + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + Update multiple memories in batch. 
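+
+        Example (a sketch; the memory IDs are placeholders; each item must
+        carry content and/or metadata):
+            >>> service = MemoryService()
+            >>> report = service.batch_update_memories(
+            ...     updates=[{"memory_id": 1, "content": "updated text"},
+            ...              {"memory_id": 2, "metadata": {"tag": "travel"}}],
+            ...     user_id="u1",
+            ... )
+            >>> report["total"]
+            2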
+ + Args: + updates: List of update items, each containing: + - memory_id: Memory ID to update + - content: Optional new content + - metadata: Optional updated metadata + user_id: User ID for access control + agent_id: Agent ID for access control + + Returns: + Dictionary with update results + """ + updated = [] + failed = [] + + for idx, update_item in enumerate(updates): + try: + memory_id = update_item.get("memory_id") + if memory_id is None: + raise ValueError("memory_id is required for each update") + + content = update_item.get("content") + metadata = update_item.get("metadata") + + # At least one of content or metadata must be provided + if content is None and metadata is None: + raise ValueError("At least one of content or metadata must be provided") + + # Get existing memory to merge metadata if needed + existing = self.get_memory(memory_id, user_id, agent_id) + + # Merge metadata if both existing and new metadata exist + final_metadata = metadata + if metadata is not None and existing.get("metadata"): + final_metadata = {**existing.get("metadata", {}), **metadata} + elif existing.get("metadata"): + final_metadata = existing.get("metadata") + + # Use existing content if new content not provided + final_content = content if content is not None else existing.get("content", "") + + result = self.memory.update( + memory_id=memory_id, + content=final_content, + user_id=user_id, + agent_id=agent_id, + metadata=final_metadata, + ) + + updated.append({ + "index": idx, + "memory_id": memory_id, + }) + + except APIError as e: + logger.error(f"Failed to update memory at index {idx}: {e}", exc_info=True) + failed.append({ + "index": idx, + "memory_id": update_item.get("memory_id"), + "error": e.message, + }) + except Exception as e: + logger.error(f"Failed to update memory at index {idx}: {e}", exc_info=True) + failed.append({ + "index": idx, + "memory_id": update_item.get("memory_id"), + "error": str(e), + }) + + return { + "updated": updated, + "failed": failed, + "total": len(updates), + "updated_count": len(updated), + "failed_count": len(failed), + } \ No newline at end of file diff --git a/src/server/services/search_service.py b/src/server/services/search_service.py new file mode 100644 index 0000000..ad6a589 --- /dev/null +++ b/src/server/services/search_service.py @@ -0,0 +1,97 @@ +""" +Search service for PowerMem API +""" + +import logging +from typing import Any, Dict, List, Optional +from powermem import Memory, auto_config +from ..models.errors import ErrorCode, APIError +from ..utils.metrics import get_metrics_collector + +logger = logging.getLogger("server") + + +class SearchService: + """Service for memory search operations""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + """ + Initialize search service. + + Args: + config: PowerMem configuration (uses auto_config if None) + """ + if config is None: + config = auto_config() + + self.memory = Memory(config=config) + logger.info("SearchService initialized") + + def search_memories( + self, + query: str, + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + run_id: Optional[str] = None, + filters: Optional[Dict[str, Any]] = None, + limit: int = 30, + ) -> Dict[str, Any]: + """ + Search memories. 
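+
+        Example (a sketch; the query and user ID are placeholders; an
+        empty or whitespace-only query raises APIError with status 400):
+            >>> service = SearchService()
+            >>> hits = service.search_memories("favorite drinks", user_id="u1", limit=5)
+            >>> results = hits.get("results", [])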
+ + Args: + query: Search query + user_id: Filter by user ID + agent_id: Filter by agent ID + run_id: Filter by run ID + filters: Additional filters + limit: Maximum number of results + + Returns: + Search results dictionary + + Raises: + APIError: If search fails + """ + try: + if not query or not query.strip(): + raise APIError( + code=ErrorCode.INVALID_SEARCH_PARAMS, + message="Search query cannot be empty", + status_code=400, + ) + + results = self.memory.search( + query=query, + user_id=user_id, + agent_id=agent_id, + run_id=run_id, + filters=filters, + limit=limit, + ) + + logger.info(f"Search completed: {len(results.get('results', []))} results") + + # Record successful memory operation + metrics_collector = get_metrics_collector() + metrics_collector.record_memory_operation("search", "success") + + return results + + except APIError: + # Record failed memory operation for API errors + metrics_collector = get_metrics_collector() + metrics_collector.record_memory_operation("search", "failed") + raise + except Exception as e: + logger.error(f"Search failed: {e}", exc_info=True) + + # Record failed memory operation + metrics_collector = get_metrics_collector() + metrics_collector.record_memory_operation("search", "failed") + + raise APIError( + code=ErrorCode.SEARCH_FAILED, + message=f"Search failed: {str(e)}", + status_code=500, + ) diff --git a/src/server/services/user_service.py b/src/server/services/user_service.py new file mode 100644 index 0000000..dc1cdc7 --- /dev/null +++ b/src/server/services/user_service.py @@ -0,0 +1,309 @@ +""" +User service for PowerMem API +""" + +import logging +from typing import Any, Dict, List, Optional +from powermem import UserMemory, auto_config +from ..models.errors import ErrorCode, APIError + +logger = logging.getLogger("server") + + +class UserService: + """Service for user profile operations""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + """ + Initialize user service. + + Args: + config: PowerMem configuration (uses auto_config if None) + """ + if config is None: + config = auto_config() + + self.user_memory = UserMemory(config=config) + logger.info("UserService initialized") + + def get_user_profile(self, user_id: str) -> Dict[str, Any]: + """ + Get user profile. + + Args: + user_id: User ID + + Returns: + User profile data + + Raises: + APIError: If profile not found or retrieval fails + """ + try: + if not user_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="user_id is required", + status_code=400, + ) + + profile = self.user_memory.profile(user_id) + + if not profile: + raise APIError( + code=ErrorCode.USER_NOT_FOUND, + message=f"User profile for {user_id} not found", + status_code=404, + ) + + return profile + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to get user profile {user_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to get user profile: {str(e)}", + status_code=500, + ) + + def update_user_profile( + self, + user_id: str, + profile_content: Optional[str] = None, + topics: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + """ + Update user profile. 
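+
+        Example (a sketch; "u1" and the profile text are placeholders;
+        if both profile_content and topics are given, topics takes
+        precedence):
+            >>> service = UserService()
+            >>> profile = service.update_user_profile(
+            ...     "u1", profile_content="Prefers concise answers."
+            ... )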
+ + Args: + user_id: User ID + profile_content: Profile content text + topics: Structured topics dictionary + + Returns: + Updated profile data + + Raises: + APIError: If update fails + """ + try: + if not user_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="user_id is required", + status_code=400, + ) + + # Use UserMemory.add() to update profile by constructing a message + # that contains the profile information we want to save + # This follows the pattern shown in scenario_9_user_memory.md + import json + + if topics is not None: + # For structured topics, construct a message that will be extracted as topics + message_content = json.dumps(topics, ensure_ascii=False) + messages = [{"role": "user", "content": message_content}] + result = self.user_memory.add( + messages=messages, + user_id=user_id, + profile_type="topics", + ) + elif profile_content is not None: + # For profile content, construct a message containing the profile + messages = [{"role": "user", "content": profile_content}] + result = self.user_memory.add( + messages=messages, + user_id=user_id, + profile_type="content", + ) + else: + # No profile data provided + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="Either profile_content or topics must be provided", + status_code=400, + ) + + # Get updated profile using UserMemory.profile() interface + profile = self.user_memory.profile(user_id) + + logger.info(f"User profile updated: {user_id}") + return profile + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to update user profile {user_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.PROFILE_UPDATE_FAILED, + message=f"Failed to update user profile: {str(e)}", + status_code=500, + ) + + def get_user_memories( + self, + user_id: str, + limit: int = 100, + offset: int = 0, + ) -> List[Dict[str, Any]]: + """ + Get all memories for a user. + + Args: + user_id: User ID + limit: Maximum number of results + offset: Number of results to skip + + Returns: + List of memories + """ + try: + if not user_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="user_id is required", + status_code=400, + ) + + result = self.user_memory.get_all( + user_id=user_id, + limit=limit, + offset=offset, + ) + + # get_all returns a dict with "results" key, extract the list + if isinstance(result, dict): + memories = result.get("results", []) + elif isinstance(result, list): + memories = result + else: + memories = [] + + return memories + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to get user memories {user_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to get user memories: {str(e)}", + status_code=500, + ) + + def delete_user_memories(self, user_id: str) -> Dict[str, Any]: + """ + Delete all memories for a user (user profile deletion). 
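+
+        Example (a sketch; "u1" is a placeholder user ID):
+            >>> service = UserService()
+            >>> result = service.delete_user_memories("u1")
+            >>> result["user_id"]
+            'u1'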
+ + Args: + user_id: User ID + + Returns: + Deletion result + + Raises: + APIError: If deletion fails + """ + try: + if not user_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="user_id is required", + status_code=400, + ) + + # Get count of memories before deletion + result = self.user_memory.get_all(user_id=user_id) + if isinstance(result, dict): + total = len(result.get("results", [])) + elif isinstance(result, list): + total = len(result) + else: + total = 0 + + # Use UserMemory.delete_all() to delete all memories for the user + # This is the public interface method, not the internal memory.delete_all() + success = self.user_memory.delete_all(user_id=user_id) + + # If delete_all was successful, all memories were deleted + deleted_count = total if success else 0 + failed_count = 0 if success else total + + logger.info(f"Deleted {deleted_count} memories for user {user_id}") + + return { + "user_id": user_id, + "deleted_count": deleted_count, + "failed_count": failed_count, + "total": total, + } + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to delete user memories {user_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to delete user memories: {str(e)}", + status_code=500, + ) + + def delete_user_profile(self, user_id: str) -> Dict[str, Any]: + """ + Delete user profile. + + Args: + user_id: User ID + + Returns: + Deletion result + + Raises: + APIError: If deletion fails + """ + try: + if not user_id: + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="user_id is required", + status_code=400, + ) + + # Check if profile exists first + profile = self.user_memory.profile(user_id) + if not profile: + raise APIError( + code=ErrorCode.USER_NOT_FOUND, + message=f"User profile for {user_id} not found", + status_code=404, + ) + + # Use UserMemory.delete_profile() to delete the profile + # This is the public interface method + success = self.user_memory.delete_profile(user_id=user_id) + + if not success: + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to delete user profile for {user_id}", + status_code=500, + ) + + logger.info(f"User profile deleted: {user_id}") + + return { + "user_id": user_id, + "deleted": True, + } + + except APIError: + raise + except Exception as e: + logger.error(f"Failed to delete user profile {user_id}: {e}", exc_info=True) + raise APIError( + code=ErrorCode.INTERNAL_ERROR, + message=f"Failed to delete user profile: {str(e)}", + status_code=500, + ) \ No newline at end of file diff --git a/src/server/utils/__init__.py b/src/server/utils/__init__.py new file mode 100644 index 0000000..fcf8599 --- /dev/null +++ b/src/server/utils/__init__.py @@ -0,0 +1,20 @@ +""" +Utility functions for PowerMem API Server +""" + +from .converters import ( + memory_to_response, + memory_dict_to_response, + search_result_to_response, + user_profile_to_response, +) +from .validators import validate_user_id, validate_agent_id + +__all__ = [ + "memory_to_response", + "memory_dict_to_response", + "search_result_to_response", + "user_profile_to_response", + "validate_user_id", + "validate_agent_id", +] diff --git a/src/server/utils/converters.py b/src/server/utils/converters.py new file mode 100644 index 0000000..b050bce --- /dev/null +++ b/src/server/utils/converters.py @@ -0,0 +1,148 @@ +""" +Data conversion utilities for PowerMem API +""" + +from typing import Any, Dict, List, Optional +from datetime import datetime +from ..models.response import MemoryResponse, 
SearchResult, UserProfileResponse + + +def memory_to_response(memory_data: Dict[str, Any]) -> MemoryResponse: + """ + Convert memory dictionary to MemoryResponse model. + + Args: + memory_data: Memory data dictionary + + Returns: + MemoryResponse instance + """ + # Handle different memory data formats + memory_id = memory_data.get("memory_id") or memory_data.get("id") + # Handle field name mismatch: storage uses "data" but API expects "content" + # get_all returns "memory" field, get_memory returns "content" field + content = memory_data.get("memory") or memory_data.get("data") or memory_data.get("content") or memory_data.get("memory_content", "") + + # Parse timestamps + created_at = None + updated_at = None + + if "created_at" in memory_data: + created_at = _parse_datetime(memory_data["created_at"]) + if "updated_at" in memory_data: + updated_at = _parse_datetime(memory_data["updated_at"]) + + return MemoryResponse( + memory_id=memory_id, + content=content, + user_id=memory_data.get("user_id"), + agent_id=memory_data.get("agent_id"), + run_id=memory_data.get("run_id"), + metadata=memory_data.get("metadata", {}), + created_at=created_at, + updated_at=updated_at, + ) + + +def memory_dict_to_response(memory_dict: Dict[str, Any]) -> MemoryResponse: + """ + Convert memory dict from SDK to MemoryResponse. + + Args: + memory_dict: Memory dictionary from SDK + + Returns: + MemoryResponse instance + """ + return memory_to_response(memory_dict) + + +def search_result_to_response(result: Dict[str, Any]) -> SearchResult: + """ + Convert search result dictionary to SearchResult model. + + Args: + result: Search result dictionary + + Returns: + SearchResult instance + """ + # Handle different field names for content: "memory", "content", "memory_content", "data" + content = ( + result.get("memory") or + result.get("content") or + result.get("memory_content") or + result.get("data") or + "" + ) + + return SearchResult( + memory_id=result.get("memory_id") or result.get("id"), + content=content, + score=result.get("score") or result.get("similarity"), + metadata=result.get("metadata", {}), + ) + + +def user_profile_to_response( + user_id: str, + profile_data: Optional[Dict[str, Any]] = None, +) -> UserProfileResponse: + """ + Convert user profile data to UserProfileResponse model. + + Args: + user_id: User ID + profile_data: Profile data dictionary + + Returns: + UserProfileResponse instance + """ + if not profile_data: + return UserProfileResponse( + user_id=user_id, + profile_content=None, + topics=None, + updated_at=None, + ) + + updated_at = None + if "updated_at" in profile_data: + updated_at = _parse_datetime(profile_data["updated_at"]) + + return UserProfileResponse( + user_id=user_id, + profile_content=profile_data.get("profile_content"), + topics=profile_data.get("topics"), + updated_at=updated_at, + ) + + +def _parse_datetime(value: Any) -> Optional[datetime]: + """ + Parse datetime from various formats. 
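+
+    Illustrative inputs (each hits one branch below; values are made up):
+
+        _parse_datetime("2024-01-01T12:00:00Z")  # ISO 8601, "Z" rewritten to +00:00
+        _parse_datetime("2024-01-01 12:00:00")   # space-separated fallback format
+        _parse_datetime(datetime(2024, 1, 1))    # datetime objects pass through
+        _parse_datetime("not a date")            # unparseable strings return None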
+ + Args: + value: Datetime value (str, datetime, or None) + + Returns: + datetime object or None + """ + if value is None: + return None + + if isinstance(value, datetime): + return value + + if isinstance(value, str): + try: + # Try ISO format + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + try: + # Try other common formats + return datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + except ValueError: + return None + + return None diff --git a/src/server/utils/metrics.py b/src/server/utils/metrics.py new file mode 100644 index 0000000..b8aa826 --- /dev/null +++ b/src/server/utils/metrics.py @@ -0,0 +1,162 @@ +""" +Metrics collection for Prometheus format output +""" + +import time +import threading +from typing import Dict, List, Tuple, Optional +from collections import defaultdict + + +class MetricsCollector: + """Thread-safe metrics collector for Prometheus format""" + + # Histogram buckets for request duration + DURATION_BUCKETS = [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, float('inf')] + + def __init__(self): + self._lock = threading.Lock() + # API request counters: (method, endpoint, status) -> count + self._api_request_counters: Dict[Tuple[str, str, str], int] = defaultdict(int) + # API request durations: (method, endpoint) -> list of durations + self._api_request_durations: Dict[Tuple[str, str], List[float]] = defaultdict(list) + # Memory operation counters: (operation, status) -> count + self._memory_operation_counters: Dict[Tuple[str, str], int] = defaultdict(int) + # Error counters: (error_type, endpoint) -> count + self._error_counters: Dict[Tuple[str, str], int] = defaultdict(int) + self._start_time = time.time() + self._max_duration_samples = 10000 # Keep last 10k samples per endpoint + + def record_api_request(self, method: str, endpoint: str, status_code: int, duration: float): + """Record an API request metric""" + with self._lock: + status = str(status_code) + key = (method, endpoint, status) + self._api_request_counters[key] += 1 + + # Record duration for histogram + duration_key = (method, endpoint) + self._api_request_durations[duration_key].append(duration) + + # Keep only recent samples to avoid memory issues + if len(self._api_request_durations[duration_key]) > self._max_duration_samples: + self._api_request_durations[duration_key] = \ + self._api_request_durations[duration_key][-self._max_duration_samples:] + + def record_memory_operation(self, operation: str, status: str): + """Record a memory operation (create, search, etc.)""" + with self._lock: + key = (operation, status) + self._memory_operation_counters[key] += 1 + + def record_error(self, error_type: str, endpoint: str): + """Record an error""" + with self._lock: + key = (error_type, endpoint) + self._error_counters[key] += 1 + + def normalize_endpoint(self, path: str) -> str: + """Normalize path to endpoint format (remove IDs, etc.)""" + import re + # Replace UUIDs + path = re.sub( + r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', + '{id}', + path, + flags=re.IGNORECASE + ) + # Replace numeric IDs in path segments + path = re.sub(r'/\d+/', '/{id}/', path) + # Remove trailing ID if present + path = re.sub(r'/\d+$', '/{id}', path) + # Ensure it starts with /api/v1 if it's an API path + if not path.startswith('/api/v1'): + # Try to extract API path + if '/api/v1' in path: + path = '/api/v1' + path.split('/api/v1', 1)[1] + return path + + def _calculate_histogram_buckets(self, durations: List[float]) -> Dict[float, int]: + """Calculate histogram bucket 
counts (cumulative, per Prometheus convention)"""
+        buckets = {bucket: 0 for bucket in self.DURATION_BUCKETS}
+        for duration in durations:
+            for bucket in self.DURATION_BUCKETS:
+                if duration <= bucket:
+                    # Prometheus histogram buckets are cumulative: an observation
+                    # counts toward every bucket whose upper bound it fits under,
+                    # so keep counting instead of breaking after the first match.
+                    buckets[bucket] += 1
+        return buckets
+
+    def get_metrics(self) -> str:
+        """Get metrics in Prometheus format"""
+        with self._lock:
+            lines = []
+
+            # powermem_api_requests_total
+            lines.append("# HELP powermem_api_requests_total Total number of API requests")
+            lines.append("# TYPE powermem_api_requests_total counter")
+            for (method, endpoint, status), count in sorted(self._api_request_counters.items()):
+                lines.append(
+                    f'powermem_api_requests_total{{method="{method}",endpoint="{endpoint}",status="{status}"}} {count}'
+                )
+            lines.append("")
+
+            # powermem_memory_operations_total
+            lines.append("# HELP powermem_memory_operations_total Total number of memory operations")
+            lines.append("# TYPE powermem_memory_operations_total counter")
+            for (operation, status), count in sorted(self._memory_operation_counters.items()):
+                lines.append(
+                    f'powermem_memory_operations_total{{operation="{operation}",status="{status}"}} {count}'
+                )
+            lines.append("")
+
+            # powermem_api_request_duration_seconds (histogram)
+            lines.append("# HELP powermem_api_request_duration_seconds API request duration in seconds")
+            lines.append("# TYPE powermem_api_request_duration_seconds histogram")
+            for (method, endpoint), durations in sorted(self._api_request_durations.items()):
+                if durations:
+                    buckets = self._calculate_histogram_buckets(durations)
+                    # Output buckets
+                    for bucket in self.DURATION_BUCKETS:
+                        if bucket == float('inf'):
+                            # Use +Inf for the last bucket
+                            lines.append(
+                                f'powermem_api_request_duration_seconds_bucket{{method="{method}",endpoint="{endpoint}",le="+Inf"}} {buckets[bucket]}'
+                            )
+                        else:
+                            # Format bucket value: use .1f for values >= 0.1, .2f for smaller values
+                            if bucket >= 0.1:
+                                bucket_str = f"{bucket:.1f}"
+                            else:
+                                bucket_str = f"{bucket:.2f}"
+                            lines.append(
+                                f'powermem_api_request_duration_seconds_bucket{{method="{method}",endpoint="{endpoint}",le="{bucket_str}"}} {buckets[bucket]}'
+                            )
+                    # Output sum and count
+                    sum_duration = sum(durations)
+                    count = len(durations)
+                    lines.append(
+                        f'powermem_api_request_duration_seconds_sum{{method="{method}",endpoint="{endpoint}"}} {sum_duration:.6f}'
+                    )
+                    lines.append(
+                        f'powermem_api_request_duration_seconds_count{{method="{method}",endpoint="{endpoint}"}} {count}'
+                    )
+            lines.append("")
+
+            # powermem_errors_total
+            lines.append("# HELP powermem_errors_total Total number of errors")
+            lines.append("# TYPE powermem_errors_total counter")
+            for (error_type, endpoint), count in sorted(self._error_counters.items()):
+                lines.append(
+                    f'powermem_errors_total{{error_type="{error_type}",endpoint="{endpoint}"}} {count}'
+                )
+
+            return '\n'.join(lines) + '\n'
+
+
+# Global metrics collector instance
+_metrics_collector = MetricsCollector()
+
+
+def get_metrics_collector() -> MetricsCollector:
+    """Get the global metrics collector instance"""
+    return _metrics_collector

diff --git a/src/server/utils/validators.py b/src/server/utils/validators.py
new file mode 100644
index 0000000..47a7266
--- /dev/null
+++ b/src/server/utils/validators.py
@@ -0,0 +1,58 @@
+"""
+Validation utilities for PowerMem API
+"""
+
+from typing import Optional
+from ..models.errors import ErrorCode, APIError
+
+
+def validate_user_id(user_id: Optional[str]) -> Optional[str]:
+    """
+    Validate user ID format.
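+
+    Illustrative behavior (sketch only, not executed anywhere):
+
+        validate_user_id("  alice  ")  # -> "alice" (whitespace stripped)
+        validate_user_id(None)         # -> None (optional IDs pass through)
+        validate_user_id("   ")        # raises APIError with status 400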
+ + Args: + user_id: User ID to validate + + Returns: + Validated user ID + + Raises: + APIError: If user ID is invalid + """ + if user_id is None: + return None + + if not isinstance(user_id, str) or not user_id.strip(): + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="Invalid user_id format", + status_code=400, + ) + + return user_id.strip() + + +def validate_agent_id(agent_id: Optional[str]) -> Optional[str]: + """ + Validate agent ID format. + + Args: + agent_id: Agent ID to validate + + Returns: + Validated agent ID + + Raises: + APIError: If agent ID is invalid + """ + if agent_id is None: + return None + + if not isinstance(agent_id, str) or not agent_id.strip(): + raise APIError( + code=ErrorCode.INVALID_REQUEST, + message="Invalid agent_id format", + status_code=400, + ) + + return agent_id.strip()
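+
+
+# validate_agent_id mirrors validate_user_id. Illustrative behavior
+# (comments only -- nothing here is executed; values are made up):
+#   validate_agent_id("  agent-7  ")  -> "agent-7"
+#   validate_agent_id(None)           -> None
+#   validate_agent_id("")             -> raises APIError (400, INVALID_REQUEST)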