From 33cec84d170258fea7dfc918b5b7aa3277373ebe Mon Sep 17 00:00:00 2001 From: Ekaterina Sirazitdinova Date: Wed, 4 Jun 2025 22:29:35 +0400 Subject: [PATCH 1/5] Add chat-llama-nemotron example as a regular directory --- community/chat-llama-nemotron/.gitignore | 156 +++ community/chat-llama-nemotron/README.md | 135 ++ .../backend-dynamo/.gitignore | 43 + .../backend-dynamo/README.md | 118 ++ .../config/agg_llama_nemotron_4b.yaml | 36 + .../backend-dynamo/llm-proxy/README.md | 86 ++ .../backend-dynamo/llm-proxy/config.yaml | 33 + .../backend-dynamo/llm-proxy/proxy.py | 72 ++ .../backend-dynamo/llm-proxy/requirements.txt | 4 + .../backend-rag/.gitignore | 58 + .../chat-llama-nemotron/backend-rag/README.md | 73 ++ .../backend-rag/requirements.txt | 11 + .../backend-rag/src/app.py | 462 +++++++ .../backend-rag/src/config/app_config.yaml | 27 + .../backend-rag/src/config/config_loader.py | 44 + .../backend-rag/src/config/rag_config.yaml | 30 + .../backend-rag/src/rag_service.py | 417 +++++++ .../backend-rag/src/requirements.txt | 8 + .../chat-llama-nemotron/frontend/.gitignore | 28 + .../chat-llama-nemotron/frontend/README.md | 36 + .../chat-llama-nemotron/frontend/package.json | 50 + .../frontend/public/config/app_config.yaml | 78 ++ .../frontend/public/index.html | 100 ++ .../frontend/public/robots.txt | 3 + .../chat-llama-nemotron/frontend/src/App.css | 1089 +++++++++++++++++ .../chat-llama-nemotron/frontend/src/App.js | 610 +++++++++ .../frontend/src/components/FileIngestion.css | 198 +++ .../frontend/src/components/FileIngestion.js | 295 +++++ .../frontend/src/config/app_config.yaml | 32 + .../frontend/src/config/config_loader.js | 87 ++ .../frontend/src/index.css | 14 + .../chat-llama-nemotron/frontend/src/index.js | 11 + 32 files changed, 4444 insertions(+) create mode 100644 community/chat-llama-nemotron/.gitignore create mode 100644 community/chat-llama-nemotron/README.md create mode 100644 community/chat-llama-nemotron/backend-dynamo/.gitignore create mode 100644 community/chat-llama-nemotron/backend-dynamo/README.md create mode 100644 community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml create mode 100644 community/chat-llama-nemotron/backend-dynamo/llm-proxy/README.md create mode 100644 community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml create mode 100644 community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py create mode 100644 community/chat-llama-nemotron/backend-dynamo/llm-proxy/requirements.txt create mode 100644 community/chat-llama-nemotron/backend-rag/.gitignore create mode 100644 community/chat-llama-nemotron/backend-rag/README.md create mode 100644 community/chat-llama-nemotron/backend-rag/requirements.txt create mode 100644 community/chat-llama-nemotron/backend-rag/src/app.py create mode 100644 community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml create mode 100644 community/chat-llama-nemotron/backend-rag/src/config/config_loader.py create mode 100644 community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml create mode 100644 community/chat-llama-nemotron/backend-rag/src/rag_service.py create mode 100644 community/chat-llama-nemotron/backend-rag/src/requirements.txt create mode 100644 community/chat-llama-nemotron/frontend/.gitignore create mode 100644 community/chat-llama-nemotron/frontend/README.md create mode 100644 community/chat-llama-nemotron/frontend/package.json create mode 100644 community/chat-llama-nemotron/frontend/public/config/app_config.yaml create mode 100644 
community/chat-llama-nemotron/frontend/public/index.html create mode 100644 community/chat-llama-nemotron/frontend/public/robots.txt create mode 100644 community/chat-llama-nemotron/frontend/src/App.css create mode 100644 community/chat-llama-nemotron/frontend/src/App.js create mode 100644 community/chat-llama-nemotron/frontend/src/components/FileIngestion.css create mode 100644 community/chat-llama-nemotron/frontend/src/components/FileIngestion.js create mode 100644 community/chat-llama-nemotron/frontend/src/config/app_config.yaml create mode 100644 community/chat-llama-nemotron/frontend/src/config/config_loader.js create mode 100644 community/chat-llama-nemotron/frontend/src/index.css create mode 100644 community/chat-llama-nemotron/frontend/src/index.js diff --git a/community/chat-llama-nemotron/.gitignore b/community/chat-llama-nemotron/.gitignore new file mode 100644 index 00000000..63f826dc --- /dev/null +++ b/community/chat-llama-nemotron/.gitignore @@ -0,0 +1,156 @@ +# Dependencies +node_modules/ +.pnp/ +.pnp.js +package-lock.json +yarn.lock + +# Testing +coverage/ +.nyc_output/ +test-results/ +junit.xml + +# Production +build/ +dist/ +out/ +.next/ +.nuxt/ +.cache/ +.output/ + +# Environment files +.env +.env.* +!.env.example +.env.local +.env.development.local +.env.test.local +.env.production.local +.env*.local +*.env + +# Logs +npm-debug.log* +yarn-debug.log* +yarn-error.log* +logs/ +*.log +debug.log +error.log + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*.sublime-workspace +*.sublime-project +.project +.classpath +.settings/ +*.code-workspace + +# OS +.DS_Store +Thumbs.db +desktop.ini +$RECYCLE.BIN/ +*.lnk + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.env/ +.venv/ +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.python-version +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +dist/ +build/ +eggs/ +parts/ +bin/ +var/ +sdist/ +develop-eggs/ +.installed.cfg +lib/ +lib64/ + +# RAG specific +data/ +embeddings/ +*.faiss +*.pkl +*.bin +*.vec +*.model +*.index +chunks/ +documents/ +vectors/ +corpus/ +indexes/ + +# Temporary files +*.tmp +*.temp +*.bak +*.swp +*~ +*.swx +*.swo +*.swn +*.bak +*.orig +*.rej +*.patch +*.diff + +# Build artifacts +*.min.js +*.min.css +*.map +*.gz +*.br +*.zip +*.tar +*.tar.gz +*.tgz +*.rar +*.7z + +# Debug +.debug/ +debug/ +debug.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Local development +.local/ +local/ +local.* \ No newline at end of file diff --git a/community/chat-llama-nemotron/README.md b/community/chat-llama-nemotron/README.md new file mode 100644 index 00000000..c4c5bc22 --- /dev/null +++ b/community/chat-llama-nemotron/README.md @@ -0,0 +1,135 @@ +# Chat with Llama-3.1-Nemotron-Nano-4B-v1.1 + +A React-based chat interface for interacting with an LLM, featuring RAG (Retrieval-Augmented Generation) capabilities and NVIDIA Dynamo backend serving NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1. + +## Project Structure + +``` +. 
+├── frontend/          # React frontend application
+├── backend-rag/       # RAG service backend
+└── backend-dynamo/    # NVIDIA Dynamo backend service
+    └── llm-proxy/     # Proxy server for NVIDIA Dynamo
+```
+
+## Prerequisites
+
+- Node.js 18 or higher
+- Python 3.8 or higher
+- NVIDIA GPU with CUDA support (for LLM serving with NVIDIA Dynamo)
+- Docker (optional, for containerized deployment)
+- Git
+
+## Configuration
+
+### Frontend
+
+The frontend configuration is managed through YAML files in `frontend/public/config/`:
+
+- `app_config.yaml`: Main application configuration:
+  - API endpoints
+  - UI settings
+  - File upload settings
+
+See [frontend/README.md](frontend/README.md) for details.
+
+### Backend
+
+Each service has its own configuration files:
+
+- RAG backend: see [backend-rag/README.md](backend-rag/README.md)
+- LLM Proxy: see [backend-dynamo/llm-proxy/README.md](backend-dynamo/llm-proxy/README.md)
+- NVIDIA Dynamo backend: see [backend-dynamo/README.md](backend-dynamo/README.md)
+
+
+## Setup
+
+### Llama-3.1-Nemotron-Nano-4B-v1.1 running on a GPU Server
+
+This step should be performed on a machine with a GPU.
+
+Set up the NVIDIA Dynamo backend running Llama-3.1-Nemotron-Nano-4B-v1.1 by following the instructions in [backend-dynamo/README.md](backend-dynamo/README.md).
+
+### Local client with a local RAG database
+
+These steps can be performed locally and don't require a GPU.
+
+1. Clone the repository:
+   ```bash
+   git clone 
+   cd react-llama-client
+   ```
+
+2. Install frontend dependencies:
+   ```bash
+   cd frontend
+   npm install
+   ```
+
+3. Set up backend services:
+
+   For Unix/macOS:
+   ```bash
+   # RAG Backend
+   cd backend-rag
+   python -m venv venv
+   source venv/bin/activate
+   pip install -r requirements.txt
+
+   # LLM Proxy
+   cd backend-dynamo/llm-proxy
+   python -m venv venv
+   source venv/bin/activate
+   pip install -r requirements.txt
+   ```
+
+   For Windows:
+   ```bash
+   # RAG Backend
+   cd backend-rag
+   python -m venv venv
+   .\venv\Scripts\activate
+   pip install -r requirements.txt
+
+   # LLM Proxy
+   cd backend-dynamo\llm-proxy
+   python -m venv venv
+   .\venv\Scripts\activate
+   pip install -r requirements.txt
+   ```
+
+4. Start the services (each in a new terminal):
+
+   For Unix/macOS:
+   ```bash
+   # Start frontend (in frontend directory)
+   cd frontend
+   npm start
+
+   # Start RAG backend (in backend-rag directory)
+   cd backend-rag
+   source venv/bin/activate
+   python src/app.py
+
+   # Start LLM proxy (in backend-dynamo/llm-proxy directory)
+   cd backend-dynamo/llm-proxy
+   source venv/bin/activate
+   python proxy.py
+   ```
+
+   For Windows:
+   ```bash
+   # Start frontend (in frontend directory)
+   cd frontend
+   npm start
+
+   # Start RAG backend (in backend-rag directory)
+   cd backend-rag
+   .\venv\Scripts\activate
+   python src\app.py
+
+   # Start LLM proxy (in backend-dynamo\llm-proxy directory)
+   cd backend-dynamo\llm-proxy
+   .\venv\Scripts\activate
+   python proxy.py
+   ```
diff --git a/community/chat-llama-nemotron/backend-dynamo/.gitignore b/community/chat-llama-nemotron/backend-dynamo/.gitignore
new file mode 100644
index 00000000..a41f1e0e
--- /dev/null
+++ b/community/chat-llama-nemotron/backend-dynamo/.gitignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
+.env/
+.venv/
+pip-log.txt
+pip-delete-this-directory.txt
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Logs
+logs/
+*.log
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Environment variables
+.env
+.env.local
+.env.*.local
+
+# AWS
+.aws/
+aws.json
+credentials.json
\ No newline at end of file
diff --git a/community/chat-llama-nemotron/backend-dynamo/README.md b/community/chat-llama-nemotron/backend-dynamo/README.md
new file mode 100644
index 00000000..87778383
--- /dev/null
+++ b/community/chat-llama-nemotron/backend-dynamo/README.md
@@ -0,0 +1,118 @@
+# NVIDIA Dynamo Backend Service
+
+This is the [NVIDIA Dynamo](https://github.com/ai-dynamo/dynamo) backend service for the chat application. It provides the core LLM capabilities using [NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1](https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1).
+
+**NVIDIA Dynamo** is an open-source, high-throughput, low-latency inference framework designed for serving generative AI and reasoning models in multi-node distributed environments. Dynamo is inference-engine agnostic (it supports TRT-LLM, vLLM, SGLang, and others).
+
+**NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1** is a large language model (LLM) derived from [nvidia/Llama-3.1-Minitron-4B-Width-Base](https://huggingface.co/nvidia/Llama-3.1-Minitron-4B-Width-Base), which was created from Llama 3.1 8B using NVIDIA's [LLM compression technique](https://arxiv.org/abs/2408.11796) and offers improvements in model accuracy and efficiency. It is a reasoning model that is post-trained for reasoning, human chat preferences, and tasks such as RAG and tool calling.
+
+## Prerequisites
+
+Llama-3.1-Nemotron-Nano-4B-v1.1 requires a GPU. Make sure to run this server application on a local or remote workstation with an NVIDIA GPU. It does not have to be on the same device as the RAG backend and the frontend.
+
+- Ubuntu 24.04 (preferred)
+- Python 3.12 or higher
+- CUDA 12.8 or higher
+- CUDA driver 545.23.08 or higher
+- Docker 23.x or higher
+- Rust 1.86.0
+
+## Setup
+
+Build the Dynamo base image:
+
+```bash
+# Assuming you have already cloned this demo repository, make sure you're in the backend-dynamo directory
+cd backend-dynamo
+
+# Get the source code for Dynamo.
+git clone https://github.com/ai-dynamo/dynamo.git
+
+cd dynamo
+
+# Switch to the specific state to ensure compatibility
+git checkout 14e1d446323266ebc1f14f7569a9b7cddb52d36c
+
+# Build the container for Dynamo serve with vLLM support
+
+# On an x86 machine
+./container/build.sh --framework vllm
+
+# On an ARM machine (ex: GB200)
+# ./container/build.sh --framework vllm --platform linux/arm64
+```
+
+## Configuration
+
+Model configuration for NVIDIA Dynamo can be found in `backend-dynamo/config`.
+
+## Running the Service
+
+First, check that no other containers are running, so that there is no resource contention.
+
+```bash
+docker ps
+
+# Stop any running containers
+docker stop 
+```
+
+After that, we need to run the supporting services (etcd and NATS) using Docker Compose.
+
+```bash
+docker compose -f deploy/metrics/docker-compose.yml up -d
+```
+
+Then, let's run the container that we have built.
+
+```bash
+# Open the port so that remote clients can reach the service. Dynamo will be using port 8000
+sudo ufw allow 8000
+
+# Execute the container
+./container/run.sh --gpus all -it --framework vllm -v "$(pwd)/../config:/workspace/examples/llm/configs"
+```
+
+Once the container has started, start the server from inside the container.
+
+```bash
+# Navigate to the directory with example scripts
+cd examples/llm
+
+# Start the service
+dynamo serve graphs.agg:Frontend -f configs/agg_llama_nemotron_4b.yaml
+```
+
+## NVIDIA Dynamo API Endpoints
+
+NVIDIA Dynamo supports the following API endpoints:
+
+- **POST `/v1/completions`** — Generate text completions from a prompt.
+- **POST `/v1/embeddings`** — Get vector embeddings for input text.
+- **POST `/v1/models`** — Manage or load models (implementation-dependent).
+- **GET `/v1/models`** — List available models.
+- **POST `/v1/tokenizer`** — Tokenize or detokenize text.
+- **POST `/v1/images/generations`** — Generate images from text prompts.
+- **POST `/v1/audio/transcriptions`** — Transcribe audio to text.
+- **POST `/v1/audio/translations`** — Translate audio to text in another language.
+
+In this demo we are using the OpenAI-compatible **POST `/v1/chat/completions`** endpoint.
+
+## Troubleshooting
+
+Common issues and solutions:
+
+1. CUDA/GPU issues:
+   - Verify CUDA installation: `nvidia-smi`
+   - Check GPU memory availability
+   - Ensure correct CUDA version is installed
+
+2. Model loading issues:
+   - Verify model files are present
+   - Check model path configuration
+   - Ensure sufficient disk space
+
+3. Performance issues:
+   - Monitor GPU utilization
+   - Check batch size settings
+   - Verify memory allocation
diff --git a/community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml b/community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml
new file mode 100644
index 00000000..f0596d54
--- /dev/null
+++ b/community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+Common:
+  model: nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+  max-model-len: 131072
+
+Frontend:
+  served_model_name: nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
+  endpoint: dynamo.Processor.chat/completions
+  port: 8000
+
+Processor:
+  router: round-robin
+  common-configs: [model, max-model-len]
+
+
+VllmWorker:
+  router: random
+  tensor-parallel-size: 1
+  ServiceArgs:
+    workers: 1
+    resources:
+      gpu: 1
+  common-configs: [model, max-model-len]
\ No newline at end of file
diff --git a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/README.md b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/README.md
new file mode 100644
index 00000000..d82d7c02
--- /dev/null
+++ b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/README.md
@@ -0,0 +1,86 @@
+# NVIDIA Dynamo Proxy
+
+A lightweight proxy server for the NVIDIA Dynamo LLM server that handles CORS and request forwarding.
+
+## Prerequisites
+
+- Python 3.8 or higher
+- Network access to NVIDIA Dynamo server
+
+## Setup
+
+For Unix/macOS:
+```bash
+# Create and activate virtual environment
+python3 -m venv venv
+source venv/bin/activate
+
+# Install dependencies
+pip install -r requirements.txt
+```
+
+For Windows:
+```bash
+# Create and activate virtual environment
+python -m venv venv
+.\venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
+```
+
+## Configuration
+
+The proxy can be configured through its configuration file (`config.yaml`):
+```yaml
+# NVIDIA Dynamo Server Configuration
+llm:
+  # IP will be provided by frontend via X-LLM-IP header
+  port: "8000"
+
+# Proxy Configuration
+proxy:
+  port: "8002"
+  cors:
+    allow_origins: ["http://localhost:3000"]
+    allow_credentials: true
+    allow_methods: ["*"]
+    allow_headers: ["*", "X-LLM-IP"]
+
+# Server configuration
+server:
+  # IP will be provided by environment variable or user input
+  port: 8002
+```
+
+## Running the Proxy
+
+For Unix/macOS:
+```bash
+# Make sure you're in the llm-proxy directory
+cd backend-dynamo/llm-proxy
+
+# Activate the virtual environment if not already activated
+source venv/bin/activate
+
+# Start the proxy server
+python proxy.py
+```
+
+For Windows:
+```bash
+# Make sure you're in the llm-proxy directory
+cd backend-dynamo\llm-proxy
+
+# Activate the virtual environment if not already activated
+.\venv\Scripts\activate
+
+# Start the proxy server
+python proxy.py
+```
+
+The proxy will start on http://localhost:8002 by default.
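+
+To sanity-check the proxy, you can send a chat completion request through it with `curl`. This is only a sketch: `192.0.2.10` is a placeholder for the IP address of the machine running the NVIDIA Dynamo server, and the port assumes the default `8002` from `config.yaml`.
+
+```bash
+# The X-LLM-IP header tells the proxy which Dynamo server to forward the request to.
+curl -X POST http://localhost:8002/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "X-LLM-IP: 192.0.2.10" \
+  -d '{
+        "model": "nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1",
+        "messages": [{"role": "user", "content": "Hello!"}],
+        "stream": false,
+        "max_tokens": 256
+      }'
+```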
+ +## API Endpoints + +- `POST /v1/chat/completions`: Forwards chat completion requests to the NVIDIA Dynamo server \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml new file mode 100644 index 00000000..248f2366 --- /dev/null +++ b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml @@ -0,0 +1,33 @@ +# NVIDIA Dynamo Proxy Configuration + +# LLM Server Configuration +llm: + # IP will be provided by frontend via X-LLM-IP header + port: "8000" + timeout: 30 # Request timeout in seconds + +# Proxy Server Configuration +proxy: + port: "8002" + host: "0.0.0.0" # Listen on all interfaces + log_level: "info" + + # CORS Configuration + cors: + allow_origins: + - "http://localhost:3000" # React development server + - "http://127.0.0.1:3000" # Alternative localhost + allow_credentials: true + allow_methods: ["GET", "POST", "OPTIONS"] + allow_headers: ["*", "X-LLM-IP", "Content-Type"] + +# Logging Configuration +logging: + level: "INFO" + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + file: "proxy.log" # Optional: log to file + +# Server configuration +server: + # IP will be provided by environment variable or user input + port: 8002 \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py new file mode 100644 index 00000000..eb47bf66 --- /dev/null +++ b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py @@ -0,0 +1,72 @@ +""" +Simple NVIDIA Dynamo Proxy Server + +A lightweight proxy server that forwards chat completion requests to NVIDIA Dynamo LLM server. +""" + +from fastapi import FastAPI, HTTPException, Header +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from typing import List, Dict, Optional +import httpx +import logging +import yaml +from pathlib import Path + +# Basic logging setup +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Load configuration +def load_config(): + config_path = Path(__file__).parent / "config.yaml" + with open(config_path, 'r') as f: + return yaml.safe_load(f) + +config = load_config() + +app = FastAPI(title="NVIDIA Dynamo Proxy") + +# Configure CORS +app.add_middleware( + CORSMiddleware, + allow_origins=config["proxy"]["cors"]["allow_origins"], + allow_credentials=config["proxy"]["cors"]["allow_credentials"], + allow_methods=config["proxy"]["cors"]["allow_methods"], + allow_headers=config["proxy"]["cors"]["allow_headers"] +) + +# Initialize HTTP client +http_client = httpx.AsyncClient() + +class ChatCompletionRequest(BaseModel): + model: str + messages: List[Dict[str, str]] + stream: bool = False + max_tokens: Optional[int] = None + +@app.post("/v1/chat/completions") +async def proxy_chat_completions(request: ChatCompletionRequest, x_llm_ip: str = Header(None)): + """Forward chat completion requests to NVIDIA Dynamo server""" + if not x_llm_ip: + raise HTTPException(status_code=400, detail="X-LLM-IP header is required") + + try: + # Forward request to NVIDIA Dynamo server + response = await http_client.post( + f"http://{x_llm_ip}:{config['llm']['port']}/v1/chat/completions", + json=request.dict(), + timeout=config['llm']['timeout'] + ) + return response.json() + except Exception as e: + logger.error(f"Error: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + import 
uvicorn + uvicorn.run( + app, + host=config["proxy"]["host"], + port=int(config["proxy"]["port"]) + ) \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/requirements.txt b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/requirements.txt new file mode 100644 index 00000000..c0191a27 --- /dev/null +++ b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/requirements.txt @@ -0,0 +1,4 @@ +fastapi==0.104.1 +uvicorn==0.24.0 +httpx==0.25.1 +pyyaml==6.0.1 \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/.gitignore b/community/chat-llama-nemotron/backend-rag/.gitignore new file mode 100644 index 00000000..de5d5903 --- /dev/null +++ b/community/chat-llama-nemotron/backend-rag/.gitignore @@ -0,0 +1,58 @@ +# Python +__pycache__/ +/src/__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.env/ +.venv/ +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# RAG specific +storage/ +documents/ +index/ +embeddings/ +*.faiss +*.pkl +*.bin +*.vec +*.model +*.index +chunks/ +vectors/ +corpus/ +indexes/ +/src/index/* +/src/index/documents.json +/storage/* + +# Logs +logs/ +*.log + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Environment variables +.env +.env.local +.env.*.local \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/README.md b/community/chat-llama-nemotron/backend-rag/README.md new file mode 100644 index 00000000..4b53dc62 --- /dev/null +++ b/community/chat-llama-nemotron/backend-rag/README.md @@ -0,0 +1,73 @@ +# RAG Backend Service + +This is the RAG (Retrieval-Augmented Generation) backend service for the chat application. It provides APIs for document processing, search, and chat functionality using FAISS for vector similarity search and Sentence Transformers for text embeddings. + +## Prerequisites + +- Python 3.8 or higher +- NVIDIA GPU with CUDA support (optional, for faster processing) +- Sufficient disk space for document storage and vector indices + +## Setup + +For Unix/macOS: +```bash +# Create and activate virtual environment +python -m venv venv +source venv/bin/activate + +# Install dependencies +pip install -r requirements.txt +``` + +For Windows: +```bash +# Create and activate virtual environment +python -m venv venv +.\venv\Scripts\activate + +# Install dependencies +pip install -r requirements.txt +``` + +## Configuration + +The service can be configured through the configuration files: + +- `src/config/app_config.yaml` +- `src/config/rag_config.yaml` + + +## Running the Service + +For Unix/macOS: +```bash +# Make sure you're in the backend-rag directory +cd backend-rag + +# Activate the virtual environment if not already activated +source venv/bin/activate + +# Start the server +python src/app.py +``` + +For Windows: +```bash +# Make sure you're in the backend-rag directory +cd backend-rag + +# Activate the virtual environment if not already activated +.\venv\Scripts\activate + +# Start the server +python src\app.py +``` + +## Architecture + +This service implements a RAG (Retrieval-Augmented Generation) system that: +1. Processes and chunks documents +2. Generates embeddings +3. Stores vectors in a FAISS index +4. 
Provides semantic search capabilities \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/requirements.txt b/community/chat-llama-nemotron/backend-rag/requirements.txt new file mode 100644 index 00000000..85fa9dc7 --- /dev/null +++ b/community/chat-llama-nemotron/backend-rag/requirements.txt @@ -0,0 +1,11 @@ +fastapi==0.104.1 +uvicorn==0.24.0 +python-multipart==0.0.6 +sentence-transformers==2.2.2 +faiss-cpu==1.7.4 +PyPDF2==3.0.1 +python-docx==1.0.1 +beautifulsoup4==4.12.2 +markdown==3.5.1 +tqdm==4.66.1 +PyYAML==6.0.1 \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/src/app.py b/community/chat-llama-nemotron/backend-rag/src/app.py new file mode 100644 index 00000000..3bf30ec7 --- /dev/null +++ b/community/chat-llama-nemotron/backend-rag/src/app.py @@ -0,0 +1,462 @@ +from fastapi import FastAPI, HTTPException, UploadFile, File, BackgroundTasks +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel +from typing import List, Dict, Any, Optional +from rag_service import RAGService +import os +from pathlib import Path +import PyPDF2 +import io +import logging +import json +from fastapi.responses import StreamingResponse, FileResponse +from queue import Queue +import threading +import uuid +from datetime import datetime +import docx +from bs4 import BeautifulSoup +from config.config_loader import config_loader + +# Load configurations +logger = logging.getLogger(__name__) +logger.info("Loading configurations for FastAPI app...") +app_config = config_loader.get_app_config() +rag_config = config_loader.get_rag_config() +logger.info(f"Loaded app config: {app_config}") +logger.info(f"Loaded RAG config: {rag_config}") + +# Configure logging +logging.basicConfig( + level=getattr(logging, rag_config['logging']['level']), + format=rag_config['logging']['format'] +) + +app = FastAPI( + title=app_config['app']['name'], + version=app_config['app']['version'] +) + +logger.info(f"Initialized FastAPI app with title: {app_config['app']['name']}, version: {app_config['app']['version']}") + +# Configure CORS +app.add_middleware( + CORSMiddleware, + allow_origins=app_config['cors']['allow_origins'], + allow_credentials=app_config['cors']['allow_credentials'], + allow_methods=app_config['cors']['allow_methods'], + allow_headers=app_config['cors']['allow_headers'], + expose_headers=app_config['cors']['expose_headers'] +) + +logger.info(f"Configured CORS with origins: {app_config['cors']['allow_origins']}") + +# Initialize RAG service and index directory +INDEX_DIR = Path(app_config['storage']['index_dir']) +STORAGE_DIR = Path(app_config['storage']['documents_dir']) + +# Create directories if they don't exist +INDEX_DIR.mkdir(parents=True, exist_ok=True) +STORAGE_DIR.mkdir(parents=True, exist_ok=True) + +# Initialize RAG service +rag_service = RAGService() + +# Initialize job queues dictionary +job_queues: Dict[str, Queue] = {} + +# Load or create RAG index +try: + if INDEX_DIR.exists() and any(INDEX_DIR.iterdir()): + rag_service.load_index(INDEX_DIR) + logger.info("Loaded existing RAG index") + else: + rag_service.create_index() + rag_service.save_index(INDEX_DIR) + logger.info("Created new RAG index") +except Exception as e: + logger.error(f"Error initializing RAG index: {str(e)}") + rag_service.create_index() + rag_service.save_index(INDEX_DIR) + logger.info("Created new RAG index after error") + +# Supported file types and their extensions +SUPPORTED_EXTENSIONS = app_config['supported_files'] +logger.info(f"Configured 
supported file types: {SUPPORTED_EXTENSIONS}") + +def get_file_extension(content_type: str) -> str: + """Get file extension from content type""" + return SUPPORTED_EXTENSIONS.get(content_type, '') + +def is_supported_file(content_type: str) -> bool: + """Check if file type is supported""" + return content_type in SUPPORTED_EXTENSIONS + +async def process_pdf(content: bytes) -> List[Dict[str, Any]]: + """Process PDF file and extract text with metadata""" + pdf_file = io.BytesIO(content) + pdf_reader = PyPDF2.PdfReader(pdf_file) + text = "" + + for page in pdf_reader.pages: + text += page.extract_text() + "\n" + + return [{ + "text": text, + "chunk_type": "pdf" + }] + +async def process_text(content: bytes) -> List[Dict[str, Any]]: + """Process text file and extract content""" + text = content.decode('utf-8') + return [{ + "text": text, + "chunk_type": "text_file" + }] + +async def process_markdown(content: bytes) -> List[Dict[str, Any]]: + """Process markdown file and extract content""" + text = content.decode('utf-8') + return [{ + "text": text, + "chunk_type": "markdown" + }] + +async def process_docx(content: bytes) -> List[Dict[str, Any]]: + """Process Word document and extract content""" + docx_file = io.BytesIO(content) + doc = docx.Document(docx_file) + text = "\n".join(para.text for para in doc.paragraphs if para.text.strip()) + return [{ + "text": text, + "chunk_type": "docx" + }] + +async def process_html(content: bytes) -> List[Dict[str, Any]]: + """Process HTML file and extract content""" + html = content.decode('utf-8') + soup = BeautifulSoup(html, 'html.parser') + text = "\n".join(section.text.strip() for section in soup.find_all(['h1', 'h2', 'h3', 'p']) if section.text.strip()) + return [{ + "text": text, + "chunk_type": "html" + }] + +async def process_file_content(content: bytes, content_type: str) -> List[Dict[str, Any]]: + """Process file content based on its type""" + processors = { + 'application/pdf': process_pdf, + 'text/plain': process_text, + 'text/markdown': process_markdown, + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': process_docx, + 'text/html': process_html + } + + processor = processors.get(content_type) + if not processor: + raise HTTPException(status_code=400, detail=f"Unsupported file type: {content_type}") + + return await processor(content) + +def save_file_bytes(file_bytes: bytes, file_id: str, extension: str): + file_path = os.path.join(STORAGE_DIR, f"{file_id}{extension}") + with open(file_path, "wb") as buffer: + buffer.write(file_bytes) + return file_path + +@app.get("/") +async def root(): + return {"message": "Backend server is running"} + +@app.get("/api/rag-status") +async def get_rag_status(): + """Get the current status of the RAG index""" + try: + count = rag_service.get_document_count() + return { + "document_count": count, + "is_empty": count == 0 + } + except Exception as e: + logger.error(f"Error getting RAG status: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/clear-rag") +async def clear_rag(): + """Clear RAG index and remove all stored files""" + try: + # Get counts before clearing + deleted_chunks = rag_service.get_document_count() + deleted_documents = rag_service.get_unique_document_count() + + # Clear RAG index + rag_service.clear() + rag_service.save_index(INDEX_DIR) + + # Clear stored files + if STORAGE_DIR.exists(): + for file in STORAGE_DIR.iterdir(): + if file.is_file(): + file.unlink() + + logger.info(f"Successfully cleared RAG index. 
Deleted {deleted_chunks} chunks and {deleted_documents} documents.") + return { + "message": f"Successfully cleared RAG index. Deleted {deleted_chunks} chunks and {deleted_documents} documents.", + "deleted_chunks": deleted_chunks, + "deleted_documents": deleted_documents + } + except Exception as e: + logger.error(f"Error clearing RAG index: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/upload") +async def upload_files(files: List[UploadFile] = File(...), background_tasks: BackgroundTasks = None): + try: + logger.info(f"Received upload request for {len(files)} files") + job_id = str(uuid.uuid4()) + job_queue = Queue() + job_queues[job_id] = job_queue + + processed_chunks = [] + saved_files = [] + chunks_metadata = [] + + for file in files: + logger.info(f"Processing file: {file.filename} (type: {file.content_type})") + + if not is_supported_file(file.content_type): + logger.warning(f"Unsupported file type: {file.content_type}") + raise HTTPException(status_code=400, detail=f"Unsupported file type: {file.content_type}") + + file_id = None + extension = None + + try: + file_id = str(uuid.uuid4()) + extension = get_file_extension(file.content_type) + file_bytes = await file.read() # Read once + + # Save the file + file_path = save_file_bytes(file_bytes, file_id, extension) + saved_files.append((file_id, extension)) + logger.info(f"Saved file with ID: {file_id}") + + # Process file content + chunks = await process_file_content(file_bytes, file.content_type) + + if not chunks: + logger.warning(f"No content extracted from file: {file.filename}") + # Remove the saved file if no content was extracted + if file_path and os.path.exists(file_path): + os.remove(file_path) + raise HTTPException(status_code=400, detail=f"No content could be extracted from file: {file.filename}") + + # Process each chunk + for chunk in chunks: + chunk_id = str(uuid.uuid4()) + source_file = f"{file_id}{extension}" + chunk_metadata = { + "chunk_id": chunk_id, + "text": chunk["text"], + "source_file": source_file, + "chunk_type": chunk["chunk_type"], + "page_number": chunk.get("page_number", 1), + "upload_time": datetime.now().isoformat() + } + + chunks_metadata.append(chunk_metadata) + processed_chunks.append(chunk["text"]) + + logger.info(f"Successfully processed file: {file.filename} with {len(chunks)} chunks") + + except Exception as e: + logger.error(f"Error processing file {file.filename}: {str(e)}") + # Remove the saved file if processing failed + if file_id and extension: + file_path = os.path.join(STORAGE_DIR, f"{file_id}{extension}") + if os.path.exists(file_path): + os.remove(file_path) + raise HTTPException(status_code=400, detail=f"Error processing file {file.filename}: {str(e)}") + + if not processed_chunks: + raise HTTPException(status_code=400, detail="No valid content found in any of the uploaded files") + + logger.info(f"Starting background processing for {len(processed_chunks)} chunks from {len(saved_files)} files") + # Start processing in a background thread + thread = threading.Thread(target=process_documents, args=(processed_chunks, chunks_metadata, job_queue)) + thread.start() + logger.info(f"Background processing started with job_id: {job_id}") + + return { + "job_id": job_id, + "saved_files": [f"{fid}{ext}" for fid, ext in saved_files], + "message": f"Successfully saved {len(saved_files)} files and started processing" + } + + except HTTPException as he: + logger.error(f"HTTP error in upload endpoint: {str(he)}") + raise he + except Exception as e: + 
logger.error(f"Unexpected error in upload endpoint: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + +@app.get("/api/upload/progress/{job_id}") +async def upload_progress(job_id: str): + return StreamingResponse(progress_generator(job_id), media_type="text/event-stream") + +class SearchRequest(BaseModel): + query: str + k: Optional[int] = None # Will use rag_config['search']['default_k'] if not provided + use_rag: bool = rag_config['search']['use_rag'] + +@app.post("/api/search") +async def search(request: SearchRequest): + try: + logger.info(f"Search request received: {request.query} (use_rag: {request.use_rag})") + if request.use_rag: + results = rag_service.search(request.query, request.k) + else: + # If RAG is disabled, return an empty result + results = [] + return {"results": results} + except Exception as e: + logger.error(f"Error in search: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + +async def progress_generator(job_id: str): + """Generate progress updates for SSE""" + job_queue = job_queues.get(job_id) + if not job_queue: + yield f"data: {json.dumps({'type': 'error', 'message': 'Invalid job id'})}\n\n" + return + + try: + while True: + progress = job_queue.get() + yield f"data: {json.dumps(progress)}\n\n" + + # Break the loop on completion or error + if progress["type"] in ["complete", "error"]: + # Clean up the job queue + del job_queues[job_id] + break + except Exception as e: + logger.error(f"Error in progress generator: {str(e)}") + yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n" + if job_id in job_queues: + del job_queues[job_id] + +def process_documents(documents: List[str], metadata: List[Dict[str, Any]], job_queue: Queue): + """Process documents in background with progress updates""" + try: + total_docs = len(documents) + job_queue.put({ + "type": "processing", + "stage": "start", + "progress_percent": 0, + "message": "Starting document processing..." + }) + + # Add documents with progress tracking + for i, (doc, meta) in enumerate(zip(documents, metadata), 1): + # Calculate progress percentage + progress_percent = int((i - 1) / total_docs * 100) + + # Update progress before processing each document + job_queue.put({ + "type": "processing", + "stage": "progress", + "progress_percent": progress_percent, + "message": f"Processing document {i} of {total_docs}..." + }) + + # Process the document + rag_service.add_documents([doc], [meta]) + + # Update progress after processing + progress_percent = int(i / total_docs * 100) + job_queue.put({ + "type": "processing", + "stage": "progress", + "progress_percent": progress_percent, + "message": f"Processed document {i} of {total_docs}" + }) + + # Save the index + job_queue.put({ + "type": "processing", + "stage": "progress", + "progress_percent": 95, + "message": "Saving index..." 
+            })
+        rag_service.save_index(INDEX_DIR)
+
+        # Send completion
+        job_queue.put({
+            "type": "complete",
+            "message": "Processing complete",
+            "progress_percent": 100,
+            "stage": "complete"
+        })
+
+    except Exception as e:
+        logger.error(f"Error in process_documents: {str(e)}")
+        job_queue.put({
+            "type": "error",
+            "message": str(e)
+        })
+        # No cleanup of job_queues here: progress_generator deletes this job's
+        # queue from job_queues when it delivers the "complete" or "error"
+        # message to the client polling the progress endpoint.
+
+@app.get("/api/document/{file_id}")
+async def get_document(file_id: str):
+    """Serve a stored document for viewing in browser"""
+    try:
+        # Validate file_id
+        if not file_id or not file_id.strip():
+            raise HTTPException(status_code=400, detail="Invalid file ID")
+
+        # Check if file exists
+        file_path = os.path.join(STORAGE_DIR, file_id)
+        if not os.path.exists(file_path):
+            raise HTTPException(status_code=404, detail=f"Document not found: {file_id}")
+
+        # Check if it's a file (not a directory)
+        if not os.path.isfile(file_path):
+            raise HTTPException(status_code=400, detail="Invalid document path")
+
+        # Check file size
+        file_size = os.path.getsize(file_path)
+        if file_size == 0:
+            raise HTTPException(status_code=400, detail="Document is empty")
+
+        # Determine content type based on file extension
+        content_type = "application/pdf"  # Default to PDF
+        if file_id.endswith('.txt'):
+            content_type = "text/plain"
+        elif file_id.endswith('.md'):
+            content_type = "text/markdown"
+        elif file_id.endswith('.docx'):
+            content_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+        elif file_id.endswith('.html'):
+            content_type = "text/html"
+
+        return FileResponse(
+            file_path,
+            media_type=content_type,
+            filename=file_id,
+            headers={
+                "Content-Disposition": f"inline; filename={file_id}"
+            }
+        )
+    except HTTPException as he:
+        raise he
+    except Exception as e:
+        logger.error(f"Error serving document {file_id}: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error serving document: {str(e)}")
+
+if __name__ == "__main__":
+    import uvicorn
+    logger.info("Starting server...")
+    uvicorn.run(app, host="0.0.0.0", port=8001)
\ No newline at end of file
diff --git a/community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml b/community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml
new file mode 100644
index 00000000..bf06e68f
--- /dev/null
+++ b/community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml
@@ -0,0 +1,27 @@
+# FastAPI Application Configuration
+app:
+  name: "RAG Service API"
+  version: "1.0.0"
+  debug: true
+
+# CORS Configuration
+cors:
+  allow_origins:
+    - "http://localhost:3000"  # Frontend origin
+  allow_credentials: true
+  allow_methods: ["*"]
+  allow_headers: ["*"]
+  expose_headers: ["*"]
+
+# File Storage Configuration
+storage:
+  documents_dir: "storage/documents"
+  index_dir: "index"
+
+# Supported File Types
+supported_files:
+  application/pdf: .pdf
+  text/plain: .txt
+  text/markdown: .md
+  application/vnd.openxmlformats-officedocument.wordprocessingml.document: .docx
+  text/html: .html
\ No newline at end of file
diff --git a/community/chat-llama-nemotron/backend-rag/src/config/config_loader.py b/community/chat-llama-nemotron/backend-rag/src/config/config_loader.py
new file mode 100644
index 00000000..b361d8b4
--- /dev/null
+++ b/community/chat-llama-nemotron/backend-rag/src/config/config_loader.py
@@ -0,0 +1,44 @@
+import yaml
+import os
+from pathlib import Path
+from typing import Dict, Any
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.ERROR) # 
Only log errors in production +logger = logging.getLogger(__name__) + +class ConfigLoader: + def __init__(self, config_dir: str = None): + """Initialize the config loader with the config directory path""" + if config_dir is None: + config_dir = os.path.dirname(os.path.abspath(__file__)) + self.config_dir = Path(config_dir) + self.configs: Dict[str, Dict[str, Any]] = {} + + def load_config(self, config_name: str) -> Dict[str, Any]: + """Load a specific configuration file""" + if config_name in self.configs: + return self.configs[config_name] + + config_path = self.config_dir / f"{config_name}.yaml" + + if not config_path.exists(): + logger.error(f"Configuration file not found: {config_path}") + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + self.configs[config_name] = config + return config + + def get_app_config(self) -> Dict[str, Any]: + """Get the application configuration""" + return self.load_config('app_config') + + def get_rag_config(self) -> Dict[str, Any]: + """Get the RAG service configuration""" + return self.load_config('rag_config') + +# Create a singleton instance +config_loader = ConfigLoader() \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml b/community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml new file mode 100644 index 00000000..6fda16a3 --- /dev/null +++ b/community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml @@ -0,0 +1,30 @@ +# RAG Service Configuration +model: + name: "all-MiniLM-L6-v2" + dimension: 384 # dimension of the embeddings + +# Text Processing Configuration +text_processing: + chunk_size: 6000 + chunk_overlap: 500 + min_chunk_words: 10 # Minimum number of words in a chunk to be considered valid + +# Processing Configuration +processing: + batch_size: 32 # number of chunks to process in each batch + max_workers: 4 # maximum number of parallel workers for processing + use_gpu: false # whether to use GPU if available + +# Search Configuration +search: + default_k: 5 # Default number of results to return + use_rag: true # Whether to use RAG by default + +# Index Configuration +index: + type: "faiss" # Type of index to use + +# Logging Configuration +logging: + level: "INFO" + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/src/rag_service.py b/community/chat-llama-nemotron/backend-rag/src/rag_service.py new file mode 100644 index 00000000..3f3de097 --- /dev/null +++ b/community/chat-llama-nemotron/backend-rag/src/rag_service.py @@ -0,0 +1,417 @@ +from typing import List, Dict, Any +import faiss +import numpy as np +from sentence_transformers import SentenceTransformer +import json +from pathlib import Path +import logging +import re +from multiprocessing import Queue +from config.config_loader import config_loader +import torch +from langchain.text_splitter import RecursiveCharacterTextSplitter + +# Load configurations +logger = logging.getLogger(__name__) +logger.info("Loading configurations for RAG service...") +rag_config = config_loader.get_rag_config() +app_config = config_loader.get_app_config() +logger.info(f"Loaded RAG config: {rag_config}") +logger.info(f"Loaded app config: {app_config}") + +# Configure logging +logging.basicConfig( + level=getattr(logging, rag_config['logging']['level']), + format=rag_config['logging']['format'] +) + +class RAGService: + def __init__(self): 
+ """Initialize RAG service with configuration from YAML""" + model_config = rag_config['model'] + text_config = rag_config['text_processing'] + processing_config = rag_config['processing'] + search_config = rag_config['search'] + + logger.info(f"Initializing RAG service with model: {model_config['name']}") + logger.info(f"Using model dimension: {model_config['dimension']}") + logger.info(f"Using chunk size: {text_config['chunk_size']}, overlap: {text_config['chunk_overlap']}") + logger.info(f"Using batch size: {processing_config['batch_size']}, max workers: {processing_config['max_workers']}") + + # Initialize the model with proper configuration + self.model = SentenceTransformer( + model_config['name'], + device='cuda' if torch.cuda.is_available() else 'cpu' + ) + self.query_instruction = search_config.get('query_instruction', "Represent this sentence for searching relevant passages: ") + + self.index = None + self.documents = [] + self.metadata = [] # Store metadata for each document + self.dimension = model_config['dimension'] + self.chunk_size = text_config['chunk_size'] + self.chunk_overlap = text_config['chunk_overlap'] + self.min_chunk_words = text_config['min_chunk_words'] + self.batch_size = processing_config['batch_size'] # Get batch size from config + self.max_workers = processing_config['max_workers'] # Get max workers from config + self.search_multiplier = search_config.get('deduplication_multiplier', 2) # Get search multiplier from config + + # Initialize text splitter + self.text_splitter = RecursiveCharacterTextSplitter( + chunk_size=self.chunk_size, + chunk_overlap=self.chunk_overlap, + length_function=len, + separators=text_config.get('separators', ["\n\n", "\n", ". ", "! ", "? ", ", ", " "]), + is_separator_regex=False, + keep_separator=False + ) + + # Create index if it doesn't exist + if not self.index: + self.create_index() + + def create_index(self): + """Create a new FAISS index""" + logger.info("Creating new FAISS index") + # Use L2 distance for normalized vectors (equivalent to cosine similarity) + self.index = faiss.IndexFlatL2(self.dimension) + self.documents = [] # Clear documents when creating new index + self.metadata = [] # Clear metadata when creating new index + logger.info("Cleared documents and metadata lists") + + def chunk_text(self, text: str) -> List[str]: + """Split text into overlapping chunks using RecursiveCharacterTextSplitter""" + # Clean the text + text = re.sub(r'\s+', ' ', text).strip() + + # Use the langchain text splitter + chunks = self.text_splitter.split_text(text) + + logger.info(f"Created {len(chunks)} chunks from text") + return chunks + + def add_documents(self, documents: List[str], metadata: List[Dict[str, Any]] = None, job_queue: Queue = None): + """Add documents to the index with progress tracking""" + try: + if not self.index: + self.create_index() + + if metadata is None: + metadata = [{} for _ in documents] + + total_docs = len(documents) + total_chunks = 0 + processed_chunks = 0 + batch_size = self.batch_size + + # Process each document into chunks + all_chunks = [] + all_metadata = [] + + for doc, meta in zip(documents, metadata): + # Split document into chunks + doc_chunks = self.chunk_text(doc) + # Create metadata for each chunk + for chunk in doc_chunks: + chunk_meta = meta.copy() + chunk_meta["chunk_text"] = chunk + all_chunks.append(chunk) + all_metadata.append(chunk_meta) + + total_chunks_to_process = len(all_chunks) + logger.info(f"Total chunks to process: {total_chunks_to_process}") + + # Send initial progress + if 
job_queue: + job_queue.put({ + "type": "processing", + "stage": "start", + "total_chunks": total_chunks_to_process, + "processed_chunks": 0, + "current_document": 0, + "total_documents": total_docs, + "current_batch": 0, + "total_batches": 0 + }) + + # Process chunks in batches + total_batches = (len(all_chunks) + batch_size - 1) // batch_size + logger.info(f"Processing {len(all_chunks)} chunks in {total_batches} batches") + + for batch_start in range(0, len(all_chunks), batch_size): + batch_end = min(batch_start + batch_size, len(all_chunks)) + batch = all_chunks[batch_start:batch_end] + batch_metadata = all_metadata[batch_start:batch_end] + current_batch = batch_start // batch_size + 1 + + # Generate embeddings for batch + embeddings = self.model.encode(batch) + + # Normalize embeddings for cosine similarity + faiss.normalize_L2(embeddings) + + # Add embeddings to FAISS index + self.index.add(np.array(embeddings).astype('float32')) + + # Store original chunks and their metadata + self.documents.extend(batch) + self.metadata.extend(batch_metadata) + processed_chunks += len(batch) + total_chunks = len(self.documents) + + # Calculate progress percentage + progress_percent = (processed_chunks / total_chunks_to_process) * 100 + + # Send progress update after each batch + if job_queue: + job_queue.put({ + "type": "processing", + "stage": "progress", + "total_chunks": total_chunks_to_process, + "processed_chunks": processed_chunks, + "current_document": total_docs, + "total_documents": total_docs, + "current_batch": current_batch, + "total_batches": total_batches, + "progress_percent": progress_percent + }) + + logger.info(f"Processed batch {current_batch}/{total_batches}") + + # Send completion + if job_queue: + job_queue.put({ + "type": "processing", + "stage": "complete", + "total_chunks": total_chunks, + "processed_chunks": processed_chunks, + "progress_percent": 100, + "current_document": total_docs, + "total_documents": total_docs, + "current_batch": total_batches, + "total_batches": total_batches + }) + + logger.info(f"Completed processing {total_chunks} chunks") + except Exception as e: + logger.error(f"Error adding documents: {str(e)}") + if job_queue: + job_queue.put({ + "type": "error", + "message": str(e) + }) + raise + + def get_document_count(self) -> int: + """Get the current number of chunks in the index""" + count = len(self.documents) + logger.info(f"Current chunk count: {count}") + return count + + def get_unique_document_count(self) -> int: + """Get the number of unique source documents in the index""" + # Get all source files from metadata + source_files = [] + for meta in self.metadata: + source_file = meta.get("source_file") + if source_file and isinstance(source_file, str) and source_file.strip(): + source_files.append(source_file) + + # Count unique source files + unique_docs = set(source_files) + count = len(unique_docs) + logger.info(f"Current unique document count: {count} (from {len(source_files)} total source files)") + return count + + def clear(self): + """Clear the index and documents""" + logger.info("Clearing RAG index and documents") + self.documents = [] # Clear documents first + self.metadata = [] # Clear metadata + self.create_index() # This will also clear documents again, but that's fine + logger.info("RAG index and documents cleared") + + def search(self, query: str, k: int = None) -> List[Dict[str, Any]]: + """Search for similar documents using the query""" + try: + if not self.index or len(self.documents) == 0: + logger.warning("No documents in 
index") + return [] + + # Use configured default_k from RAG config + if k is None: + k = rag_config['search']['default_k'] + + # Generate query embedding + query_embedding = self.model.encode([query])[0] + + # Normalize query embedding for cosine similarity + faiss.normalize_L2(query_embedding.reshape(1, -1)) + + # Log query embedding stats + logger.info(f"Query embedding stats - min: {query_embedding.min():.4f}, max: {query_embedding.max():.4f}, mean: {query_embedding.mean():.4f}, norm: {np.linalg.norm(query_embedding):.4f}") + + # Search for more results than needed to account for deduplication + search_k = k * self.search_multiplier # Use configurable multiplier + + # Search the index + distances, indices = self.index.search(np.array([query_embedding]).astype('float32'), search_k) + + # Log raw distances + logger.info(f"Raw distances - min: {distances[0].min():.4f}, max: {distances[0].max():.4f}, mean: {distances[0].mean():.4f}") + + # Get the results and deduplicate + results = [] + seen_contents = set() + + for i, (distance, idx) in enumerate(zip(distances[0], indices[0])): + if idx < len(self.documents): # Ensure index is valid + # Convert L2 distance to cosine similarity using a more robust method + # For normalized vectors, cosine similarity = 1 - (L2_distance^2)/2 + # Add a small epsilon to prevent numerical instability + epsilon = 1e-6 + distance = max(0, min(2, distance)) # Clamp distance to [0, 2] + similarity = max(0, min(1, 1 - (distance * distance) / 2 + epsilon)) + + # Log similarity calculation + logger.info(f"Result {i+1} - Distance: {distance:.4f}, Similarity: {similarity:.4f}") + + # Get the document text and metadata + document_text = self.documents[idx] + metadata = self.metadata[idx] + + # Check if this content is too similar to any existing result + is_duplicate = False + normalized_text = document_text.lower().strip() + + # Skip if we've seen this exact content before + if normalized_text in seen_contents: + continue + + # Check for partial matches (one content is contained within another) + for seen_text in seen_contents: + if normalized_text in seen_text or seen_text in normalized_text: + is_duplicate = True + break + + if not is_duplicate: + seen_contents.add(normalized_text) + results.append({ + "text": document_text, + "score": float(similarity), + "source_file": metadata.get("source_file") + }) + + # Stop if we have enough unique results + if len(results) >= k: + break + + # Log final results + if results: + logger.info(f"Final results - min score: {min(r['score'] for r in results):.4f}, max score: {max(r['score'] for r in results):.4f}") + else: + logger.info("No results found after deduplication") + + return results + + except Exception as e: + logger.error(f"Error in search: {str(e)}") + return [] + + def save_index(self, directory: str): + """Save the index and documents to disk""" + if not self.index: + logger.warning("No index to save") + return + + directory = Path(directory) + directory.mkdir(parents=True, exist_ok=True) + + try: + # Save FAISS index + index_path = directory / "faiss.index" + faiss.write_index(self.index, str(index_path)) + logger.info(f"Saved FAISS index to {index_path}") + + # Save documents and metadata + docs_path = directory / "documents.json" + with open(docs_path, "w") as f: + json.dump({ + "documents": self.documents, + "metadata": self.metadata + }, f) + logger.info(f"Saved {len(self.documents)} chunks to {docs_path}") + except Exception as e: + logger.error(f"Error saving index: {str(e)}") + raise + + def load_index(self, 
directory: str): + """Load the index and documents from disk""" + directory = Path(directory) + + try: + # Load FAISS index + index_path = directory / "faiss.index" + if index_path.exists(): + self.index = faiss.read_index(str(index_path)) + logger.info(f"Loaded FAISS index from {index_path}") + else: + logger.info("No existing FAISS index found") + self.create_index() + + # Load documents and metadata + docs_path = directory / "documents.json" + if docs_path.exists(): + with open(docs_path, "r") as f: + data = json.load(f) + self.documents = data.get("documents", []) + self.metadata = data.get("metadata", []) + logger.info(f"Loaded {len(self.documents)} chunks from {docs_path}") + else: + logger.info("No existing documents found") + self.documents = [] + self.metadata = [] + except Exception as e: + logger.error(f"Error loading index: {str(e)}") + # Create new index if loading fails + self.create_index() + self.documents = [] + self.metadata = [] + + def index_documents(self, documents: List[str], metadata: List[Dict[str, Any]] = None) -> None: + """Index a list of documents""" + try: + if not documents: + logger.warning("No documents to index") + return + + # Generate embeddings for all documents + embeddings = self.model.encode(documents) + + # Log embedding stats before normalization + logger.info(f"Pre-normalization stats - min: {embeddings.min():.4f}, max: {embeddings.max():.4f}, mean: {embeddings.mean():.4f}") + logger.info(f"Pre-normalization norms - min: {np.linalg.norm(embeddings, axis=1).min():.4f}, max: {np.linalg.norm(embeddings, axis=1).max():.4f}, mean: {np.linalg.norm(embeddings, axis=1).mean():.4f}") + + # Normalize embeddings for cosine similarity + faiss.normalize_L2(embeddings) + + # Log embedding stats after normalization + logger.info(f"Post-normalization stats - min: {embeddings.min():.4f}, max: {embeddings.max():.4f}, mean: {embeddings.mean():.4f}") + logger.info(f"Post-normalization norms - min: {np.linalg.norm(embeddings, axis=1).min():.4f}, max: {np.linalg.norm(embeddings, axis=1).max():.4f}, mean: {np.linalg.norm(embeddings, axis=1).mean():.4f}") + + # Create FAISS index + dimension = embeddings.shape[1] + self.index = faiss.IndexFlatL2(dimension) + + # Add vectors to the index + self.index.add(embeddings.astype('float32')) + + # Store documents and metadata + self.documents = documents + self.metadata = metadata if metadata else [{}] * len(documents) + + logger.info(f"Indexed {len(documents)} documents with dimension {dimension}") + + except Exception as e: + logger.error(f"Error in index_documents: {str(e)}") + raise \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/src/requirements.txt b/community/chat-llama-nemotron/backend-rag/src/requirements.txt new file mode 100644 index 00000000..3857af3f --- /dev/null +++ b/community/chat-llama-nemotron/backend-rag/src/requirements.txt @@ -0,0 +1,8 @@ +fastapi==0.104.1 +uvicorn==0.24.0 +python-multipart==0.0.6 +sentence-transformers==2.2.2 +faiss-cpu==1.7.4 +numpy==1.24.3 +PyPDF2==3.0.1 +tqdm==4.66.1 \ No newline at end of file diff --git a/community/chat-llama-nemotron/frontend/.gitignore b/community/chat-llama-nemotron/frontend/.gitignore new file mode 100644 index 00000000..767a8453 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/.gitignore @@ -0,0 +1,28 @@ +# dependencies +/node_modules +/.pnp +.pnp.js + +# testing +/coverage + +# production +/build + +# misc +.DS_Store +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* 
+yarn-debug.log* +yarn-error.log* + +# IDE +.idea/ +.vscode/ +*.swp +*.swo \ No newline at end of file diff --git a/community/chat-llama-nemotron/frontend/README.md b/community/chat-llama-nemotron/frontend/README.md new file mode 100644 index 00000000..91689886 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/README.md @@ -0,0 +1,36 @@ +# Frontend Application + +This is the React frontend for the chat application. It provides a user interface for uploading documents, searching, and chatting with the AI model. + +## Prerequisites + +- Node.js 18 or higher +- npm 8 or higher +- Modern web browser with JavaScript enabled + +## Setup + +1. Install dependencies: +```bash +npm install +``` + +## Configuration + +The application can be configured through YAML configuration file (in `public/config/`): + + - `app_config.yaml`: Main application settings + +## Running the Application + +1. Make sure you're in the frontend directory: +```bash +cd frontend +``` + +2. Start the development server: +```bash +npm start +``` + +The application will start on http://localhost:3000 \ No newline at end of file diff --git a/community/chat-llama-nemotron/frontend/package.json b/community/chat-llama-nemotron/frontend/package.json new file mode 100644 index 00000000..7a1562d2 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/package.json @@ -0,0 +1,50 @@ +{ + "name": "llama-chat-ui", + "version": "0.1.0", + "private": true, + "description": "A React-based chat interface for interacting with NVIDIA Dynamo server", + "dependencies": { + "@emotion/react": "^11.11.0", + "@emotion/styled": "^11.11.0", + "@mui/material": "^5.13.0", + "@testing-library/jest-dom": "^5.16.5", + "@testing-library/react": "^13.4.0", + "@testing-library/user-event": "^13.5.0", + "axios": "^1.4.0", + "js-yaml": "^4.1.0", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-markdown": "^10.1.0", + "react-scripts": "5.0.1", + "remark-gfm": "^4.0.1", + "web-vitals": "^2.1.4" + }, + "scripts": { + "start": "react-scripts start", + "build": "react-scripts build", + "test": "react-scripts test", + "eject": "react-scripts eject" + }, + "eslintConfig": { + "extends": [ + "react-app", + "react-app/jest" + ] + }, + "browserslist": { + "production": [ + ">0.2%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 1 chrome version", + "last 1 firefox version", + "last 1 safari version" + ] + }, + "devDependencies": { + "buffer": "^6.0.3", + "react-app-rewired": "^2.2.1" + } +} diff --git a/community/chat-llama-nemotron/frontend/public/config/app_config.yaml b/community/chat-llama-nemotron/frontend/public/config/app_config.yaml new file mode 100644 index 00000000..7e327d66 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/public/config/app_config.yaml @@ -0,0 +1,78 @@ +# Frontend Application Configuration +app: + name: "RAG Client" + version: "1.0.0" + environment: "development" + +# API Configuration +api: + ip: "localhost" + port: "8001" + base_url: "http://localhost:8001" # This will be overridden by the dynamic config + endpoints: + upload: "/api/upload" + search: "/api/search" + clear: "/api/clear-rag" + status: "/api/rag-status" + progress: "/api/upload/progress" + document: "/api/document" + +# NVIDIA Dynamo Server Configuration +llm: + name: "NVIDIA Dynamo" + port: "8000" + endpoints: + chat: "/v1/chat/completions" + cors: + allow_origins: ["http://localhost:3000"] + allow_credentials: true + allow_methods: ["POST", "OPTIONS"] + allow_headers: ["Content-Type", "Accept", "Origin"] + model: + name: 
"nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1" + max_tokens: 32768 + temperature: 0.6 + top_p: 0.95 + +# LLM Proxy Configuration +llm_proxy: + port: 8002 + # IP will be provided by user input + endpoints: + chat: "/v1/chat/completions" + +# UI Configuration +ui: + + components: + file_upload: + max_file_size: 10485760 # 10MB in bytes + accepted_file_types: + - "application/pdf" + - "text/plain" + - "text/markdown" + - "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + - "text/html" + timeouts: + status_reset: 3000 # milliseconds + error_reset: 3000 # milliseconds + messages: + starting: "Starting upload..." + processing: "Processing files..." + complete: "Processing complete" + error: "Connection error. Please try again." + + search: + placeholder: "Enter your search query..." + relevance_threshold: 0.3 + + progress: + polling_interval: 1000 # milliseconds + + chat: + summary: + # Number of recent messages to include when generating conversation summaries + # This is separate from RAG search results and only affects summary generation + max_messages: 5 + system_prompt: "detailed thinking {status}" + rag_prefix: "Relevant information from knowledge base:\n" \ No newline at end of file diff --git a/community/chat-llama-nemotron/frontend/public/index.html b/community/chat-llama-nemotron/frontend/public/index.html new file mode 100644 index 00000000..5b49843e --- /dev/null +++ b/community/chat-llama-nemotron/frontend/public/index.html @@ -0,0 +1,100 @@ + + + + + + + + + Chat with LLM + + + + + +
+ + + diff --git a/community/chat-llama-nemotron/frontend/public/robots.txt b/community/chat-llama-nemotron/frontend/public/robots.txt new file mode 100644 index 00000000..e9e57dc4 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/public/robots.txt @@ -0,0 +1,3 @@ +# https://www.robotstxt.org/robotstxt.html +User-agent: * +Disallow: diff --git a/community/chat-llama-nemotron/frontend/src/App.css b/community/chat-llama-nemotron/frontend/src/App.css new file mode 100644 index 00000000..ab775816 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/App.css @@ -0,0 +1,1089 @@ +.App { + text-align: center; + min-height: 100vh; + display: flex; + flex-direction: column; + background-color: #f0f2f5; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + box-sizing: border-box; +} + +.App-header { + background-color: #282c34; + padding: 2rem 20px; + color: white; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: 1.5rem; +} + +.App-header h1 { + margin: 0; + color: white; +} + +.server-ip-input { + display: flex; + align-items: center; + gap: 1rem; + margin: 0; + padding: 0.75rem 1.25rem; + background-color: rgba(255, 255, 255, 0.1); + border-radius: 8px; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); + transition: all 0.3s ease; + max-width: 500px; + width: 100%; +} + +.server-ip-input:hover { + background-color: rgba(255, 255, 255, 0.15); + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); +} + +.server-ip-input label { + font-size: 0.95rem; + color: rgba(255, 255, 255, 0.9); + white-space: nowrap; + font-weight: 500; + letter-spacing: 0.01em; +} + +.server-ip-input input { + flex: 1; + padding: 0.75rem 1rem; + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: 6px; + font-size: 0.95rem; + min-width: 0; + background-color: rgba(255, 255, 255, 0.1); + color: white; + transition: all 0.2s ease; + width: 100%; + box-sizing: border-box; +} + +.server-ip-input input::placeholder { + color: rgba(255, 255, 255, 0.5); +} + +.server-ip-input input:focus { + outline: none; + border-color: #76B900; + background-color: rgba(255, 255, 255, 0.15); + box-shadow: 0 0 0 2px rgba(118, 185, 0, 0.25); +} + +.server-ip-input input:hover:not(:focus) { + border-color: rgba(255, 255, 255, 0.3); + background-color: rgba(255, 255, 255, 0.12); +} + +.controls { + margin: 0; + display: flex; + flex-wrap: nowrap; + gap: 20px; + align-items: center; + justify-content: center; + max-width: 800px; + width: 100%; +} + +/* RAG Toggle Switch */ +.rag-toggle { + display: flex; + align-items: center; + background-color: rgba(255, 255, 255, 0.1); + padding: 0 12px; + border-radius: 4px; + height: 40px; + min-width: 140px; + transition: background-color 0.2s ease; +} + +.rag-toggle:hover { + background-color: rgba(255, 255, 255, 0.15); +} + +.toggle-label { + margin-left: 10px; + font-size: 14px; + color: white; + white-space: nowrap; + line-height: 40px; + font-weight: 400; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; +} + +.switch { + position: relative; + display: inline-block; + width: 50px; + height: 24px; + margin: 8px 0; +} + +.switch input { + opacity: 0; + width: 0; + height: 0; +} + +.slider { + position: absolute; + cursor: pointer; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: #ccc; + transition: .4s; + border-radius: 24px; +} + +.slider:before { + position: absolute; + content: ""; + height: 16px; + width: 16px; + left: 
4px; + bottom: 4px; + background-color: white; + transition: .4s; + border-radius: 50%; +} + +input:checked + .slider { + background-color: #76B900; +} + +input:focus + .slider { + box-shadow: 0 0 1px #76B900; +} + +input:checked + .slider:before { + transform: translateX(26px); +} + +.slider.round { + border-radius: 24px; +} + +.slider.round:before { + border-radius: 50%; +} + +.controls button { + padding: 8px 16px; + border: none; + border-radius: 4px; + background-color: #61dafb; + color: #282c34; + cursor: pointer; + font-weight: 500; + transition: all 0.2s ease; + height: 40px; + display: flex; + align-items: center; + justify-content: center; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; +} + +.controls button:hover { + background-color: #4fa8c7; + transform: translateY(-1px); +} + +.controls button:active { + transform: translateY(0); +} + +.chat-container { + flex: 1; + display: flex; + flex-direction: column; + padding: 1rem; + max-width: 1200px; + width: calc(100% - 2rem); + margin: 0 auto; + overflow: hidden; + box-sizing: border-box; +} + +.summary-container { + background-color: white; + border-radius: 8px; + padding: 1rem; + margin-bottom: 1rem; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +.summary-container details { + cursor: pointer; +} + +.summary-container summary { + font-weight: 500; + color: #282c34; +} + +.messages { + flex: 1; + overflow-y: auto; + padding: 1rem; + display: flex; + flex-direction: column; + gap: 1rem; + background-color: white; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + text-align: left; + scroll-behavior: smooth; +} + +.message { + max-width: 80%; + padding: 0.75rem 1rem; + border-radius: 8px; + position: relative; + text-align: left; + animation: fadeIn 0.3s ease; +} + +@keyframes fadeIn { + from { opacity: 0; transform: translateY(10px); } + to { opacity: 1; transform: translateY(0); } +} + +.message.user { + align-self: flex-end; + background-color: #5B2E8C; /* NVIDIA purple */ + color: white; +} + +.message.assistant { + align-self: flex-start; + background-color: #e9ecef; + color: #212529; +} + +.message p { + margin: 1rem; + line-height: 1.5; +} + +.message li { + margin: 1rem; + line-height: 1.5; +} + +.message details { + margin-top: 0.5rem; + font-size: 0.9rem; +} + +.message details summary { + cursor: pointer; + color: #6c757d; + display: flex; + align-items: center; + justify-content: space-between; +} + +.message details p { + margin-top: 0.25rem; + margin-bottom: 0.25rem; + padding: 0.5rem; + border-radius: 4px; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + font-size: 0.95em; + line-height: 1.5; + color: #333; +} + +.input-form { + display: flex; + gap: 1rem; + padding: 1rem; + background-color: white; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + margin-top: 1rem; + align-items: center; + transition: box-shadow 0.2s ease; +} + +.input-form:focus-within { + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15); +} + +.input-form input { + flex: 1; + padding: 0.75rem; + border: 1px solid #ced4da; + border-radius: 4px; + font-size: 1rem; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + height: 40px; + box-sizing: border-box; + transition: all 0.2s ease; +} + +.input-form input:focus { + outline: none; + border-color: #5B2E8C; + box-shadow: 0 0 0 2px rgba(91, 46, 140, 0.25); +} + +.input-form button { + padding: 0 
12px; + border: none; + border-radius: 4px; + background-color: #76B900; + color: white; + cursor: pointer; + transition: all 0.2s ease; + height: 40px; + font-size: 14px; + white-space: nowrap; + font-weight: 400; + letter-spacing: 0.01em; + min-width: 100px; + display: flex; + align-items: center; + justify-content: center; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + box-sizing: border-box; +} + +.input-form button:hover:not(:disabled) { + background-color: #8ed600; + transform: translateY(-1px); +} + +.input-form button:active:not(:disabled) { + transform: translateY(0); +} + +.input-form button:disabled { + background-color: #a8d65c; + cursor: not-allowed; + opacity: 0.7; +} + +.loading-dots { + display: flex; + gap: 4px; + padding: 8px 12px; + background-color: #e9ecef; + border-radius: 8px; + width: fit-content; +} + +.loading-dots span { + width: 8px; + height: 8px; + background-color: #6c757d; + border-radius: 50%; + animation: bounce 1.4s infinite ease-in-out both; +} + +.loading-dots span:nth-child(1) { + animation-delay: -0.32s; +} + +.loading-dots span:nth-child(2) { + animation-delay: -0.16s; +} + +@keyframes bounce { + 0%, 80%, 100% { + transform: scale(0); + } + 40% { + transform: scale(1.0); + } +} + +@media (max-width: 768px) { + .controls { + flex-direction: column; + gap: 15px; + padding: 0 15px; + width: 100%; + max-width: 330px; + margin: 0 auto; + align-items: stretch; + } + + .server-ip-input { + flex-direction: column; + align-items: stretch; + padding: 1rem; + gap: 0.75rem; + width: 100%; + max-width: 300px; + margin: 0 auto; + } + + .server-ip-input label { + text-align: left; + margin-bottom: 0.25rem; + } + + .server-ip-input input { + width: 100%; + min-width: unset; + } + + .rag-toggle { + width: 100%; + height: 40px; + justify-content: center; + min-width: unset; + display: flex; + align-items: center; + padding: 0; + margin: 0; + } + + .switch { + margin: 0; + } + + .toggle-label { + line-height: 40px; + margin: 0 0 0 10px; + } + + .button-group { + width: 100%; + flex-direction: column; + gap: 15px; + min-width: unset; + margin: 0; + } + + .button-group button { + width: 100%; + height: 40px !important; + min-width: unset; + padding: 0; + display: flex; + align-items: center; + justify-content: center; + font-size: 14px; + white-space: nowrap; + flex: none; + margin: 0; + } + + .App-header h1 { + font-size: 1.25rem; + margin: 0 0 15px 0; + } + + .App-header button { + height: 40px; + padding: 0 15px; + display: flex; + align-items: center; + justify-content: center; + margin: 0; + } + + .chat-container { + padding: 0.5rem; + width: calc(100% - 1rem); + } + + .messages { + padding: 0.5rem; + } + + .message { + max-width: 90%; + } + + .input-form { + padding: 0.75rem; + margin-top: 0.5rem; + } + + .input-form input { + padding: 0.5rem; + } + + .input-form button { + padding: 0.5rem 1rem; + min-width: 80px; + } +} + +.message .markdown-body { + font-size: 1rem; + line-height: 1.8; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + margin-bottom: 1rem; +} + +/* Base spacing for all text elements */ +.message .markdown-body p, +.message .markdown-body li, +.message .markdown-body blockquote, +.message .markdown-body pre, +.message .markdown-body table { + margin: 2rem 0; + line-height: 1.8; +} + +/* Remove margins from first and last elements */ +.message .markdown-body p:first-child, +.message .markdown-body li:first-child, +.message 
.markdown-body blockquote:first-child, +.message .markdown-body pre:first-child, +.message .markdown-body table:first-child { + margin-top: 0; +} + +.message .markdown-body p:last-child, +.message .markdown-body li:last-child, +.message .markdown-body blockquote:last-child, +.message .markdown-body pre:last-child, +.message .markdown-body table:last-child { + margin-bottom: 0; +} + +/* Headers with consistent spacing */ +.message .markdown-body h1, +.message .markdown-body h2, +.message .markdown-body h3, +.message .markdown-body h4 { + margin: 3rem 0 2rem 0; + font-weight: 600; + line-height: 1.4; +} + +.message .markdown-body h1 { + font-size: 1.8rem; + border-bottom: 2px solid rgba(0, 0, 0, 0.1); + padding-bottom: 0.8rem; +} + +.message .markdown-body h2 { + font-size: 1.5rem; + border-bottom: 1px solid rgba(0, 0, 0, 0.1); + padding-bottom: 0.6rem; +} + +.message .markdown-body h3 { + font-size: 1.3rem; +} + +.message .markdown-body h4 { + font-size: 1.1rem; +} + +/* List specific spacing */ +.message .markdown-body ul, +.message .markdown-body ol { + margin: 2rem 0; + padding-left: 2.5rem; +} + +.message .markdown-body li { + margin: 1rem 0; + line-height: 1.8; +} + +.message .markdown-body li > p { + margin: 1rem 0; +} + +.message .markdown-body li > p:first-child { + margin-top: 0; +} + +.message .markdown-body li > p:last-child { + margin-bottom: 0; +} + +.message .markdown-body li > ul, +.message .markdown-body li > ol { + margin: 1rem 0; + padding-left: 2rem; +} + +/* Blockquote specific spacing */ +.message .markdown-body blockquote { + padding: 1.5rem 2rem; + border-left: 4px solid rgba(0, 0, 0, 0.1); + background-color: rgba(0, 0, 0, 0.02); + border-radius: 0 4px 4px 0; +} + +.message .markdown-body blockquote p { + margin: 1rem 0; +} + +.message .markdown-body blockquote p:first-child { + margin-top: 0; +} + +.message .markdown-body blockquote p:last-child { + margin-bottom: 0; +} + +.message .markdown-body code { + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', monospace; + font-size: 0.9em; + padding: 0.2em 0.4em; + background-color: rgba(0, 0, 0, 0.05); + border-radius: 3px; + color: #e83e8c; +} + +.message .markdown-body pre { + margin: 1rem 0; + padding: 1rem; + background-color: rgba(0, 0, 0, 0.03); + border-radius: 6px; + overflow-x: auto; + border: 1px solid rgba(0, 0, 0, 0.1); +} + +.message .markdown-body pre code { + padding: 0; + background-color: transparent; + color: inherit; + font-size: 0.9em; + line-height: 1.5; +} + +.message .markdown-body table { + margin: 1rem 0; + border-collapse: collapse; + width: 100%; + background-color: white; + border-radius: 6px; + overflow: hidden; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); +} + +.message .markdown-body th, +.message .markdown-body td { + padding: 0.75rem; + border: 1px solid rgba(0, 0, 0, 0.1); + text-align: left; +} + +.message .markdown-body th { + background-color: rgba(0, 0, 0, 0.02); + font-weight: 600; +} + +.message .markdown-body tr:nth-child(even) { + background-color: rgba(0, 0, 0, 0.01); +} + +.message .markdown-body hr { + margin: 1.5rem 0; + border: none; + border-top: 2px solid rgba(0, 0, 0, 0.1); +} + +.message .markdown-body img { + max-width: 100%; + height: auto; + border-radius: 6px; + margin: 1rem 0; +} + +.message .markdown-body a { + color: #5B2E8C; + text-decoration: none; + border-bottom: 1px solid rgba(91, 46, 140, 0.2); + transition: border-color 0.2s; +} + +.message .markdown-body a:hover { + border-color: #5B2E8C; +} + +/* User message specific styles */ 
+.message.user .markdown-body { + color: white; +} + +.message.user .markdown-body h1, +.message.user .markdown-body h2, +.message.user .markdown-body h3, +.message.user .markdown-body h4 { + color: white; + border-color: rgba(255, 255, 255, 0.2); +} + +.message.user .markdown-body blockquote { + background-color: rgba(255, 255, 255, 0.1); + border-left-color: rgba(255, 255, 255, 0.3); +} + +.message.user .markdown-body code { + background-color: rgba(255, 255, 255, 0.1); + color: #ffb6c1; +} + +.message.user .markdown-body pre { + background-color: rgba(255, 255, 255, 0.1); + border-color: rgba(255, 255, 255, 0.2); +} + +.message.user .markdown-body table { + background-color: rgba(255, 255, 255, 0.1); +} + +.message.user .markdown-body th { + background-color: rgba(255, 255, 255, 0.15); +} + +.message.user .markdown-body tr:nth-child(even) { + background-color: rgba(255, 255, 255, 0.05); +} + +.message.user .markdown-body hr { + border-color: rgba(255, 255, 255, 0.2); +} + +.message.user .markdown-body a { + color: #ffb6c1; + border-color: rgba(255, 255, 255, 0.3); +} + +.message.user .markdown-body a:hover { + border-color: #ffb6c1; +} + +.thinking-process, +.references { + margin: 1.5rem 0 1rem 0; + border: 1px solid rgba(0, 0, 0, 0.1); + border-radius: 6px; + background-color: #f5f6f7; + overflow: hidden; +} + +.thinking-process:first-child { + margin-top: 1rem; +} + +.references:first-child { + margin-top: 1rem; +} + +.thinking-process + .references { + margin-top: 1.5rem; +} + +.thinking-process summary, +.references summary { + cursor: pointer; + padding: 8px 12px; + font-weight: 500; + color: #666; + display: flex; + align-items: center; + justify-content: space-between; + transition: all 0.2s ease; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + background-color: #f5f6f7; +} + +.thinking-process[open] summary, +.references[open] summary { + background-color: #d8d9db; + color: #333; +} + +.thinking-process summary:hover, +.references summary:hover { + background-color: #e8e9eb; + color: #333; +} + +.thinking-process summary::after, +.references summary::after { + content: '▼'; + font-size: 0.8em; + transition: transform 0.2s ease; +} + +.thinking-process[open] summary::after, +.references[open] summary::after { + transform: rotate(180deg); +} + +.thinking-content, +.references-content { + padding: 12px; + border-top: 1px solid rgba(0, 0, 0, 0.1); + background-color: #f5f6f7; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; +} + +.message.assistant .thinking-content, +.message.assistant .references-content { + background-color: #f5f6f7; +} + +.message.user .thinking-content, +.message.user .references-content { + background-color: #f5f6f7; + border-color: rgba(255, 255, 255, 0.2); +} + +.reference-item { + margin-bottom: 12px; + padding: 12px; + background-color: white; + border-radius: 4px; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); + transition: box-shadow 0.2s ease; + position: relative; + padding-bottom: 12px; +} + +.reference-item:hover { + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); +} + +.reference-item:last-child { + margin-bottom: 0; +} + +.reference-score { + font-size: 0.9em; + color: #666; + margin-bottom: 8px; + font-weight: 500; +} + +.reference-document { + font-size: 0.9em; + margin-bottom: 8px; + font-weight: 500; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; +} + +.document-link { + 
position: absolute; + top: 12px; + right: 12px; + color: #5B2E8C; + text-decoration: none; + border: 1px solid #5B2E8C; + padding: 4px 8px; + border-radius: 4px; + font-size: 0.9em; + transition: all 0.2s ease; + background-color: white; + z-index: 2; +} + +.document-link:hover { + background-color: #5B2E8C; + color: white; + border-color: #4a2570; +} + +.reference-text { + background-color: white; + padding: 15px; + border-radius: 8px; + margin-bottom: 24px; + min-height: 60px; + position: relative; + cursor: pointer; + max-height: calc(100% - 48px); + overflow-y: auto; +} + +.reference-text.truncated { + max-height: 200px; + overflow: hidden; +} + +.reference-text.truncated::after { + content: '▼'; + position: absolute; + bottom: 8px; + right: 8px; + font-size: 0.8em; + color: #5B2E8C; + background-color: rgba(91, 46, 140, 0.1); + padding: 4px 8px; + border-radius: 4px; + pointer-events: none; + z-index: 1; +} + +.reference-text.expanded { + max-height: none; +} + +.reference-text.expanded::after { + content: '▲'; + position: absolute; + bottom: 8px; + right: 8px; + font-size: 0.8em; + color: #5B2E8C; + background-color: rgba(91, 46, 140, 0.1); + padding: 4px 8px; + border-radius: 4px; + pointer-events: none; + z-index: 1; +} + +/* Hide the arrow for non-truncated text */ +.reference-text:not(.truncated):not(.expanded)::after { + display: none; +} + +/* Add hover effect to the entire reference text */ +.reference-text.truncated:hover, +.reference-text.expanded:hover { + background-color: rgba(91, 46, 140, 0.05); +} + +/* Add hover effect to the arrow indicator */ +.reference-text.truncated:hover::after, +.reference-text.expanded:hover::after { + background-color: rgba(91, 46, 140, 0.2); +} + +.reference-text .expand-button { + display: none; /* Hide the text button since we're using the arrow indicator */ +} + +.button-group { + display: flex; + gap: 20px; + min-width: auto; +} + +.button-group button { + padding: 8px 12px; + border: none; + border-radius: 4px; + background-color: rgba(255, 255, 255, 0.1); + color: white; + cursor: pointer; + transition: background-color 0.2s; + height: 40px; + font-size: 14px; + white-space: nowrap; + font-weight: 400; + letter-spacing: 0.01em; + min-width: 100px; + display: flex; + align-items: center; + justify-content: center; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; +} + +.button-group button:hover { + background-color: #76B900; +} + +.error-message { + background-color: #fff3f3; + color: #d32f2f; + padding: 12px 35px 12px 20px; + border-radius: 4px; + margin: 10px 0; + border: 1px solid #ffcdd2; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + animation: slideIn 0.3s ease-out; + font-size: 0.9rem; + text-align: center; + max-width: 600px; + width: 100%; + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + position: relative; + overflow: hidden; +} + +.error-message::after { + content: ''; + position: absolute; + bottom: 0; + left: 0; + width: 100%; + height: 3px; + background-color: #d32f2f; + animation: progress 5s linear forwards; +} + +.error-icon { + font-size: 1.1rem; +} + +.error-message .close-button { + position: absolute; + right: 8px; + top: 50%; + transform: translateY(-50%); + background: none; + border: none; + color: #d32f2f; + cursor: pointer; + padding: 4px; + font-size: 1.1rem; + opacity: 0.7; + transition: opacity 0.2s ease; +} + +.error-message .close-button:hover { + opacity: 1; +} + +@keyframes slideIn { + from { + transform: 
translateY(-10px); + opacity: 0; + } + to { + transform: translateY(0); + opacity: 1; + } +} + +@keyframes progress { + from { + width: 100%; + } + to { + width: 0%; + } +} + +@keyframes fadeOut { + from { + opacity: 1; + transform: translateY(0); + } + to { + opacity: 0; + transform: translateY(-10px); + } +} + +.error-message.fade-out { + animation: fadeOut 0.3s ease-out forwards; +} diff --git a/community/chat-llama-nemotron/frontend/src/App.js b/community/chat-llama-nemotron/frontend/src/App.js new file mode 100644 index 00000000..f66ff0b9 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/App.js @@ -0,0 +1,610 @@ +import React, { useState, useRef, useEffect } from 'react'; +import ReactMarkdown from 'react-markdown'; +import remarkGfm from 'remark-gfm'; +import FileIngestion from './components/FileIngestion'; +import configLoader from './config/config_loader'; +import './App.css'; + +function App() { + const [messages, setMessages] = useState([]); + const [detailedThinking, setDetailedThinking] = useState(false); + const [conversationSummary, setConversationSummary] = useState(''); + const [isLoading, setIsLoading] = useState(false); + const [isTyping, setIsTyping] = useState(false); + const [useRag, setUseRag] = useState(true); + const [expandedRefs, setExpandedRefs] = useState({}); + const [appConfig, setAppConfig] = useState(null); + const [serverIp, setServerIp] = useState(() => { + // Load IP from localStorage on initial render + const savedIp = localStorage.getItem('serverIp') || ''; + return savedIp; + }); + const [error, setError] = useState(null); + const messagesEndRef = useRef(null); + const errorTimeoutRef = useRef(null); + + useEffect(() => { + // Load configuration + const loadConfig = async () => { + try { + const loadedConfig = await configLoader.getAppConfig(); + + if (!loadedConfig) { + return; + } + + setAppConfig(loadedConfig); + } catch (error) { + // Silent error handling for production + } + }; + loadConfig(); + }, []); + + // Save IP to localStorage and configLoader whenever it changes + useEffect(() => { + if (serverIp) { + localStorage.setItem('serverIp', serverIp); + configLoader.serverIp = serverIp; + } else { + localStorage.removeItem('serverIp'); + configLoader.serverIp = ''; + } + }, [serverIp]); + + // Auto-dismiss error after 5 seconds + useEffect(() => { + if (error) { + // Clear any existing timeout + if (errorTimeoutRef.current) { + clearTimeout(errorTimeoutRef.current); + } + + // Set new timeout + errorTimeoutRef.current = setTimeout(() => { + const errorElement = document.querySelector('.error-message'); + if (errorElement) { + errorElement.classList.add('fade-out'); + // Remove the error after animation completes + setTimeout(() => { + setError(null); + errorTimeoutRef.current = null; + }, 300); // Match the animation duration + } + }, 5000); + } + + return () => { + if (errorTimeoutRef.current) { + clearTimeout(errorTimeoutRef.current); + } + }; + }, [error]); + + const scrollToBottom = () => { + messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); + }; + + useEffect(() => { + scrollToBottom(); + }, [messages]); + + const updateSummary = async (messages) => { + try { + const maxMessages = appConfig?.ui?.chat?.summary?.max_messages || 5; + const summaryMessages = messages.slice(-maxMessages).filter(msg => + msg.role === 'user' || msg.role === 'assistant' + ); + + const response = await fetch(`${configLoader.api.llmServer.url}/v1/chat/completions`, { + method: 'POST', + headers: { + ...configLoader.api.llmServer.headers, + 
'X-LLM-IP': serverIp + }, + body: JSON.stringify({ + model: appConfig?.llm?.model?.name || "nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1", + messages: [ + { + role: "system", + content: appConfig?.ui?.chat?.summary?.system_prompt || "Provide a brief summary of the key points from this conversation. Focus on the main topics and decisions made." + }, + ...summaryMessages + ], + stream: false, + max_tokens: appConfig?.llm?.model?.max_tokens || 350 + }), + }); + + if (response.ok) { + const data = await response.json(); + return data.choices[0].message.content; + } + return "Unable to generate summary"; + } catch (error) { + return `Error generating summary: ${error.message}`; + } + }; + + const searchRAG = async (query) => { + if (!useRag) return []; + + try { + if (!appConfig) { + return []; + } + + console.log('Performing RAG search for query:', query); + + const response = await fetch(`${configLoader.api.proxy}/api/search`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query, + use_rag: true, + k: appConfig?.ui?.components?.search?.default_k || 5 + }), + }); + + if (!response.ok) { + console.error('RAG search failed:', await response.text()); + throw new Error('RAG search failed'); + } + + const data = await response.json(); + console.log('Raw RAG search results:', data); + + // Log detailed information about each result + if (data.results && data.results.length > 0) { + console.log('RAG Results Analysis:'); + data.results.forEach((result, index) => { + console.log(`\nResult ${index + 1}:`); + console.log(`- Score: ${result.score}`); + console.log(`- Source: ${result.source_file}`); + console.log(`- Text Preview: ${result.text.substring(0, 100)}...`); + }); + } else { + console.log('No RAG results found'); + } + + return data.results || []; + } catch (error) { + console.error('RAG search error:', error); + return []; + } + }; + + const prepareContextMessages = (messages, detailedThinking, conversationSummary, ragResults) => { + const contextMessages = []; + + // Add system message with safe fallback + const systemPrompt = appConfig?.ui?.chat?.context?.system_prompt || 'detailed thinking {status}.'; + contextMessages.push({ + role: "system", + content: systemPrompt.replace('{status}', detailedThinking ? 
'on' : 'off') + }); + + // Add conversation summary if exists and is not empty + if (conversationSummary && conversationSummary.trim() !== '') { + contextMessages.push({ + role: "user", + content: `Previous conversation summary: ${conversationSummary}` + }); + } + + // Process messages in pairs to ensure proper alternation + let i = 0; + while (i < messages.length) { + if (i < messages.length && messages[i].role === 'user') { + contextMessages.push({ + role: messages[i].role, + content: messages[i].content + }); + i++; + if (i < messages.length && messages[i].role === 'assistant') { + contextMessages.push({ + role: messages[i].role, + content: messages[i].content + }); + i++; + } + } else { + i++; + } + } + + return contextMessages; + }; + + const handleSubmit = async (e) => { + e.preventDefault(); + const input = e.target.elements.messageInput; + const message = input.value.trim(); + + if (!message) return; + + if (!serverIp) { + setError('Please enter the IP address of the NVIDIA Dynamo server before sending messages.'); + return; + } + + // Add user message + const userMessage = { role: 'user', content: message }; + setMessages(prev => [...prev, userMessage]); + input.value = ''; + setIsLoading(true); + setIsTyping(true); + + try { + let relevantResults = []; + + // Only perform RAG search if enabled + if (useRag) { + console.log('RAG is enabled, performing search...'); + const results = await searchRAG(message); + // Filter results by relevance score with safe fallback + const relevanceThreshold = appConfig?.ui?.components?.search?.relevance_threshold || 0.3; + relevantResults = results.filter(result => result.score > relevanceThreshold); + + console.log('Filtered RAG results:', { + totalResults: results.length, + relevantResults: relevantResults.length, + threshold: relevanceThreshold, + results: relevantResults.map(r => ({ + score: r.score, + source: r.source_file, + preview: r.text.substring(0, 100) + })) + }); + } else { + console.log('RAG is disabled, skipping search'); + } + + // Prepare context messages with RAG results + const contextMessages = prepareContextMessages( + messages, + detailedThinking, + conversationSummary, + relevantResults + ); + + // Add the new user message with RAG context if available + let userContent = message; + if (relevantResults.length > 0) { + const ragPrefix = appConfig?.ui?.chat?.context?.rag_prefix || 'Relevant information from knowledge base:\n'; + const ragContext = relevantResults.map(r => r.text).join("\n\n"); + userContent = `${message}\n\n${ragPrefix}${ragContext}`; + console.log('Combined user message with RAG context:', userContent); + } + + contextMessages.push({ + role: 'user', + content: userContent + }); + + console.log('Sending context to LLM:', { + messageCount: contextMessages.length, + hasRAGContext: relevantResults.length > 0, + systemMessages: contextMessages.filter(m => m.role === 'system').length, + contextPreview: contextMessages.map(m => ({ + role: m.role, + contentPreview: m.content.substring(0, 100) + '...' 
+ })) + }); + + const response = await fetch(`${configLoader.api.llmServer.url}/v1/chat/completions`, { + method: 'POST', + headers: { + ...configLoader.api.llmServer.headers, + 'X-LLM-IP': serverIp + }, + body: JSON.stringify({ + model: appConfig?.llm?.model?.name || "nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1", + messages: contextMessages, + stream: false, + max_tokens: appConfig?.llm?.model?.max_tokens || 32768, + temperature: appConfig?.llm?.model?.temperature || 0.6, + top_p: appConfig?.llm?.model?.top_p || 0.95 + }), + }); + + if (!response.ok) { + throw new Error('LLM server is not accessible'); + } + + const data = await response.json(); + const messageContent = data.choices[0].message.content; + + if (!messageContent) { + throw new Error('Received empty response from LLM server'); + } + + const assistantMessage = { + role: 'assistant', + content: messageContent, + references: useRag ? JSON.stringify(relevantResults) : '', + showThinking: detailedThinking + }; + + console.log('Received LLM response:', { + hasReferences: relevantResults.length > 0, + referenceCount: relevantResults.length, + thinkingEnabled: detailedThinking, + responsePreview: messageContent.substring(0, 100) + '...', + hasThinkingTags: messageContent.includes('') + }); + + setMessages(prev => [...prev, assistantMessage]); + + // Update summary after every 3 messages + if ((messages.length + 2) % 3 === 0) { + const newSummary = await updateSummary([...messages, userMessage, assistantMessage]); + setConversationSummary(newSummary); + } + } catch (error) { + setIsTyping(false); + setMessages(prev => [...prev, { + role: 'assistant', + content: error.message.includes('LLM server is not accessible') + ? 'The LLM server is not accessible at the moment. Please check if the server is running and try again.' + : 'Sorry, there was an error processing your request. Please try again.', + references: '', + showThinking: detailedThinking + }]); + } finally { + setIsLoading(false); + setIsTyping(false); + } + }; + + const clearChat = () => { + setMessages([]); + setConversationSummary(''); + }; + + const clearRAG = async () => { + try { + // Get current RAG status + const statusResponse = await fetch(`http://localhost:8001/api/rag-status`); + const statusData = await statusResponse.json(); + + // If RAG is empty, just notify the user + if (statusData.document_count === 0) { + alert('RAG index is already empty.'); + return; + } + + // If RAG is not empty, ask for confirmation + const confirmed = window.confirm('Are you sure you would like to clear the RAG database?'); + if (!confirmed) { + return; + } + + // Clear the RAG index on the backend + const response = await fetch(`http://localhost:8001/api/clear-rag`, { + method: 'POST', + }); + + if (!response.ok) { + throw new Error('Failed to clear RAG'); + } + + const data = await response.json(); + // Don't clear messages array - preserve chat history and context + + alert(`Successfully cleared RAG index. Deleted ${data.deleted_chunks} chunks and ${data.deleted_documents} documents.`); + } catch (error) { + console.error('Error clearing RAG:', error); + alert('Failed to clear RAG index. 
Please check if the RAG server is running on port 8001.'); + } + }; + + const toggleDetailedThinking = () => { + setDetailedThinking(prev => !prev); + }; + + const toggleReference = (index) => { + setExpandedRefs(prev => ({ + ...prev, + [index]: !prev[index] + })); + }; + + const renderMessage = (message) => { + if (message.role === 'user') { + return {message.content}; + } + + // For assistant messages, handle thinking content and references + const thinkingRegex = /<(?:think|thinking|reasoning)>([\s\S]*?)<\/(?:think|thinking|reasoning)>/g; + const parts = message.content.split(thinkingRegex); + const hasThinkingContent = parts.length > 1; + const shouldShowThinking = message.showThinking !== undefined ? message.showThinking : detailedThinking; + + const content = parts.map((part, i) => { + if (i % 2 === 1) { + // This is thinking content + const trimmedContent = part.trim(); + return shouldShowThinking && hasThinkingContent && trimmedContent !== '' ? ( +
+ Show thinking process +
+ {part} +
+
+ ) : null; + } + // This is regular content + return part ? {part} : null; + }); + + // Add references section if available + const references = message.references ? (() => { + try { + const parsedRefs = JSON.parse(message.references); + const relevanceThreshold = appConfig?.ui?.components?.search?.relevance_threshold || 0.3; + const filteredRefs = parsedRefs.filter(ref => ref.score > relevanceThreshold); + + // Only show references section if there are filtered references + return filteredRefs.length > 0 ? ( +
+ Show references +
+ {filteredRefs.map((ref, index) => ( +
+
Relevance: {Math.round(ref.score * 100)}%
+ +
600 + ? (expandedRefs[index] ? 'expanded' : 'truncated') + : '' + }`} + onClick={ref.text.length > 600 ? () => toggleReference(index) : undefined} + style={{ cursor: ref.text.length > 600 ? 'pointer' : 'default' }} + > + + {ref.text} + + {!expandedRefs[index] && ref.text.length > 600 && ( +
+ Show more +
+ )} + {expandedRefs[index] && ref.text.length > 600 && ( +
+ Show less +
+ )} +
+
+ ))} +
+
+ ) : null; + } catch (error) { + return null; + } + })() : null; + + return ( + <> + {content} + {references} + + ); + }; + + return ( +
+
+

Chat with Llama-3.1-Nemotron-Nano-4B-v1.1

+
+ + { + setServerIp(e.target.value); + }} + placeholder="Enter server IP" + /> +
+ {error && ( +
+ ⚠️ + {error} + +
+ )} +
+
+ + Use RAG +
+
+ + Use Reasoning +
+
+ + +
+
+
+ +
+ + +
+ {messages.map((message, index) => ( +
+ {renderMessage(message)} +
+ ))} + {isTyping && ( +
+
+ + + +
+
+ )} +
+
+ +
+ + +
+
+
+ ); +} + +export default App; diff --git a/community/chat-llama-nemotron/frontend/src/components/FileIngestion.css b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.css new file mode 100644 index 00000000..65dea5d7 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.css @@ -0,0 +1,198 @@ +.file-ingestion { + padding: 10px; + background-color: #f5f5f5; + border-radius: 8px; + margin: 10px 0; +} + +.file-ingestion h2 { + margin: 0 0 10px 0; + color: #333; +} + +.file-upload-area { + padding: 15px; + border: 2px dashed #ccc; + border-radius: 8px; + text-align: center; + margin-bottom: 10px; + background-color: white; + transition: all 0.3s ease; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + min-height: 120px; +} + +.file-upload-area.dragging { + border-color: #76B900; + background-color: rgba(118, 185, 0, 0.05); +} + +.file-upload-area.uploading { + border-color: #e0e0e0; + background-color: #f8f8f8; + cursor: not-allowed; +} + +.file-upload-area.uploading * { + pointer-events: none; +} + +.file-upload-area.uploading .error, +.file-upload-area.uploading .error * { + pointer-events: auto; +} + +.upload-button { + display: inline-block; + padding: 10px 20px; + background-color: rgba(255, 255, 255, 0.1); + color: #333; + border: none; + border-radius: 4px; + cursor: pointer; + font-size: 14px; + transition: all 0.2s ease; + font-weight: 400; + letter-spacing: 0.01em; + margin: 0; +} + +.upload-button:hover { + background-color: #76B900; + color: white; +} + +.upload-button.disabled { + background-color: #e0e0e0; + color: #999; + cursor: not-allowed; + pointer-events: none; +} + +.file-upload-area.dragging .upload-button { + background-color: #76B900; + color: white; +} + +.progress-container { + margin: 0 auto; + width: 80%; + max-width: 300px; + position: relative; +} + +.progress-container:not(:empty) { + margin-top: 15px; +} + +.progress-bar { + width: 100%; + height: 4px; + background-color: rgba(118, 185, 0, 0.1); + border-radius: 2px; + overflow: hidden; +} + +.progress-fill { + height: 100%; + background-color: #76B900; + transition: width 0.3s ease; +} + +.upload-status { + margin-top: 8px; + font-size: 14px; + color: #666; +} + +.error { + background-color: #fff3f3; + color: #d32f2f; + padding: 12px 35px 12px 20px; + border-radius: 4px; + margin: 15px auto 0; + border: 1px solid #ffcdd2; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + animation: slideIn 0.3s ease-out; + font-size: 0.9rem; + text-align: center; + width: 80%; + max-width: 300px; + display: flex; + align-items: center; + justify-content: center; + position: relative; + overflow: hidden; +} + +.error::after { + content: ''; + position: absolute; + bottom: 0; + left: 0; + width: 100%; + height: 3px; + background-color: #d32f2f; + animation: progress 3s linear forwards; +} + +.error-icon { + font-size: 1.1rem; +} + +.error .close-button { + position: absolute; + right: 8px; + top: 50%; + transform: translateY(-50%); + background: none; + border: none; + color: #d32f2f; + cursor: pointer; + padding: 4px; + font-size: 1.1rem; + opacity: 0.7; + transition: opacity 0.2s ease; +} + +.error .close-button:hover { + opacity: 1; +} + +@keyframes slideIn { + from { + transform: translateY(-10px); + opacity: 0; + } + to { + transform: translateY(0); + opacity: 1; + } +} + +@keyframes progress { + from { + width: 100%; + } + to { + width: 0%; + } +} + +@keyframes fadeOut { + from { + opacity: 1; + transform: translateY(0); + } + to { + 
opacity: 0; + transform: translateY(-10px); + } +} + +.error.fade-out { + animation: fadeOut 0.3s ease-out forwards; +} \ No newline at end of file diff --git a/community/chat-llama-nemotron/frontend/src/components/FileIngestion.js b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.js new file mode 100644 index 00000000..3e63ff40 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.js @@ -0,0 +1,295 @@ +import React, { useState, useEffect, useRef } from 'react'; +import './FileIngestion.css'; +import configLoader from '../config/config_loader'; + +function FileIngestion() { + const [isDragging, setIsDragging] = useState(false); + const [uploadStatus, setUploadStatus] = useState(''); + const [uploadProgress, setUploadProgress] = useState(0); + const [isUploading, setIsUploading] = useState(false); + const [currentJobId, setCurrentJobId] = useState(null); + const [error, setError] = useState(null); + const [appConfig, setAppConfig] = useState(null); + const fileInputRef = useRef(null); + const errorTimeoutRef = useRef(null); + + useEffect(() => { + // Load configuration + const loadConfig = async () => { + try { + const loadedConfig = await configLoader.getAppConfig(); + if (loadedConfig) { + setAppConfig(loadedConfig); + } + } catch (error) { + console.error('Error loading config:', error); + } + }; + loadConfig(); + }, []); + + // eslint-disable-next-line react-hooks/exhaustive-deps + useEffect(() => { + let eventSource; + if (currentJobId) { + // Use local RAG server for progress tracking + eventSource = new EventSource(`http://localhost:8001/api/upload/progress/${currentJobId}`); + + eventSource.onmessage = (event) => { + const data = JSON.parse(event.data); + console.log('Progress update:', data); + + if (data.type === 'processing') { + if (data.stage === 'progress') { + // Smoothly update progress + const targetProgress = data.progress_percent; + const currentProgress = uploadProgress; + + // If the jump is too large, animate it + if (Math.abs(targetProgress - currentProgress) > 5) { + const step = (targetProgress - currentProgress) / 5; + let current = currentProgress; + + const animate = () => { + current += step; + if ((step > 0 && current >= targetProgress) || + (step < 0 && current <= targetProgress)) { + setUploadProgress(targetProgress); + } else { + setUploadProgress(current); + requestAnimationFrame(animate); + } + }; + + requestAnimationFrame(animate); + } else { + setUploadProgress(targetProgress); + } + + // Update status message if provided + if (data.message) { + setUploadStatus(data.message); + } + } + } else if (data.type === 'complete') { + setUploadProgress(100); + setUploadStatus(appConfig?.ui?.components?.file_upload?.messages?.complete || 'Processing complete'); + eventSource.close(); + setCurrentJobId(null); + // Reset the file input + if (fileInputRef.current) { + fileInputRef.current.value = ''; + } + // Reset all states after a short delay + setTimeout(() => { + setUploadStatus(''); + setUploadProgress(0); + setIsUploading(false); + setIsDragging(false); + }, appConfig?.ui?.components?.file_upload?.timeouts?.status_reset || 2000); + } else if (data.type === 'error') { + setError(appConfig?.ui?.components?.file_upload?.messages?.error || 'Connection error. 
Please try again.'); + eventSource.close(); + setCurrentJobId(null); + // Reset the file input + if (fileInputRef.current) { + fileInputRef.current.value = ''; + } + // Reset all states after a short delay + setTimeout(() => { + setUploadStatus(''); + setUploadProgress(0); + setIsUploading(false); + setIsDragging(false); + }, appConfig?.ui?.components?.file_upload?.timeouts?.error_reset || 2000); + } + }; + + eventSource.onerror = () => { + console.error('EventSource failed'); + eventSource.close(); + setCurrentJobId(null); + // Reset the file input + if (fileInputRef.current) { + fileInputRef.current.value = ''; + } + setError(appConfig?.ui?.components?.file_upload?.messages?.error || 'Connection error. Please try again.'); + // Reset all states after a short delay + setTimeout(() => { + setUploadStatus(''); + setUploadProgress(0); + setIsUploading(false); + setIsDragging(false); + }, appConfig?.ui?.components?.file_upload?.timeouts?.error_reset || 2000); + }; + } + + return () => { + if (eventSource) { + eventSource.close(); + } + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [currentJobId, appConfig]); + + // Clean up timeouts on unmount + useEffect(() => { + return () => { + if (errorTimeoutRef.current) { + clearTimeout(errorTimeoutRef.current); + } + }; + }, []); + + // Handle error auto-dismissal + useEffect(() => { + if (error) { + if (errorTimeoutRef.current) { + clearTimeout(errorTimeoutRef.current); + } + errorTimeoutRef.current = setTimeout(() => { + setError(null); + }, 3000); + } + }, [error]); + + const handleDragOver = (e) => { + e.preventDefault(); + if (!isUploading) { + setIsDragging(true); + } + }; + + const handleDragLeave = (e) => { + e.preventDefault(); + setIsDragging(false); + }; + + const uploadFiles = async (files) => { + try { + setIsUploading(true); + setUploadProgress(0); + setUploadStatus(appConfig?.ui?.components?.file_upload?.messages?.starting || 'Starting upload...'); + + const formData = new FormData(); + files.forEach(file => { + formData.append('files', file); + }); + + // Use local RAG server for file upload + const response = await fetch(`http://localhost:8001/api/upload`, { + method: 'POST', + body: formData, + }).catch(error => { + if (error.name === 'TypeError' && error.message.includes('Failed to fetch')) { + throw new Error('RAG server is not accessible. Please check if the RAG server is running on port 8001.'); + } + throw error; + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || 'Upload failed'); + } + + const result = await response.json(); + setCurrentJobId(result.job_id); + setUploadStatus(appConfig?.ui?.components?.file_upload?.messages?.processing || 'Processing files...'); + + } catch (error) { + console.error('Upload error:', error); + setError(error.message.includes('Server is not accessible') + ? 'Server is not accessible. Please check if the server is running and try again.' + : (appConfig?.ui?.components?.file_upload?.messages?.error || 'Connection error. 
Please try again.')); + setUploadProgress(0); + // Reset the file input on error + if (fileInputRef.current) { + fileInputRef.current.value = ''; + } + setTimeout(() => { + setUploadStatus(''); + setIsUploading(false); + }, appConfig?.ui?.components?.file_upload?.timeouts?.error_reset || 3000); + } + }; + + const handleDrop = async (e) => { + e.preventDefault(); + setIsDragging(false); + if (isUploading) return; + + const files = Array.from(e.dataTransfer.files); + if (files.length === 0) return; + await uploadFiles(files); + }; + + const handleFileSelect = async (e) => { + if (isUploading) return; + + const files = Array.from(e.target.files); + if (files.length === 0) return; + await uploadFiles(files); + }; + + return ( +
+ + +
+ {isUploading && !error && ( + <> +
+
+
+ {uploadStatus &&
{uploadStatus}
} + + )} +
+ {error && ( +
+ {error} + +
+ )} +
+ ); +} + +export default FileIngestion; \ No newline at end of file diff --git a/community/chat-llama-nemotron/frontend/src/config/app_config.yaml b/community/chat-llama-nemotron/frontend/src/config/app_config.yaml new file mode 100644 index 00000000..677708a0 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/config/app_config.yaml @@ -0,0 +1,32 @@ +# UI Configuration +ui: + components: + file_upload: + max_file_size: 10485760 # 10MB in bytes + accepted_file_types: + - "application/pdf" + - "text/plain" + - "text/markdown" + - "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + - "text/html" + timeouts: + status_reset: 3000 # milliseconds + error_reset: 3000 # milliseconds + messages: + starting: "Starting upload..." + processing: "Processing files..." + complete: "Processing complete" + error: "Connection error. Please try again." + + search: + placeholder: "Enter your search query..." + relevance_threshold: 0.3 # Adjusted from 0.5 to 0.3 for better results + + progress: + polling_interval: 1000 # milliseconds + + chat: + summary: + max_messages: 5 + system_prompt: "detailed thinking {status}" + rag_prefix: "Relevant information from knowledge base:\n" \ No newline at end of file diff --git a/community/chat-llama-nemotron/frontend/src/config/config_loader.js b/community/chat-llama-nemotron/frontend/src/config/config_loader.js new file mode 100644 index 00000000..98c481e1 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/config/config_loader.js @@ -0,0 +1,87 @@ +import yaml from 'js-yaml'; + +class ConfigLoader { + constructor() { + if (ConfigLoader.instance) { + return ConfigLoader.instance; + } + ConfigLoader.instance = this; + + this.configs = {}; + this._serverIp = localStorage.getItem('serverIp') || ''; + + // Initialize RAG server URL + const ip = window.appConfig?.api?.ip || 'localhost'; + const port = process.env.REACT_APP_API_PORT || window.appConfig?.api?.port || '8001'; + this._ragServerUrl = `http://${ip}:${port}`; + console.log('RAG server URL:', this._ragServerUrl); + } + + set serverIp(ip) { + this._serverIp = ip; + localStorage.setItem('serverIp', ip); + } + + get serverIp() { + return this._serverIp; + } + + get api() { + const self = this; + return { + get proxy() { + return self._ragServerUrl; + }, + get llmServer() { + const ip = window.appConfig?.llm_proxy?.ip || 'localhost'; + const port = process.env.REACT_APP_LLM_PROXY_PORT || window.appConfig?.llm_proxy?.port || '8002'; + const url = `http://${ip}:${port}`; + return { + url, + headers: { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'X-LLM-IP': self._serverIp + } + }; + } + }; + } + + async loadConfig(configName) { + // Return cached config if available + if (this.configs[configName]) { + return this.configs[configName]; + } + + try { + const publicPath = `/config/${configName}.yaml`; + const response = await fetch(publicPath); + + if (!response.ok) { + throw new Error(`Failed to load config: ${configName}`); + } + + const yamlText = await response.text(); + const config = yaml.load(yamlText); + + if (!config) { + throw new Error('YAML parsing resulted in null or undefined'); + } + + this.configs[configName] = config; + return config; + } catch (error) { + console.error(`Error loading config ${configName}:`, error); + throw error; + } + } + + async getAppConfig() { + return this.loadConfig('app_config'); + } +} + +// Create a singleton instance +const configLoader = new ConfigLoader(); +export default configLoader; \ No newline at end of 
file diff --git a/community/chat-llama-nemotron/frontend/src/index.css b/community/chat-llama-nemotron/frontend/src/index.css new file mode 100644 index 00000000..c4b77667 --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/index.css @@ -0,0 +1,14 @@ +body { + margin: 0; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + background-color: #f5f5f5; +} + +code { + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', + monospace; +} diff --git a/community/chat-llama-nemotron/frontend/src/index.js b/community/chat-llama-nemotron/frontend/src/index.js new file mode 100644 index 00000000..2cb1087e --- /dev/null +++ b/community/chat-llama-nemotron/frontend/src/index.js @@ -0,0 +1,11 @@ +import React from 'react'; +import ReactDOM from 'react-dom/client'; +import './index.css'; +import App from './App'; + +const root = ReactDOM.createRoot(document.getElementById('root')); +root.render( + + + +); From da7c6e09693537bd9c5ce44419a2a6c53073f01c Mon Sep 17 00:00:00 2001 From: Ekaterina Sirazitdinova Date: Fri, 13 Jun 2025 16:13:46 +0400 Subject: [PATCH 2/5] added spdx and referenced the demo in the community readme --- community/README.md | 6 +++++- .../config/agg_llama_nemotron_4b.yaml | 10 +++++++--- .../backend-dynamo/llm-proxy/config.yaml | 18 ++++++++++++++++++ .../backend-dynamo/llm-proxy/proxy.py | 18 ++++++++++++++++++ .../chat-llama-nemotron/backend-rag/src/app.py | 18 ++++++++++++++++++ .../backend-rag/src/config/app_config.yaml | 18 ++++++++++++++++++ .../backend-rag/src/config/config_loader.py | 18 ++++++++++++++++++ .../backend-rag/src/config/rag_config.yaml | 18 ++++++++++++++++++ .../backend-rag/src/rag_service.py | 18 ++++++++++++++++++ .../frontend/public/config/app_config.yaml | 18 ++++++++++++++++++ .../chat-llama-nemotron/frontend/src/App.css | 18 ++++++++++++++++++ .../chat-llama-nemotron/frontend/src/App.js | 18 ++++++++++++++++++ .../frontend/src/components/FileIngestion.css | 18 ++++++++++++++++++ .../frontend/src/components/FileIngestion.js | 18 ++++++++++++++++++ .../frontend/src/config/app_config.yaml | 17 +++++++++++++++++ .../frontend/src/config/config_loader.js | 18 ++++++++++++++++++ .../chat-llama-nemotron/frontend/src/index.css | 18 ++++++++++++++++++ .../chat-llama-nemotron/frontend/src/index.js | 18 ++++++++++++++++++ 18 files changed, 299 insertions(+), 4 deletions(-) diff --git a/community/README.md b/community/README.md index 52deb5f3..eacc0fb2 100644 --- a/community/README.md +++ b/community/README.md @@ -82,4 +82,8 @@ Community examples are sample code and deployments for RAG pipelines that are no * [AI Podcast Assistant](./ai-podcast-assistant/) - This example demonstrates a comprehensive workflow for processing podcast audio using the Phi-4-Multimodal LLM through NVIDIA NIM Microservices. It includes functionality for generating detailed notes from audio content, creating concise summaries, and translating both transcriptions and summaries into different languages. The implementation handles long audio files by automatically chunking them for efficient processing and preserves formatting during translation. \ No newline at end of file + This example demonstrates a comprehensive workflow for processing podcast audio using the Phi-4-Multimodal LLM through NVIDIA NIM Microservices. 
It includes functionality for generating detailed notes from audio content, creating concise summaries, and translating both transcriptions and summaries into different languages. The implementation handles long audio files by automatically chunking them for efficient processing and preserves formatting during translation. + + * [Chat with LLM](./chat-llama-nemotron/) + + This is a React-based conversational UI designed for interacting with a powerful local LLM. It incorporates RAG to enhance contextual understanding and is backed by an NVIDIA Dynamo inference server running the NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1 model. The setup enables low-latency, cloud-free AI assistant capabilities, with live document search and reasoning, all deployable on local or edge infrastructure. \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml b/community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml index f0596d54..78dc5792 100644 --- a/community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml +++ b/community/chat-llama-nemotron/backend-dynamo/config/agg_llama_nemotron_4b.yaml @@ -1,17 +1,21 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Licensed under the8B Apache License, Version 2.0 (the "License"); +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + + Common: model: nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1 max-model-len: 131072 diff --git a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml index 248f2366..75ede742 100644 --- a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml +++ b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/config.yaml @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + # NVIDIA Dynamo Proxy Configuration # LLM Server Configuration diff --git a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py index eb47bf66..d5757bd1 100644 --- a/community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py +++ b/community/chat-llama-nemotron/backend-dynamo/llm-proxy/proxy.py @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + """ Simple NVIDIA Dynamo Proxy Server diff --git a/community/chat-llama-nemotron/backend-rag/src/app.py b/community/chat-llama-nemotron/backend-rag/src/app.py index 3bf30ec7..ca76916f 100644 --- a/community/chat-llama-nemotron/backend-rag/src/app.py +++ b/community/chat-llama-nemotron/backend-rag/src/app.py @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from fastapi import FastAPI, HTTPException, UploadFile, File, BackgroundTasks from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel diff --git a/community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml b/community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml index bf06e68f..b2a88164 100644 --- a/community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml +++ b/community/chat-llama-nemotron/backend-rag/src/config/app_config.yaml @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + # FastAPI Application Configuration app: name: "RAG Service API" diff --git a/community/chat-llama-nemotron/backend-rag/src/config/config_loader.py b/community/chat-llama-nemotron/backend-rag/src/config/config_loader.py index b361d8b4..5e160a69 100644 --- a/community/chat-llama-nemotron/backend-rag/src/config/config_loader.py +++ b/community/chat-llama-nemotron/backend-rag/src/config/config_loader.py @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import yaml import os from pathlib import Path diff --git a/community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml b/community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml index 6fda16a3..a177fc39 100644 --- a/community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml +++ b/community/chat-llama-nemotron/backend-rag/src/config/rag_config.yaml @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + # RAG Service Configuration model: name: "all-MiniLM-L6-v2" diff --git a/community/chat-llama-nemotron/backend-rag/src/rag_service.py b/community/chat-llama-nemotron/backend-rag/src/rag_service.py index 3f3de097..2c2aa439 100644 --- a/community/chat-llama-nemotron/backend-rag/src/rag_service.py +++ b/community/chat-llama-nemotron/backend-rag/src/rag_service.py @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + from typing import List, Dict, Any import faiss import numpy as np diff --git a/community/chat-llama-nemotron/frontend/public/config/app_config.yaml b/community/chat-llama-nemotron/frontend/public/config/app_config.yaml index 7e327d66..4ef14342 100644 --- a/community/chat-llama-nemotron/frontend/public/config/app_config.yaml +++ b/community/chat-llama-nemotron/frontend/public/config/app_config.yaml @@ -1,3 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + # Frontend Application Configuration app: name: "RAG Client" diff --git a/community/chat-llama-nemotron/frontend/src/App.css b/community/chat-llama-nemotron/frontend/src/App.css index ab775816..4979120e 100644 --- a/community/chat-llama-nemotron/frontend/src/App.css +++ b/community/chat-llama-nemotron/frontend/src/App.css @@ -1,3 +1,21 @@ +/* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright (c) 2023-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + + .App { text-align: center; min-height: 100vh; diff --git a/community/chat-llama-nemotron/frontend/src/App.js b/community/chat-llama-nemotron/frontend/src/App.js index f66ff0b9..83ee2e78 100644 --- a/community/chat-llama-nemotron/frontend/src/App.js +++ b/community/chat-llama-nemotron/frontend/src/App.js @@ -1,3 +1,21 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright (c) 2023-2025, NVIDIA CORPORATION. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + import React, { useState, useRef, useEffect } from 'react'; import ReactMarkdown from 'react-markdown'; import remarkGfm from 'remark-gfm'; diff --git a/community/chat-llama-nemotron/frontend/src/components/FileIngestion.css b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.css index 65dea5d7..3d51c877 100644 --- a/community/chat-llama-nemotron/frontend/src/components/FileIngestion.css +++ b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.css @@ -1,3 +1,21 @@ +/* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright (c) 2023-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + + .file-ingestion { padding: 10px; background-color: #f5f5f5; diff --git a/community/chat-llama-nemotron/frontend/src/components/FileIngestion.js b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.js index 3e63ff40..bf2e174b 100644 --- a/community/chat-llama-nemotron/frontend/src/components/FileIngestion.js +++ b/community/chat-llama-nemotron/frontend/src/components/FileIngestion.js @@ -1,3 +1,21 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright (c) 2023-2025, NVIDIA CORPORATION. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + import React, { useState, useEffect, useRef } from 'react'; import './FileIngestion.css'; import configLoader from '../config/config_loader'; diff --git a/community/chat-llama-nemotron/frontend/src/config/app_config.yaml b/community/chat-llama-nemotron/frontend/src/config/app_config.yaml index 677708a0..962990ed 100644 --- a/community/chat-llama-nemotron/frontend/src/config/app_config.yaml +++ b/community/chat-llama-nemotron/frontend/src/config/app_config.yaml @@ -1,3 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # UI Configuration ui: components: diff --git a/community/chat-llama-nemotron/frontend/src/config/config_loader.js b/community/chat-llama-nemotron/frontend/src/config/config_loader.js index 98c481e1..245e8ef0 100644 --- a/community/chat-llama-nemotron/frontend/src/config/config_loader.js +++ b/community/chat-llama-nemotron/frontend/src/config/config_loader.js @@ -1,3 +1,21 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright (c) 2023-2025, NVIDIA CORPORATION. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + import yaml from 'js-yaml'; class ConfigLoader { diff --git a/community/chat-llama-nemotron/frontend/src/index.css b/community/chat-llama-nemotron/frontend/src/index.css index c4b77667..73169ce1 100644 --- a/community/chat-llama-nemotron/frontend/src/index.css +++ b/community/chat-llama-nemotron/frontend/src/index.css @@ -1,3 +1,21 @@ +/* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright (c) 2023-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + + body { margin: 0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', diff --git a/community/chat-llama-nemotron/frontend/src/index.js b/community/chat-llama-nemotron/frontend/src/index.js index 2cb1087e..c639ce01 100644 --- a/community/chat-llama-nemotron/frontend/src/index.js +++ b/community/chat-llama-nemotron/frontend/src/index.js @@ -1,3 +1,21 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright (c) 2023-2025, NVIDIA CORPORATION. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + import React from 'react'; import ReactDOM from 'react-dom/client'; import './index.css'; From 2308543e936f5eb8527a3501cd1d25c028e4a1bf Mon Sep 17 00:00:00 2001 From: Ekaterina Sirazitdinova Date: Fri, 13 Jun 2025 16:15:32 +0400 Subject: [PATCH 3/5] minor formatting fix --- community/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/README.md b/community/README.md index eacc0fb2..49b50e60 100644 --- a/community/README.md +++ b/community/README.md @@ -84,6 +84,6 @@ Community examples are sample code and deployments for RAG pipelines that are no This example demonstrates a comprehensive workflow for processing podcast audio using the Phi-4-Multimodal LLM through NVIDIA NIM Microservices. It includes functionality for generating detailed notes from audio content, creating concise summaries, and translating both transcriptions and summaries into different languages. The implementation handles long audio files by automatically chunking them for efficient processing and preserves formatting during translation. - * [Chat with LLM](./chat-llama-nemotron/) +* [Chat with LLM](./chat-llama-nemotron/) This is a React-based conversational UI designed for interacting with a powerful local LLM. It incorporates RAG to enhance contextual understanding and is backed by an NVIDIA Dynamo inference server running the NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1 model. The setup enables low-latency, cloud-free AI assistant capabilities, with live document search and reasoning, all deployable on local or edge infrastructure. \ No newline at end of file From 100e90457be1db5f6f81538d831bfa89bd96b913 Mon Sep 17 00:00:00 2001 From: Ekaterina Sirazitdinova Date: Fri, 13 Jun 2025 16:16:40 +0400 Subject: [PATCH 4/5] made the title more descriptive --- community/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/README.md b/community/README.md index 49b50e60..5a074cc2 100644 --- a/community/README.md +++ b/community/README.md @@ -84,6 +84,6 @@ Community examples are sample code and deployments for RAG pipelines that are no This example demonstrates a comprehensive workflow for processing podcast audio using the Phi-4-Multimodal LLM through NVIDIA NIM Microservices. It includes functionality for generating detailed notes from audio content, creating concise summaries, and translating both transcriptions and summaries into different languages. The implementation handles long audio files by automatically chunking them for efficient processing and preserves formatting during translation. -* [Chat with LLM](./chat-llama-nemotron/) +* [Chat with LLM Llama 3.1 Nemotron Nano 4B](./chat-llama-nemotron/) This is a React-based conversational UI designed for interacting with a powerful local LLM. It incorporates RAG to enhance contextual understanding and is backed by an NVIDIA Dynamo inference server running the NVIDIA Llama-3.1-Nemotron-Nano-4B-v1.1 model. The setup enables low-latency, cloud-free AI assistant capabilities, with live document search and reasoning, all deployable on local or edge infrastructure. 
\ No newline at end of file From b74e78ba69a9fa55372b03ecb0651e5fbf65a90e Mon Sep 17 00:00:00 2001 From: Ekaterina Sirazitdinova Date: Fri, 4 Jul 2025 17:02:01 +0400 Subject: [PATCH 5/5] minor improvements --- .../backend-rag/requirements.txt | 19 ++++++- .../backend-rag/src/rag_service.py | 53 ++++++++++++++----- .../frontend/public/config/app_config.yaml | 2 +- .../frontend/public/robots.txt | 3 -- .../chat-llama-nemotron/frontend/src/App.css | 46 ++++++++++++++++ .../chat-llama-nemotron/frontend/src/App.js | 31 +++++++---- .../frontend/src/config/app_config.yaml | 5 +- 7 files changed, 129 insertions(+), 30 deletions(-) delete mode 100644 community/chat-llama-nemotron/frontend/public/robots.txt diff --git a/community/chat-llama-nemotron/backend-rag/requirements.txt b/community/chat-llama-nemotron/backend-rag/requirements.txt index 85fa9dc7..d2c65920 100644 --- a/community/chat-llama-nemotron/backend-rag/requirements.txt +++ b/community/chat-llama-nemotron/backend-rag/requirements.txt @@ -1,11 +1,28 @@ +# Web Framework and API fastapi==0.104.1 uvicorn==0.24.0 python-multipart==0.0.6 -sentence-transformers==2.2.2 + +# Data Validation +pydantic==2.5.0 + +# RAG and ML Dependencies +sentence-transformers>=2.5.0 faiss-cpu==1.7.4 +numpy==1.24.3 +torch==2.1.0 + +# Text Processing +langchain>=0.1.0 +langchain-community>=0.0.10 +langchain-text-splitters>=0.0.1 + +# File Processing PyPDF2==3.0.1 python-docx==1.0.1 beautifulsoup4==4.12.2 markdown==3.5.1 + +# Utilities tqdm==4.66.1 PyYAML==6.0.1 \ No newline at end of file diff --git a/community/chat-llama-nemotron/backend-rag/src/rag_service.py b/community/chat-llama-nemotron/backend-rag/src/rag_service.py index 2c2aa439..c82c5cad 100644 --- a/community/chat-llama-nemotron/backend-rag/src/rag_service.py +++ b/community/chat-llama-nemotron/backend-rag/src/rag_service.py @@ -27,7 +27,7 @@ from multiprocessing import Queue from config.config_loader import config_loader import torch -from langchain.text_splitter import RecursiveCharacterTextSplitter +# Simple text splitter implementation # Load configurations logger = logging.getLogger(__name__) @@ -59,7 +59,7 @@ def __init__(self): # Initialize the model with proper configuration self.model = SentenceTransformer( model_config['name'], - device='cuda' if torch.cuda.is_available() else 'cpu' + device='cpu' # Force CPU to avoid MPS/CUDA issues ) self.query_instruction = search_config.get('query_instruction', "Represent this sentence for searching relevant passages: ") @@ -74,15 +74,8 @@ def __init__(self): self.max_workers = processing_config['max_workers'] # Get max workers from config self.search_multiplier = search_config.get('deduplication_multiplier', 2) # Get search multiplier from config - # Initialize text splitter - self.text_splitter = RecursiveCharacterTextSplitter( - chunk_size=self.chunk_size, - chunk_overlap=self.chunk_overlap, - length_function=len, - separators=text_config.get('separators', ["\n\n", "\n", ". ", "! ", "? ", ", ", " "]), - is_separator_regex=False, - keep_separator=False - ) + # Initialize text splitter configuration + self.separators = text_config.get('separators', ["\n\n", "\n", ". ", "! ", "? 
", ", ", " "]) # Create index if it doesn't exist if not self.index: @@ -98,12 +91,44 @@ def create_index(self): logger.info("Cleared documents and metadata lists") def chunk_text(self, text: str) -> List[str]: - """Split text into overlapping chunks using RecursiveCharacterTextSplitter""" + """Split text into overlapping chunks using simple implementation""" # Clean the text text = re.sub(r'\s+', ' ', text).strip() - # Use the langchain text splitter - chunks = self.text_splitter.split_text(text) + if len(text) <= self.chunk_size: + return [text] + + chunks = [] + start = 0 + + while start < len(text): + # Find the end of the current chunk + end = start + self.chunk_size + + if end >= len(text): + # Last chunk + chunk = text[start:].strip() + if chunk: + chunks.append(chunk) + break + + # Try to find a good break point + best_break = end + for separator in self.separators: + # Look for the separator in the overlap region + overlap_start = max(start, end - self.chunk_overlap) + pos = text.rfind(separator, overlap_start, end) + if pos > start: + best_break = pos + len(separator) + break + + # Extract the chunk + chunk = text[start:best_break].strip() + if chunk: + chunks.append(chunk) + + # Move to next chunk with overlap + start = max(start + 1, best_break - self.chunk_overlap) logger.info(f"Created {len(chunks)} chunks from text") return chunks diff --git a/community/chat-llama-nemotron/frontend/public/config/app_config.yaml b/community/chat-llama-nemotron/frontend/public/config/app_config.yaml index 4ef14342..8768dbb9 100644 --- a/community/chat-llama-nemotron/frontend/public/config/app_config.yaml +++ b/community/chat-llama-nemotron/frontend/public/config/app_config.yaml @@ -48,7 +48,7 @@ llm: allow_headers: ["Content-Type", "Accept", "Origin"] model: name: "nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1" - max_tokens: 32768 + max_tokens: 28000 temperature: 0.6 top_p: 0.95 diff --git a/community/chat-llama-nemotron/frontend/public/robots.txt b/community/chat-llama-nemotron/frontend/public/robots.txt deleted file mode 100644 index e9e57dc4..00000000 --- a/community/chat-llama-nemotron/frontend/public/robots.txt +++ /dev/null @@ -1,3 +0,0 @@ -# https://www.robotstxt.org/robotstxt.html -User-agent: * -Disallow: diff --git a/community/chat-llama-nemotron/frontend/src/App.css b/community/chat-llama-nemotron/frontend/src/App.css index 4979120e..59f29ac3 100644 --- a/community/chat-llama-nemotron/frontend/src/App.css +++ b/community/chat-llama-nemotron/frontend/src/App.css @@ -69,6 +69,13 @@ letter-spacing: 0.01em; } +.ip-input-container { + display: flex; + align-items: center; + flex: 1; + gap: 0.5rem; +} + .server-ip-input input { flex: 1; padding: 0.75rem 1rem; @@ -99,6 +106,34 @@ background-color: rgba(255, 255, 255, 0.12); } +.toggle-ip-visibility { + padding: 0.75rem; + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: 6px; + background-color: rgba(255, 255, 255, 0.1); + color: rgba(255, 255, 255, 0.9); + cursor: pointer; + font-size: 1rem; + transition: all 0.2s ease; + display: flex; + align-items: center; + justify-content: center; + min-width: 44px; + height: 44px; + box-sizing: border-box; +} + +.toggle-ip-visibility:hover { + background-color: rgba(255, 255, 255, 0.15); + border-color: rgba(255, 255, 255, 0.3); +} + +.toggle-ip-visibility:focus { + outline: none; + border-color: #76B900; + box-shadow: 0 0 0 2px rgba(118, 185, 0, 0.25); +} + .controls { margin: 0; display: flex; @@ -457,6 +492,17 @@ input:checked + .slider:before { min-width: unset; } + .ip-input-container { + 
flex-direction: row; + gap: 0.5rem; + } + + .toggle-ip-visibility { + min-width: 44px; + height: 44px; + flex-shrink: 0; + } + .rag-toggle { width: 100%; height: 40px; diff --git a/community/chat-llama-nemotron/frontend/src/App.js b/community/chat-llama-nemotron/frontend/src/App.js index 83ee2e78..2d62da63 100644 --- a/community/chat-llama-nemotron/frontend/src/App.js +++ b/community/chat-llama-nemotron/frontend/src/App.js @@ -37,6 +37,7 @@ function App() { const savedIp = localStorage.getItem('serverIp') || ''; return savedIp; }); + const [showIp, setShowIp] = useState(false); const [error, setError] = useState(null); const messagesEndRef = useRef(null); const errorTimeoutRef = useRef(null); @@ -322,7 +323,7 @@ function App() { model: appConfig?.llm?.model?.name || "nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1", messages: contextMessages, stream: false, - max_tokens: appConfig?.llm?.model?.max_tokens || 32768, + max_tokens: appConfig?.llm?.model?.max_tokens || 28000, temperature: appConfig?.llm?.model?.temperature || 0.6, top_p: appConfig?.llm?.model?.top_p || 0.95 }), @@ -530,15 +531,25 @@ function App() {

        <h1>Chat with Llama-3.1-Nemotron-Nano-4B-v1.1</h1>
        <div className="server-ip-input">
-          <input
-            type="text"
-            value={serverIp}
-            onChange={(e) => {
-              setServerIp(e.target.value);
-            }}
-            placeholder="Enter server IP"
-          />
+          <div className="ip-input-container">
+            <input
+              type={showIp ? "text" : "password"}
+              value={serverIp}
+              onChange={(e) => {
+                setServerIp(e.target.value);
+              }}
+              placeholder="Enter server IP"
+            />
+            <button
+              className="toggle-ip-visibility"
+              onClick={() => setShowIp(!showIp)}
+              title={showIp ? "Hide server IP" : "Show server IP"}
+            >
+              {showIp ? "Hide" : "Show"}
+            </button>
+          </div>
           {error && (
diff --git a/community/chat-llama-nemotron/frontend/src/config/app_config.yaml b/community/chat-llama-nemotron/frontend/src/config/app_config.yaml index 962990ed..d4374e28 100644 --- a/community/chat-llama-nemotron/frontend/src/config/app_config.yaml +++ b/community/chat-llama-nemotron/frontend/src/config/app_config.yaml @@ -46,4 +46,7 @@ ui: summary: max_messages: 5 system_prompt: "detailed thinking {status}" - rag_prefix: "Relevant information from knowledge base:\n" \ No newline at end of file + rag_prefix: "Relevant information from knowledge base:\n" + context: + system_prompt: "detailed thinking {status}" + rag_prefix: "Relevant information from knowledge base:\n"
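
For reviewers who want to exercise the new text splitter outside the service, below is a minimal standalone sketch of the overlap-aware chunking logic that patch 5 introduces in `rag_service.py`. It is not part of the patch series itself: the chunk size, overlap, separator list, and sample text are illustrative values chosen for the demo rather than the settings shipped in `rag_config.yaml`.

```python
# Standalone sanity check for the overlap-aware chunker added in rag_service.py.
# chunk_size, chunk_overlap, SEPARATORS, and the sample text are illustrative only.
import re
from typing import List

SEPARATORS = ["\n\n", "\n", ". ", "! ", "? ", ", ", " "]

def chunk_text(text: str, chunk_size: int, chunk_overlap: int) -> List[str]:
    """Split text into overlapping chunks, preferring to break on separators."""
    text = re.sub(r"\s+", " ", text).strip()
    if len(text) <= chunk_size:
        return [text]
    chunks, start = [], 0
    while start < len(text):
        end = start + chunk_size
        if end >= len(text):
            tail = text[start:].strip()
            if tail:
                chunks.append(tail)
            break
        # Prefer a separator found inside the overlap window over a hard cut at `end`.
        best_break = end
        for sep in SEPARATORS:
            pos = text.rfind(sep, max(start, end - chunk_overlap), end)
            if pos > start:
                best_break = pos + len(sep)
                break
        piece = text[start:best_break].strip()
        if piece:
            chunks.append(piece)
        # Restart `chunk_overlap` characters before the break; the step is
        # character-based, so a chunk may begin mid-word.
        start = max(start + 1, best_break - chunk_overlap)
    return chunks

if __name__ == "__main__":
    sample = (
        "RAG splits documents into chunks. Each chunk is embedded separately. "
        "Overlap keeps sentence fragments from being cut off at chunk borders."
    )
    for i, chunk in enumerate(chunk_text(sample, chunk_size=60, chunk_overlap=20)):
        print(f"[{i}] {chunk}")
```

Running the script prints a handful of overlapping chunks, which makes it easy to see where the separator-based break points land before wiring the same logic into the FAISS indexing path.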