From fe400fbc95c0a44d17460003e07346a3292bc0f7 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Wed, 5 Feb 2025 09:35:12 -0800 Subject: [PATCH 01/23] feat(examples): add live-search-bot implementation Add a new example demonstrating a conversational AI bot that provides real-time news and information using Google's Gemini model and search capabilities. Key features: - Real-time voice interaction using Daily.co - Latest news retrieval using Google Search API - Natural conversation with Gemini AI model - Voice synthesis for bot responses - Voice activity detection for smooth interaction Project structure: - Configuration settings and environment variables - Daily.co service integration - Main application pipeline setup - Logging utilities - Comprehensive README with setup instructions Dependencies: - Core: aiohttp, fastapi, pydantic, websockets - AI/ML: google-generativeai, pipecat-ai[google] - Audio: pyloudnorm, silero-vad, soxr - Services: daily-python --- live-search-bot/.env.example | 8 ++ live-search-bot/README.md | 90 ++++++++++++++++ live-search-bot/requirements.txt | 90 ++++++++++++++++ live-search-bot/src/config/settings.py | 15 +++ live-search-bot/src/main.py | 97 ++++++++++++++++++ live-search-bot/src/services/daily.py | 79 ++++++++++++++ live-search-bot/src/utils/logger.py | 7 ++ requirements.txt | 136 +++++++++++++++++++++++++ 8 files changed, 522 insertions(+) create mode 100644 live-search-bot/.env.example create mode 100644 live-search-bot/README.md create mode 100644 live-search-bot/requirements.txt create mode 100644 live-search-bot/src/config/settings.py create mode 100644 live-search-bot/src/main.py create mode 100644 live-search-bot/src/services/daily.py create mode 100644 live-search-bot/src/utils/logger.py create mode 100644 requirements.txt diff --git a/live-search-bot/.env.example b/live-search-bot/.env.example new file mode 100644 index 00000000..9ec027f2 --- /dev/null +++ 
b/live-search-bot/.env.example @@ -0,0 +1,8 @@ +# Might need to replace with Hyperbolic's API Keys / Credentials + +# Daily.co API credentials +DAILY_API_KEY=your_daily_api_key_here +DAILY_SAMPLE_ROOM_URL=your_daily_room_url_here # Optional: URL of an existing Daily room + +# Google Gemini API credentials +GEMINI_API_KEY=your_gemini_api_key_here \ No newline at end of file diff --git a/live-search-bot/README.md b/live-search-bot/README.md new file mode 100644 index 00000000..e3009046 --- /dev/null +++ b/live-search-bot/README.md @@ -0,0 +1,90 @@ +# Gemini Search Bot + +A conversational AI bot that provides the latest news and information using Google's Gemini model and search capabilities. + +## Features + +- Real-time voice interaction using Daily.co +- Latest news retrieval using Google Search API +- Natural conversation with Gemini AI model +- Voice synthesis for bot responses +- Voice activity detection for smooth interaction + +## Prerequisites + +- Python 3.10+ +- A Daily.co API key +- A Google Gemini API key + +## Setup + +1. Clone the repository +2. Create a virtual environment: + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` +3. Install dependencies: + ```bash + pip install -r requirements.txt + ``` +4. 
Copy `.env.example` to `.env` and fill in your API keys: + ```bash + cp .env.example .env + ``` + +## Environment Variables + +Create a `.env` file with the following variables: + +``` +# Required +DAILY_API_KEY=your_daily_api_key +GEMINI_API_KEY=your_gemini_api_key + +# Optional +DAILY_SAMPLE_ROOM_URL=your_daily_room_url # URL of an existing Daily room (required unless --url is passed) +DAILY_API_URL=https://api.daily.co/v1 # Custom Daily API URL +``` + +## Usage + +Run the bot with default settings (joins the room set in `DAILY_SAMPLE_ROOM_URL`): +```bash +python src/main.py +``` + +Or specify an existing Daily room: +```bash +python src/main.py --url https://your-domain.daily.co/room-name +``` + +Command line options: +- `-u, --url`: URL of the Daily room to join +- `-k, --apikey`: Daily API Key (can also be set in .env) + +The bot will: +1. Connect to the specified Daily room +2. Print the room URL +3. Wait for a participant to join +4. Start the conversation with news-related queries + +## Project Structure + +``` +live-search-bot/ +├── src/ +│ ├── config/ +│ │ └── settings.py # Configuration settings +│ ├── services/ +│ │ └── daily.py # Daily.co service setup +│ ├── utils/ +│ │ └── logger.py # Logging configuration +│ └── main.py # Main application +├── requirements.txt # Python dependencies +└── README.md # This file +``` + +## License + +BSD 2-Clause License \ No newline at end of file diff --git a/live-search-bot/requirements.txt b/live-search-bot/requirements.txt new file mode 100644 index 00000000..9f298cc5 --- /dev/null +++ b/live-search-bot/requirements.txt @@ -0,0 +1,90 @@ +# Core Dependencies +aiohttp>=3.8.0 +fastapi>=0.68.0 +loguru>=0.6.0 +pydantic>=2.0.0 +python-dotenv>=0.19.0 +websockets>=10.0 + +# AI and Machine Learning +anthropic>=0.3.0 +google-generativeai>=0.2.0 +langchain>=0.0.200 +openai>=1.0.0 +openpipe>=0.1.0 +pipecat-ai[google]>=0.1.0 +together>=0.1.0 +transformers>=4.30.0 + +# Audio Processing +azure-cognitiveservices-speech>=1.25.0 +deepgram-sdk>=2.3.0 
+faster-whisper>=0.5.0 +google-cloud-texttospeech>=2.12.0 +lmnt>=0.1.0 +pyaudio>=0.2.11 +pyloudnorm>=0.1.0 +pyht>=0.1.0 +silero-vad>=0.3.0 +soxr>=0.3.0 + +# Video/Image Processing +Pillow>=9.0.0 + +# Communication Services +daily>=0.7.0 +daily-python>=0.5.0 +fal-client>=0.5.0 +livekit>=0.8.0 + +# Utils +boto3>=1.26.0 +numpy>=1.21.0 +python-dateutil>=2.8.2 + + +# # If buggy, remove versions and try again (Carl - 2025 Feb 10) + +# # Core Dependencies +# aiohttp +# fastapi +# loguru +# pydantic +# python-dotenv +# websockets + +# # AI and Machine Learning +# anthropic +# google-generativeai +# langchain +# openai +# openpipe +# pipecat-ai[google] +# together +# transformers + +# # Audio Processing +# azure-cognitiveservices-speech +# deepgram-sdk +# faster-whisper +# google-cloud-texttospeech +# lmnt +# pyaudio +# pyloudnorm +# pyht +# silero-vad +# soxr + +# # Video/Image Processing +# Pillow + +# # Communication Services +# daily +# daily-python +# fal-client +# livekit + +# # Utils +# boto3 +# numpy +# python-dateutil diff --git a/live-search-bot/src/config/settings.py b/live-search-bot/src/config/settings.py new file mode 100644 index 00000000..acea64ca --- /dev/null +++ b/live-search-bot/src/config/settings.py @@ -0,0 +1,15 @@ +# Function handlers for the LLM +TOOLS = [{"google_search": {}}] + +SYSTEM_INSTRUCTION = """ +You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting. + +Always use the google search API to retrieve the latest news. You must also use it to check which day is today. + +You can: +- Use the Google search API to check the current date. +- Provide the most recent and relevant news from any place by using the google search API. +- Answer any questions the user may have, ensuring your responses are accurate and concise. + +Start each interaction by asking the user about which place they would like to know the information. 
+""" \ No newline at end of file diff --git a/live-search-bot/src/main.py b/live-search-bot/src/main.py new file mode 100644 index 00000000..8b88daa8 --- /dev/null +++ b/live-search-bot/src/main.py @@ -0,0 +1,97 @@ +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.services.gemini_multimodal_live.gemini import ( + GeminiMultimodalLiveLLMService, +) +from pipecat.transports.services.daily import DailyParams, DailyTransport + +from config.settings import SYSTEM_INSTRUCTION, TOOLS +from services.daily import configure +from utils.logger import setup_logger + + +load_dotenv(override=True) +setup_logger() + + +async def main(): + """Main application entry point.""" + async with aiohttp.ClientSession() as session: + # Configure Daily room + room_url, token = await configure(session) + + # Set up Daily transport + transport = DailyTransport( + room_url, + token, + "Latest news!", + DailyParams( + audio_out_enabled=True, + vad_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + vad_audio_passthrough=True, + ), + ) + + # Initialize Gemini model + llm = GeminiMultimodalLiveLLMService( + api_key=os.getenv("GEMINI_API_KEY"), + voice_id="Puck", # Available voices: Aoede, Charon, Fenrir, Kore, Puck + transcribe_user_audio=True, + transcribe_model_audio=True, + system_instruction=SYSTEM_INSTRUCTION, + tools=TOOLS, + ) + + # Set up conversation context + context = OpenAILLMContext( + [ + { + "role": "user", + "content": ( + "Start by greeting the user warmly, introducing yourself, " + "and mentioning the current day. Be friendly and engaging " + "to set a positive tone for the interaction." 
+ ), + } + ], + ) + context_aggregator = llm.create_context_aggregator(context) + + # Create pipeline + pipeline = Pipeline( + [ + transport.input(), # Transport user input + context_aggregator.user(), # User responses + llm, # LLM processing + transport.output(), # Transport bot output + context_aggregator.assistant(), # Assistant responses + ] + ) + + # Set up pipeline task + task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True)) + + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + """Handle first participant joining the room.""" + await transport.capture_participant_transcription(participant["id"]) + await task.queue_frames([context_aggregator.user().get_context_frame()]) + + # Run the pipeline + runner = PipelineRunner() + await runner.run(task) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/live-search-bot/src/services/daily.py b/live-search-bot/src/services/daily.py new file mode 100644 index 00000000..0b0954be --- /dev/null +++ b/live-search-bot/src/services/daily.py @@ -0,0 +1,79 @@ +import argparse +import os +from typing import Tuple + +import aiohttp +from loguru import logger +from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper + + +async def configure(session: aiohttp.ClientSession) -> Tuple[str, str]: + """Configure Daily room and return URL and token. + + Args: + session: aiohttp client session + + Returns: + Tuple containing room URL and token + """ + url, token, _ = await configure_with_args(session) + return url, token + + +async def configure_with_args( + session: aiohttp.ClientSession, + parser: argparse.ArgumentParser | None = None +) -> Tuple[str, str, argparse.Namespace]: + """Configure Daily room with command line arguments. 
+ + Args: + session: aiohttp client session + parser: Optional argument parser to extend + + Returns: + Tuple containing room URL, token and parsed args + """ + if not parser: + parser = argparse.ArgumentParser(description="Daily AI News Bot") + parser.add_argument( + "-u", "--url", + type=str, + required=False, + help="URL of the Daily room to join" + ) + parser.add_argument( + "-k", "--apikey", + type=str, + required=False, + help="Daily API Key (needed to create an owner token)" + ) + + args, _ = parser.parse_known_args() + + url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") + key = args.apikey or os.getenv("DAILY_API_KEY") + + if not url: + raise ValueError( + "No Daily room specified. Use -u/--url option or set " + "DAILY_SAMPLE_ROOM_URL in your environment." + ) + + if not key: + raise ValueError( + "No Daily API key specified. Use -k/--apikey option or set " + "DAILY_API_KEY in your environment." + ) + + daily_rest_helper = DailyRESTHelper( + daily_api_key=key, + daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), + aiohttp_session=session, + ) + + # Create token with 1 hour expiration + expiry_time = 60 * 60 + token = await daily_rest_helper.get_token(url, expiry_time) + logger.info(f"Created token for room: {url}") + + return url, token, args \ No newline at end of file diff --git a/live-search-bot/src/utils/logger.py b/live-search-bot/src/utils/logger.py new file mode 100644 index 00000000..575a04fb --- /dev/null +++ b/live-search-bot/src/utils/logger.py @@ -0,0 +1,7 @@ +import sys +from loguru import logger + +def setup_logger(): + """Configure the logger settings.""" + logger.remove(0) + logger.add(sys.stderr, level="DEBUG") \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..58230704 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,136 @@ +aenum==3.1.15 +aiofiles==24.1.0 +aiohappyeyeballs==2.4.4 +aiohttp==3.11.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anthropic==0.45.2 
+anyio==4.8.0 +arrow==0.15.2 +attrs==24.3.0 +av==14.1.0 +azure-cognitiveservices-speech==1.42.0 +boto3==1.36.13 +botocore==1.36.13 +cachetools==5.5.1 +certifi==2025.1.31 +charset-normalizer==3.4.1 +Click==7.0 +colorama==0.4.6 +coloredlogs==15.0.1 +ctranslate2==4.5.0 +cursor==1.3.5 +daily==0.2.1 +daily-python==0.14.2 +dataclasses-json==0.6.7 +deepgram-sdk==3.9.0 +deprecation==2.1.0 +distro==1.9.0 +fal_client==0.5.8 +fastapi==0.115.8 +faster-whisper==1.1.1 +filelock==3.17.0 +flatbuffers==25.1.24 +frozenlist==1.5.0 +fsspec==2025.2.0 +future==1.0.0 +google-ai-generativelanguage==0.6.15 +google-api-core==2.24.1 +google-api-python-client==1.7.11 +google-auth==2.38.0 +google-auth-httplib2==0.0.3 +google-auth-oauthlib==0.4.1 +google-cloud-texttospeech==2.24.0 +google-generativeai==0.8.4 +googleapis-common-protos==1.66.0 +grpcio==1.70.0 +grpcio-status==1.70.0 +gspread==3.1.0 +h11==0.14.0 +halo==0.0.28 +httpcore==1.0.7 +httplib2==0.22.0 +httpx==0.27.2 +httpx-sse==0.4.0 +huggingface-hub==0.28.1 +humanfriendly==10.0 +idna==3.10 +Jinja2==3.1.5 +jiter==0.8.2 +jmespath==1.0.1 +jsonpatch==1.33 +jsonpointer==3.0.0 +langchain==0.3.17 +langchain-core==0.3.33 +langchain-text-splitters==0.3.5 +langsmith==0.3.5 +livekit==0.19.1 +lmnt==1.1.7 +log-symbols==0.0.14 +loguru==0.7.3 +Markdown==3.7 +MarkupSafe==3.0.2 +marshmallow==3.26.1 +mpmath==1.3.0 +multidict==6.1.0 +mypy-extensions==1.0.0 +networkx==3.4.2 +numpy==1.26.4 +oauthlib==3.2.2 +onnxruntime==1.20.1 +openai==1.60.2 +openpipe==4.45.0 +orjson==3.10.15 +packaging==24.2 +pillow==11.1.0 +pipecat-ai==0.0.54 +propcache==0.2.1 +proto-plus==1.26.0 +protobuf==5.29.3 +pyasn1==0.6.1 +pyasn1_modules==0.4.1 +PyAudio==0.2.14 +pydantic==2.10.6 +pydantic_core==2.27.2 +pyht==0.1.12 +pyloudnorm==0.1.1 +pyparsing==3.2.1 +python-dateutil==2.8.0 +python-dotenv==1.0.1 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +rsa==4.9 +s3transfer==0.11.2 +safetensors==0.5.2 +scipy==1.15.1 +setuptools==75.8.0 
+silero-vad==5.1.2 +six==1.17.0 +slackclient==2.2.1 +sniffio==1.3.1 +soxr==0.5.0.post1 +spinners==0.0.24 +SQLAlchemy==2.0.37 +sseclient-py==1.7.2 +starlette==0.45.3 +sympy==1.13.1 +tenacity==9.0.0 +termcolor==2.5.0 +together==0.2.4 +tokenizers==0.21.0 +torch==2.6.0 +torchaudio==2.6.0 +tqdm==4.67.1 +transformers==4.48.2 +typer==0.0.9 +types-protobuf==5.29.1.20241207 +typing-inspect==0.9.0 +typing_extensions==4.12.2 +uritemplate==3.0.1 +urllib3==2.3.0 +websockets==13.1 +yarl==1.18.3 +zstandard==0.23.0 From ea26ee250bf51d93a55a98fed72aed15cad3406a Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:58:35 -0800 Subject: [PATCH 02/23] bounty (1/2) feat: Add screen-share capability. refact: simplify all utility files -> runner.py extra: welcomes user to hyperbolic labs on entry. --- live-search-bot/src/config/settings.py | 15 --- live-search-bot/src/services/daily.py | 79 ------------ live-search-bot/src/utils/logger.py | 7 -- .../.env.example | 2 +- multimodal-gpu_marketplace-bot/README.md | 60 +++++++++ multimodal-gpu_marketplace-bot/main.py | 116 ++++++++++++++++++ .../requirements.txt | 0 multimodal-gpu_marketplace-bot/runner.py | 77 ++++++++++++ multimodal-live_search-bot/.env.example | 8 ++ .../README.md | 0 .../main.py | 65 ++++++---- multimodal-live_search-bot/requirements.txt | 90 ++++++++++++++ multimodal-live_search-bot/runner.py | 77 ++++++++++++ multimodal-video-bot/.env.example | 8 ++ multimodal-video-bot/README.md | 60 +++++++++ multimodal-video-bot/main.py | 116 ++++++++++++++++++ multimodal-video-bot/requirements.txt | 90 ++++++++++++++ multimodal-video-bot/runner.py | 77 ++++++++++++ 18 files changed, 819 insertions(+), 128 deletions(-) delete mode 100644 live-search-bot/src/config/settings.py delete mode 100644 live-search-bot/src/services/daily.py delete mode 100644 live-search-bot/src/utils/logger.py rename {live-search-bot => multimodal-gpu_marketplace-bot}/.env.example (86%) create 
mode 100644 multimodal-gpu_marketplace-bot/README.md create mode 100644 multimodal-gpu_marketplace-bot/main.py rename {live-search-bot => multimodal-gpu_marketplace-bot}/requirements.txt (100%) create mode 100644 multimodal-gpu_marketplace-bot/runner.py create mode 100644 multimodal-live_search-bot/.env.example rename {live-search-bot => multimodal-live_search-bot}/README.md (100%) rename {live-search-bot/src => multimodal-live_search-bot}/main.py (54%) create mode 100644 multimodal-live_search-bot/requirements.txt create mode 100644 multimodal-live_search-bot/runner.py create mode 100644 multimodal-video-bot/.env.example create mode 100644 multimodal-video-bot/README.md create mode 100644 multimodal-video-bot/main.py create mode 100644 multimodal-video-bot/requirements.txt create mode 100644 multimodal-video-bot/runner.py diff --git a/live-search-bot/src/config/settings.py b/live-search-bot/src/config/settings.py deleted file mode 100644 index acea64ca..00000000 --- a/live-search-bot/src/config/settings.py +++ /dev/null @@ -1,15 +0,0 @@ -# Function handlers for the LLM -TOOLS = [{"google_search": {}}] - -SYSTEM_INSTRUCTION = """ -You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting. - -Always use the google search API to retrieve the latest news. You must also use it to check which day is today. - -You can: -- Use the Google search API to check the current date. -- Provide the most recent and relevant news from any place by using the google search API. -- Answer any questions the user may have, ensuring your responses are accurate and concise. - -Start each interaction by asking the user about which place they would like to know the information. 
-""" \ No newline at end of file diff --git a/live-search-bot/src/services/daily.py b/live-search-bot/src/services/daily.py deleted file mode 100644 index 0b0954be..00000000 --- a/live-search-bot/src/services/daily.py +++ /dev/null @@ -1,79 +0,0 @@ -import argparse -import os -from typing import Tuple - -import aiohttp -from loguru import logger -from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper - - -async def configure(session: aiohttp.ClientSession) -> Tuple[str, str]: - """Configure Daily room and return URL and token. - - Args: - session: aiohttp client session - - Returns: - Tuple containing room URL and token - """ - url, token, _ = await configure_with_args(session) - return url, token - - -async def configure_with_args( - session: aiohttp.ClientSession, - parser: argparse.ArgumentParser | None = None -) -> Tuple[str, str, argparse.Namespace]: - """Configure Daily room with command line arguments. - - Args: - session: aiohttp client session - parser: Optional argument parser to extend - - Returns: - Tuple containing room URL, token and parsed args - """ - if not parser: - parser = argparse.ArgumentParser(description="Daily AI News Bot") - parser.add_argument( - "-u", "--url", - type=str, - required=False, - help="URL of the Daily room to join" - ) - parser.add_argument( - "-k", "--apikey", - type=str, - required=False, - help="Daily API Key (needed to create an owner token)" - ) - - args, _ = parser.parse_known_args() - - url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") - key = args.apikey or os.getenv("DAILY_API_KEY") - - if not url: - raise ValueError( - "No Daily room specified. Use -u/--url option or set " - "DAILY_SAMPLE_ROOM_URL in your environment." - ) - - if not key: - raise ValueError( - "No Daily API key specified. Use -k/--apikey option or set " - "DAILY_API_KEY in your environment." 
- ) - - daily_rest_helper = DailyRESTHelper( - daily_api_key=key, - daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), - aiohttp_session=session, - ) - - # Create token with 1 hour expiration - expiry_time = 60 * 60 - token = await daily_rest_helper.get_token(url, expiry_time) - logger.info(f"Created token for room: {url}") - - return url, token, args \ No newline at end of file diff --git a/live-search-bot/src/utils/logger.py b/live-search-bot/src/utils/logger.py deleted file mode 100644 index 575a04fb..00000000 --- a/live-search-bot/src/utils/logger.py +++ /dev/null @@ -1,7 +0,0 @@ -import sys -from loguru import logger - -def setup_logger(): - """Configure the logger settings.""" - logger.remove(0) - logger.add(sys.stderr, level="DEBUG") \ No newline at end of file diff --git a/live-search-bot/.env.example b/multimodal-gpu_marketplace-bot/.env.example similarity index 86% rename from live-search-bot/.env.example rename to multimodal-gpu_marketplace-bot/.env.example index 9ec027f2..aa71ec2a 100644 --- a/live-search-bot/.env.example +++ b/multimodal-gpu_marketplace-bot/.env.example @@ -5,4 +5,4 @@ DAILY_API_KEY=your_daily_api_key_here DAILY_SAMPLE_ROOM_URL=your_daily_room_url_here # Optional: URL of an existing Daily room # Google Gemini API credentials -GEMINI_API_KEY=your_gemini_api_key_here \ No newline at end of file +GOOGLE_API_KEY=your_gemini_api_key_here \ No newline at end of file diff --git a/multimodal-gpu_marketplace-bot/README.md b/multimodal-gpu_marketplace-bot/README.md new file mode 100644 index 00000000..cf9734c4 --- /dev/null +++ b/multimodal-gpu_marketplace-bot/README.md @@ -0,0 +1,60 @@ +# Multimodal Video Bot + +A video conferencing bot that can analyze screen shares and camera feeds using Gemini's multimodal capabilities. 
+ +## Features + +- **Prioritized Screen Sharing**: Subscribes to the participant's screen share first, then to their camera feed +- **Voice Activity Detection (VAD)**: Uses Silero VAD for precise audio detection +- **Multimodal Analysis**: Processes both visual and audio inputs using Google's Gemini API +- **Interactive Response**: Provides real-time responses to user queries about visual content + +## Requirements + +- Python 3.12+ +- Google API key with access to Gemini API +- Daily.co API key +- See `requirements.txt` for complete dependencies + +## Environment Setup + +Create a `.env` file with: + +``` +GOOGLE_API_KEY=your_google_api_key +DAILY_API_KEY=your_daily_api_key +DAILY_SAMPLE_ROOM_URL=your_daily_room_url +``` + +## Installation + +```bash +pip install -r requirements.txt +``` + +## Usage + +Run the bot: + +```bash +python main.py +``` + +Or with explicit room URL: + +```bash +python main.py -u "https://your-domain.daily.co/room" -k "your-daily-api-key" +``` + +## Voice Options + +The bot supports multiple voice options: +- Aoede (default) +- Puck +- Charon +- Kore +- Fenrir + +## Rate Limiting + +The service implements automatic rate limiting and retry mechanisms when interacting with Google's APIs to prevent quota exhaustion. diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py new file mode 100644 index 00000000..f0d72a93 --- /dev/null +++ b/multimodal-gpu_marketplace-bot/main.py @@ -0,0 +1,116 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +# Carl's note: we extended this code from the Daily SDK, which is licensed under the BSD 2-Clause License. 
+# # The Daily SDK is available at https://github.com/pipecat-ai/pipecat/tree/main + +import asyncio +import os +import sys + +import aiohttp +from dotenv import load_dotenv +from loguru import logger +from runner import configure + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.services.gemini_multimodal_live.gemini import ( + GeminiMultimodalLiveLLMService, +) +from pipecat.transports.services.daily import DailyParams, DailyTransport + +load_dotenv(override=True) + +logger.remove(0) +logger.add(sys.stderr, level="DEBUG") + + +async def main(): + google_api_key = os.getenv("GOOGLE_API_KEY") + if not google_api_key: + raise ValueError("GOOGLE_API_KEY environment variable is not set") + + async with aiohttp.ClientSession() as session: + (room_url, token) = await configure(session) + + transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_out_enabled=True, + vad_enabled=True, + vad_audio_passthrough=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + ) + + llm = GeminiMultimodalLiveLLMService( + api_key=google_api_key, # Use validated key + voice_id="Aoede", # Puck, Charon, Kore, Fenrir, Aoede + system_instruction="Refer directly to screen elements when the user asks for help.", + transcribe_user_audio=True, + transcribe_model_audio=True, + inference_on_context_initialization=False, + ) + + context = OpenAILLMContext( + [ + { + "role": "user", + "content": "Welcome me to Hyperbolic Labs first. Tell me that I can see your camera feed, but tell me I have to click 'Share Screen' below first so I can help with your screen. Then I can tell you what's on your camera or where to click things!" 
+ }, + ], + ) + context_aggregator = llm.create_context_aggregator(context) + + pipeline = Pipeline( + [ + transport.input(), + context_aggregator.user(), + llm, + transport.output(), + context_aggregator.assistant(), + ] + ) + + task = PipelineTask( + pipeline, + PipelineParams( + allow_interruptions=True, + enable_metrics=True, + enable_usage_metrics=True, + ), + ) + + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + # Enable both camera and screenshare. From the client side + # send just one. + await transport.capture_participant_video( + participant["id"], framerate=1, video_source="screenVideo" + ) + await transport.capture_participant_video( + participant["id"], framerate=1, video_source="camera" + ) + await task.queue_frames([context_aggregator.user().get_context_frame()]) + await asyncio.sleep(3) + logger.debug("Unpausing audio and video") + llm.set_audio_input_paused(False) + llm.set_video_input_paused(False) + + runner = PipelineRunner() + + await runner.run(task) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/live-search-bot/requirements.txt b/multimodal-gpu_marketplace-bot/requirements.txt similarity index 100% rename from live-search-bot/requirements.txt rename to multimodal-gpu_marketplace-bot/requirements.txt diff --git a/multimodal-gpu_marketplace-bot/runner.py b/multimodal-gpu_marketplace-bot/runner.py new file mode 100644 index 00000000..2400217d --- /dev/null +++ b/multimodal-gpu_marketplace-bot/runner.py @@ -0,0 +1,77 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +# Carl's note: we borrowed this code from the Daily SDK, which is licensed under the BSD 2-Clause License. 
+# The Daily SDK is available at https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/runner.py + +import argparse +import os +from typing import Optional +from dotenv import load_dotenv + +import aiohttp + +from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper + +load_dotenv(override=True) + + +async def configure(aiohttp_session: aiohttp.ClientSession): + (url, token, _) = await configure_with_args(aiohttp_session) + return (url, token) + + +async def configure_with_args( + aiohttp_session: aiohttp.ClientSession, + parser: Optional[argparse.ArgumentParser] = None, +): + if not parser: + parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") + parser.add_argument( + "-u", "--url", type=str, required=False, help="URL of the Daily room to join" + ) + parser.add_argument( + "-k", + "--apikey", + type=str, + required=False, + help="Daily API Key (needed to create an owner token for the room)", + ) + + args, unknown = parser.parse_known_args() + + url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") + key = args.apikey or os.getenv("DAILY_API_KEY") + + if not url: + raise Exception( + "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL." + ) + + if not key: + raise Exception( + "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers." + ) + + daily_api_key = os.environ.get("DAILY_API_KEY") + if not daily_api_key: + raise Exception( + "The environment variable 'DAILY_API_KEY' must be set to run this example." + ) + + daily_rest_helper = DailyRESTHelper( + daily_api_key=key, + daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), + aiohttp_session=aiohttp_session, + ) + + # Create a meeting token for the given room with an expiration 1 hour in + # the future. 
+ expiry_time: float = 60 * 60 + + token = await daily_rest_helper.get_token(url, expiry_time) + + return (url, token, args) diff --git a/multimodal-live_search-bot/.env.example b/multimodal-live_search-bot/.env.example new file mode 100644 index 00000000..aa71ec2a --- /dev/null +++ b/multimodal-live_search-bot/.env.example @@ -0,0 +1,8 @@ +# Might need to replace w Hyperbolic's API Keys / Credentials + +# Daily.co API credentials +DAILY_API_KEY=your_daily_api_key_here +DAILY_SAMPLE_ROOM_URL=your_daily_room_url_here # Optional: URL of an existing Daily room + +# Google Gemini API credentials +GOOGLE_API_KEY=your_gemini_api_key_here \ No newline at end of file diff --git a/live-search-bot/README.md b/multimodal-live_search-bot/README.md similarity index 100% rename from live-search-bot/README.md rename to multimodal-live_search-bot/README.md diff --git a/live-search-bot/src/main.py b/multimodal-live_search-bot/main.py similarity index 54% rename from live-search-bot/src/main.py rename to multimodal-live_search-bot/main.py index 8b88daa8..502ec842 100644 --- a/live-search-bot/src/main.py +++ b/multimodal-live_search-bot/main.py @@ -1,5 +1,13 @@ +# +# Copyright (c) 2024, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + import asyncio import os +import sys +from pathlib import Path import aiohttp from dotenv import load_dotenv @@ -15,22 +23,36 @@ ) from pipecat.transports.services.daily import DailyParams, DailyTransport -from config.settings import SYSTEM_INSTRUCTION, TOOLS -from services.daily import configure -from utils.logger import setup_logger - +sys.path.append(str(Path(__file__).parent.parent)) +from runner import configure load_dotenv(override=True) -setup_logger() + +logger.remove(0) +logger.add(sys.stderr, level="DEBUG") + +# Function handlers for the LLM +search_tool = {"google_search": {}} +tools = [search_tool] + +system_instruction = """ +You are an expert at providing the most recent news from any place. 
Your responses will be converted to audio, so avoid using special characters or overly complex formatting. + +Always use the google search API to retrieve the latest news. You must also use it to check which day is today. + +You can: +- Use the Google search API to check the current date. +- Provide the most recent and relevant news from any place by using the google search API. +- Answer any questions the user may have, ensuring your responses are accurate and concise. + +Start each interaction by asking the user about which place they would like to know the information. +""" async def main(): - """Main application entry point.""" async with aiohttp.ClientSession() as session: - # Configure Daily room - room_url, token = await configure(session) + (room_url, token) = await configure(session) - # Set up Daily transport transport = DailyTransport( room_url, token, @@ -43,52 +65,43 @@ async def main(): ), ) - # Initialize Gemini model + # Initialize the Gemini Multimodal Live model llm = GeminiMultimodalLiveLLMService( - api_key=os.getenv("GEMINI_API_KEY"), - voice_id="Puck", # Available voices: Aoede, Charon, Fenrir, Kore, Puck + api_key=os.getenv("GOOGLE_API_KEY"), + voice_id="Puck", # Aoede, Charon, Fenrir, Kore, Puck transcribe_user_audio=True, transcribe_model_audio=True, - system_instruction=SYSTEM_INSTRUCTION, - tools=TOOLS, + system_instruction=system_instruction, + tools=tools, ) - # Set up conversation context context = OpenAILLMContext( [ { "role": "user", - "content": ( - "Start by greeting the user warmly, introducing yourself, " - "and mentioning the current day. Be friendly and engaging " - "to set a positive tone for the interaction." - ), + "content": "Start by greeting me warmly and introducing me to live search by Hyperbolic Labs and mention the current day. Tell me that I can ask you about any place in the world and you will provide me with the latest news. 
Then, ask me which place I would like to know about.", } ], ) context_aggregator = llm.create_context_aggregator(context) - # Create pipeline pipeline = Pipeline( [ transport.input(), # Transport user input context_aggregator.user(), # User responses - llm, # LLM processing + llm, # LLM transport.output(), # Transport bot output - context_aggregator.assistant(), # Assistant responses + context_aggregator.assistant(), # Assistant spoken responses ] ) - # Set up pipeline task task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True)) @transport.event_handler("on_first_participant_joined") async def on_first_participant_joined(transport, participant): - """Handle first participant joining the room.""" await transport.capture_participant_transcription(participant["id"]) await task.queue_frames([context_aggregator.user().get_context_frame()]) - # Run the pipeline runner = PipelineRunner() await runner.run(task) diff --git a/multimodal-live_search-bot/requirements.txt b/multimodal-live_search-bot/requirements.txt new file mode 100644 index 00000000..9f298cc5 --- /dev/null +++ b/multimodal-live_search-bot/requirements.txt @@ -0,0 +1,90 @@ +# Core Dependencies +aiohttp>=3.8.0 +fastapi>=0.68.0 +loguru>=0.6.0 +pydantic>=2.0.0 +python-dotenv>=0.19.0 +websockets>=10.0 + +# AI and Machine Learning +anthropic>=0.3.0 +google-generativeai>=0.2.0 +langchain>=0.0.200 +openai>=1.0.0 +openpipe>=0.1.0 +pipecat-ai[google]>=0.1.0 +together>=0.1.0 +transformers>=4.30.0 + +# Audio Processing +azure-cognitiveservices-speech>=1.25.0 +deepgram-sdk>=2.3.0 +faster-whisper>=0.5.0 +google-cloud-texttospeech>=2.12.0 +lmnt>=0.1.0 +pyaudio>=0.2.11 +pyloudnorm>=0.1.0 +pyht>=0.1.0 +silero-vad>=0.3.0 +soxr>=0.3.0 + +# Video/Image Processing +Pillow>=9.0.0 + +# Communication Services +daily>=0.7.0 +daily-python>=0.5.0 +fal-client>=0.5.0 +livekit>=0.8.0 + +# Utils +boto3>=1.26.0 +numpy>=1.21.0 +python-dateutil>=2.8.2 + + +# # If buggy, remove versions and try again (Carl - 2025 Feb 10) + +# 
# Core Dependencies +# aiohttp +# fastapi +# loguru +# pydantic +# python-dotenv +# websockets + +# # AI and Machine Learning +# anthropic +# google-generativeai +# langchain +# openai +# openpipe +# pipecat-ai[google] +# together +# transformers + +# # Audio Processing +# azure-cognitiveservices-speech +# deepgram-sdk +# faster-whisper +# google-cloud-texttospeech +# lmnt +# pyaudio +# pyloudnorm +# pyht +# silero-vad +# soxr + +# # Video/Image Processing +# Pillow + +# # Communication Services +# daily +# daily-python +# fal-client +# livekit + +# # Utils +# boto3 +# numpy +# python-dateutil diff --git a/multimodal-live_search-bot/runner.py b/multimodal-live_search-bot/runner.py new file mode 100644 index 00000000..2400217d --- /dev/null +++ b/multimodal-live_search-bot/runner.py @@ -0,0 +1,77 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +# Carl's note: we borrowed this code from the Daily SDK, which is licensed under the BSD 2-Clause License. 
+# The Daily SDK is available at https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/runner.py + +import argparse +import os +from typing import Optional +from dotenv import load_dotenv + +import aiohttp + +from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper + +load_dotenv(override=True) + + +async def configure(aiohttp_session: aiohttp.ClientSession): + (url, token, _) = await configure_with_args(aiohttp_session) + return (url, token) + + +async def configure_with_args( + aiohttp_session: aiohttp.ClientSession, + parser: Optional[argparse.ArgumentParser] = None, +): + if not parser: + parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") + parser.add_argument( + "-u", "--url", type=str, required=False, help="URL of the Daily room to join" + ) + parser.add_argument( + "-k", + "--apikey", + type=str, + required=False, + help="Daily API Key (needed to create an owner token for the room)", + ) + + args, unknown = parser.parse_known_args() + + url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") + key = args.apikey or os.getenv("DAILY_API_KEY") + + if not url: + raise Exception( + "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL." + ) + + if not key: + raise Exception( + "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers." + ) + + daily_api_key = os.environ.get("DAILY_API_KEY") + if not daily_api_key: + raise Exception( + "The environment variable 'DAILY_API_KEY' must be set to run this example." + ) + + daily_rest_helper = DailyRESTHelper( + daily_api_key=key, + daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), + aiohttp_session=aiohttp_session, + ) + + # Create a meeting token for the given room with an expiration 1 hour in + # the future. 
+ expiry_time: float = 60 * 60 + + token = await daily_rest_helper.get_token(url, expiry_time) + + return (url, token, args) diff --git a/multimodal-video-bot/.env.example b/multimodal-video-bot/.env.example new file mode 100644 index 00000000..aa71ec2a --- /dev/null +++ b/multimodal-video-bot/.env.example @@ -0,0 +1,8 @@ +# Might need to replace w Hyperbolic's API Keys / Credentials + +# Daily.co API credentials +DAILY_API_KEY=your_daily_api_key_here +DAILY_SAMPLE_ROOM_URL=your_daily_room_url_here # Optional: URL of an existing Daily room + +# Google Gemini API credentials +GOOGLE_API_KEY=your_gemini_api_key_here \ No newline at end of file diff --git a/multimodal-video-bot/README.md b/multimodal-video-bot/README.md new file mode 100644 index 00000000..cf9734c4 --- /dev/null +++ b/multimodal-video-bot/README.md @@ -0,0 +1,60 @@ +# Multimodal Video Bot + +A video conferencing bot that can analyze screen shares and camera feeds using Gemini's multimodal capabilities. + +## Features + +- **Prioritized Screen Sharing**: Automatically attempts to capture screen sharing first, falling back to camera if unavailable +- **Voice Activation Detection (VAD)**: Uses Silero VAD for precise audio detection +- **Multimodal Analysis**: Processes both visual and audio inputs using Google's Gemini API +- **Interactive Response**: Provides real-time responses to user queries about visual content + +## Requirements + +- Python 3.12+ +- Google API key with access to Gemini API +- Daily.co API key +- See `requirements.txt` for complete dependencies + +## Environment Setup + +Create a `.env` file with: + +``` +GOOGLE_API_KEY=your_google_api_key +DAILY_API_KEY=your_daily_api_key +DAILY_SAMPLE_ROOM_URL=your_daily_room_url +``` + +## Installation + +```bash +pip install -r requirements.txt +``` + +## Usage + +Run the bot: + +```bash +python src/main.py +``` + +Or with explicit room URL: + +```bash +python src/main.py -u "https://your-domain.daily.co/room" -k "your-daily-api-key" +``` + +## 
Voice Options + +The bot supports multiple voice options: +- Aoede (default) +- Puck +- Charon +- Kore +- Fenrir + +## Rate Limiting + +The service implements automatic rate limiting and retry mechanisms when interacting with Google's APIs to prevent quota exhaustion. diff --git a/multimodal-video-bot/main.py b/multimodal-video-bot/main.py new file mode 100644 index 00000000..f0d72a93 --- /dev/null +++ b/multimodal-video-bot/main.py @@ -0,0 +1,116 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +# Carl's note: we extended this code from the Daily SDK, which is licensed under the BSD 2-Clause License. +# # The Daily SDK is available at https://github.com/pipecat-ai/pipecat/tree/main + +import asyncio +import os +import sys + +import aiohttp +from dotenv import load_dotenv +from loguru import logger +from runner import configure + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.services.gemini_multimodal_live.gemini import ( + GeminiMultimodalLiveLLMService, +) +from pipecat.transports.services.daily import DailyParams, DailyTransport + +load_dotenv(override=True) + +logger.remove(0) +logger.add(sys.stderr, level="DEBUG") + + +async def main(): + google_api_key = os.getenv("GOOGLE_API_KEY") + if not google_api_key: + raise ValueError("GOOGLE_API_KEY environment variable is not set") + + async with aiohttp.ClientSession() as session: + (room_url, token) = await configure(session) + + transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_out_enabled=True, + vad_enabled=True, + vad_audio_passthrough=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + 
), + ) + + llm = GeminiMultimodalLiveLLMService( + api_key=google_api_key, # Use validated key + voice_id="Aoede", # Puck, Charon, Kore, Fenrir, Aoede + system_instruction="Refer directly to screen elements when the user asks for help.", + transcribe_user_audio=True, + transcribe_model_audio=True, + inference_on_context_initialization=False, + ) + + context = OpenAILLMContext( + [ + { + "role": "user", + "content": "Welcome me to Hyperbolic Labs first. Tell me that I can see your camera feed, but tell me I have to click 'Share Screen' below first so I can help with your screen. Then I can tell you what's on your camera or where to click things!" + }, + ], + ) + context_aggregator = llm.create_context_aggregator(context) + + pipeline = Pipeline( + [ + transport.input(), + context_aggregator.user(), + llm, + transport.output(), + context_aggregator.assistant(), + ] + ) + + task = PipelineTask( + pipeline, + PipelineParams( + allow_interruptions=True, + enable_metrics=True, + enable_usage_metrics=True, + ), + ) + + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + # Enable both camera and screenshare. From the client side + # send just one. 
+ await transport.capture_participant_video( + participant["id"], framerate=1, video_source="screenVideo" + ) + await transport.capture_participant_video( + participant["id"], framerate=1, video_source="camera" + ) + await task.queue_frames([context_aggregator.user().get_context_frame()]) + await asyncio.sleep(3) + logger.debug("Unpausing audio and video") + llm.set_audio_input_paused(False) + llm.set_video_input_paused(False) + + runner = PipelineRunner() + + await runner.run(task) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/multimodal-video-bot/requirements.txt b/multimodal-video-bot/requirements.txt new file mode 100644 index 00000000..9f298cc5 --- /dev/null +++ b/multimodal-video-bot/requirements.txt @@ -0,0 +1,90 @@ +# Core Dependencies +aiohttp>=3.8.0 +fastapi>=0.68.0 +loguru>=0.6.0 +pydantic>=2.0.0 +python-dotenv>=0.19.0 +websockets>=10.0 + +# AI and Machine Learning +anthropic>=0.3.0 +google-generativeai>=0.2.0 +langchain>=0.0.200 +openai>=1.0.0 +openpipe>=0.1.0 +pipecat-ai[google]>=0.1.0 +together>=0.1.0 +transformers>=4.30.0 + +# Audio Processing +azure-cognitiveservices-speech>=1.25.0 +deepgram-sdk>=2.3.0 +faster-whisper>=0.5.0 +google-cloud-texttospeech>=2.12.0 +lmnt>=0.1.0 +pyaudio>=0.2.11 +pyloudnorm>=0.1.0 +pyht>=0.1.0 +silero-vad>=0.3.0 +soxr>=0.3.0 + +# Video/Image Processing +Pillow>=9.0.0 + +# Communication Services +daily>=0.7.0 +daily-python>=0.5.0 +fal-client>=0.5.0 +livekit>=0.8.0 + +# Utils +boto3>=1.26.0 +numpy>=1.21.0 +python-dateutil>=2.8.2 + + +# # If buggy, remove versions and try again (Carl - 2025 Feb 10) + +# # Core Dependencies +# aiohttp +# fastapi +# loguru +# pydantic +# python-dotenv +# websockets + +# # AI and Machine Learning +# anthropic +# google-generativeai +# langchain +# openai +# openpipe +# pipecat-ai[google] +# together +# transformers + +# # Audio Processing +# azure-cognitiveservices-speech +# deepgram-sdk +# faster-whisper +# google-cloud-texttospeech +# lmnt +# pyaudio +# pyloudnorm +# pyht 
+# silero-vad +# soxr + +# # Video/Image Processing +# Pillow + +# # Communication Services +# daily +# daily-python +# fal-client +# livekit + +# # Utils +# boto3 +# numpy +# python-dateutil diff --git a/multimodal-video-bot/runner.py b/multimodal-video-bot/runner.py new file mode 100644 index 00000000..2400217d --- /dev/null +++ b/multimodal-video-bot/runner.py @@ -0,0 +1,77 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +# Carl's note: we borrowed this code from the Daily SDK, which is licensed under the BSD 2-Clause License. +# The Daily SDK is available at https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/runner.py + +import argparse +import os +from typing import Optional +from dotenv import load_dotenv + +import aiohttp + +from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper + +load_dotenv(override=True) + + +async def configure(aiohttp_session: aiohttp.ClientSession): + (url, token, _) = await configure_with_args(aiohttp_session) + return (url, token) + + +async def configure_with_args( + aiohttp_session: aiohttp.ClientSession, + parser: Optional[argparse.ArgumentParser] = None, +): + if not parser: + parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") + parser.add_argument( + "-u", "--url", type=str, required=False, help="URL of the Daily room to join" + ) + parser.add_argument( + "-k", + "--apikey", + type=str, + required=False, + help="Daily API Key (needed to create an owner token for the room)", + ) + + args, unknown = parser.parse_known_args() + + url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") + key = args.apikey or os.getenv("DAILY_API_KEY") + + if not url: + raise Exception( + "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL." + ) + + if not key: + raise Exception( + "No Daily API key specified. 
use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers." + ) + + daily_api_key = os.environ.get("DAILY_API_KEY") + if not daily_api_key: + raise Exception( + "The environment variable 'DAILY_API_KEY' must be set to run this example." + ) + + daily_rest_helper = DailyRESTHelper( + daily_api_key=key, + daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), + aiohttp_session=aiohttp_session, + ) + + # Create a meeting token for the given room with an expiration 1 hour in + # the future. + expiry_time: float = 60 * 60 + + token = await daily_rest_helper.get_token(url, expiry_time) + + return (url, token, args) From 5c14edf4e17bd1bba393d9b485498d0b83fe71c9 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 14 Feb 2025 23:16:25 -0800 Subject: [PATCH 03/23] feat(weather): implement weather fetching functionality with API integration TEST --- multimodal-gpu_marketplace-bot/main.py | 90 +++++++++++++++++--------- 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index f0d72a93..23fccfd8 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -10,6 +10,7 @@ import asyncio import os import sys +from datetime import datetime import aiohttp from dotenv import load_dotenv @@ -33,11 +34,55 @@ logger.add(sys.stderr, level="DEBUG") -async def main(): - google_api_key = os.getenv("GOOGLE_API_KEY") - if not google_api_key: - raise ValueError("GOOGLE_API_KEY environment variable is not set") +async def fetch_weather_from_api( + function_name, tool_call_id, args, llm, context, result_callback +): + temperature = 75 if args["format"] == "fahrenheit" else 24 + await result_callback( + { + "conditions": "nice", + "temperature": temperature, + "format": args["format"], + 
"timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), + } + ) + + +tools = [ + { + "function_declarations": [ + { + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use. Infer this from the users location.", + }, + }, + "required": ["location", "format"], + }, + }, + ] + } +] + +system_instruction = """ +You are a helpful assistant who can answer questions and use tools. +You have a tool called "get_current_weather" that can be used to get the current weather. If the user asks +for the weather, call this function. +""" + + +async def main(): async with aiohttp.ClientSession() as session: (room_url, token) = await configure(session) @@ -49,26 +94,25 @@ async def main(): audio_out_enabled=True, vad_enabled=True, vad_audio_passthrough=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), ), ) llm = GeminiMultimodalLiveLLMService( - api_key=google_api_key, # Use validated key - voice_id="Aoede", # Puck, Charon, Kore, Fenrir, Aoede - system_instruction="Refer directly to screen elements when the user asks for help.", - transcribe_user_audio=True, - transcribe_model_audio=True, - inference_on_context_initialization=False, + api_key=os.getenv("GOOGLE_API_KEY"), + system_instruction=system_instruction, + tools=tools, ) + llm.register_function("get_current_weather", fetch_weather_from_api) + context = OpenAILLMContext( - [ - { - "role": "user", - "content": "Welcome me to Hyperbolic Labs first. 
Tell me that I can see your camera feed, but tell me I have to click 'Share Screen' below first so I can help with your screen. Then I can tell you what's on your camera or where to click things!" - }, - ], + [{"role": "user", + "content": "Start by greeting me warmly and introducing me to GPU Rentals by Hyperbolic Labs and mention that you can do everything verbally. Encourage me to start by asking available GPU."}], ) context_aggregator = llm.create_context_aggregator(context) @@ -77,8 +121,8 @@ async def main(): transport.input(), context_aggregator.user(), llm, - transport.output(), context_aggregator.assistant(), + transport.output(), ] ) @@ -93,19 +137,7 @@ async def main(): @transport.event_handler("on_first_participant_joined") async def on_first_participant_joined(transport, participant): - # Enable both camera and screenshare. From the client side - # send just one. - await transport.capture_participant_video( - participant["id"], framerate=1, video_source="screenVideo" - ) - await transport.capture_participant_video( - participant["id"], framerate=1, video_source="camera" - ) await task.queue_frames([context_aggregator.user().get_context_frame()]) - await asyncio.sleep(3) - logger.debug("Unpausing audio and video") - llm.set_audio_input_paused(False) - llm.set_video_input_paused(False) runner = PipelineRunner() From be78b7bdca43ed4e21984096040e3f434fe451b5 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 14 Feb 2025 23:23:46 -0800 Subject: [PATCH 04/23] feat(marketplace): implement GPU marketplace data fetching functionality --- multimodal-gpu_marketplace-bot/main.py | 85 +++++++++++++++++--------- 1 file changed, 57 insertions(+), 28 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 23fccfd8..0dfe0357 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -34,51 +34,76 @@ logger.add(sys.stderr, 
level="DEBUG") -async def fetch_weather_from_api( +async def fetch_marketplace_data( function_name, tool_call_id, args, llm, context, result_callback ): - temperature = 75 if args["format"] == "fahrenheit" else 24 - await result_callback( - { - "conditions": "nice", - "temperature": temperature, - "format": args["format"], - "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), - } - ) + async with aiohttp.ClientSession() as session: + try: + url = "https://api.hyperbolic.xyz/v1/marketplace" + headers = {"Content-Type": "application/json"} + filters = {} if args["filter_type"] == "all" else {"available": True} + data = {"filters": filters} + + async with session.post(url, json=data, headers=headers) as response: + if response.status == 200: + marketplace_data = await response.json() + available_instances = [ + { + "id": instance["id"], + "gpu_model": instance["hardware"]["gpus"][0]["model"], + "gpu_memory": instance["hardware"]["gpus"][0]["ram"], + "price_per_hour": instance["pricing"]["price"]["amount"], + "location": instance["location"]["region"], + "available": not instance["reserved"] + and instance["gpus_reserved"] < instance["gpus_total"], + } + for instance in marketplace_data["instances"] + if "gpus" in instance["hardware"] + and instance["hardware"]["gpus"] + ] + await result_callback({"instances": available_instances}) + else: + await result_callback( + {"error": f"API request failed with status {response.status}"} + ) + except Exception as e: + await result_callback({"error": str(e)}) tools = [ { "function_declarations": [ { - "name": "get_current_weather", - "description": "Get the current weather", + "name": "get_available_gpus", + "description": "Get the list of available GPU instances in the marketplace", "parameters": { "type": "object", "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. 
San Francisco, CA", - }, - "format": { + "filter_type": { "type": "string", - "enum": ["celsius", "fahrenheit"], - "description": "The temperature unit to use. Infer this from the users location.", - }, + "enum": ["all", "available_only"], + "description": "Filter type for GPU instances", + } }, - "required": ["location", "format"], + "required": ["filter_type"], }, - }, + } ] } ] system_instruction = """ -You are a helpful assistant who can answer questions and use tools. +You are a helpful assistant for Hyperbolic Labs' GPU Marketplace. You can help users find and understand available GPU instances for rent. + +You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, +or specifications, use this tool to get the most current information. -You have a tool called "get_current_weather" that can be used to get the current weather. If the user asks -for the weather, call this function. +Always be professional and helpful. When listing GPUs: +1. Mention the GPU model, memory, and hourly price +2. Indicate if the instance is currently available +3. Include the location/region + +If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. """ @@ -108,11 +133,15 @@ async def main(): tools=tools, ) - llm.register_function("get_current_weather", fetch_weather_from_api) + llm.register_function("get_available_gpus", fetch_marketplace_data) context = OpenAILLMContext( - [{"role": "user", - "content": "Start by greeting me warmly and introducing me to GPU Rentals by Hyperbolic Labs and mention that you can do everything verbally. Encourage me to start by asking available GPU."}], + [ + { + "role": "user", + "content": "Start by greeting me warmly and introducing me to GPU Rentals by Hyperbolic Labs and mention that you can do everything verbally. 
Encourage me to start by asking available GPU.", + } + ], ) context_aggregator = llm.create_context_aggregator(context) From 76517d148b72c6c81817eeb2f4960977a6461228 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 14 Feb 2025 23:42:22 -0800 Subject: [PATCH 05/23] feat(gpu-marketplace-bot): enhance system instructions for GPU availability and pricing format --- multimodal-gpu_marketplace-bot/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 0dfe0357..b1846be0 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -95,6 +95,8 @@ async def fetch_marketplace_data( system_instruction = """ You are a helpful assistant for Hyperbolic Labs' GPU Marketplace. You can help users find and understand available GPU instances for rent. +After greeting, immediately call `get_available_gpus` to fetch the list of available GPUs. + You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, or specifications, use this tool to get the most current information. @@ -102,6 +104,8 @@ async def fetch_marketplace_data( 1. Mention the GPU model, memory, and hourly price 2. Indicate if the instance is currently available 3. Include the location/region +4. When mentioning prices, use the format "X per hour" (e.g., "10.00 per hour") +4.1 Prices are not a HUNDRED dollars, they use dots not commas. Instead, one point fifty per hour, for example. If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. """ From 36ee0a9d757ea278a2261b8931de078c948fa2a3 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 14 Feb 2025 23:57:15 -0800 Subject: [PATCH 06/23] bug: prices should return in cents. 
feat(gpu-marketplace-bot): enhance marketplace data fetching with advanced filtering and sorting options --- multimodal-gpu_marketplace-bot/main.py | 180 +++++++++++++++++++++---- 1 file changed, 157 insertions(+), 23 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index b1846be0..aea6722d 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -37,8 +37,8 @@ async def fetch_marketplace_data( function_name, tool_call_id, args, llm, context, result_callback ): - async with aiohttp.ClientSession() as session: - try: + try: + async with aiohttp.ClientSession() as session: url = "https://api.hyperbolic.xyz/v1/marketplace" headers = {"Content-Type": "application/json"} filters = {} if args["filter_type"] == "all" else {"available": True} @@ -49,11 +49,30 @@ async def fetch_marketplace_data( marketplace_data = await response.json() available_instances = [ { - "id": instance["id"], - "gpu_model": instance["hardware"]["gpus"][0]["model"], - "gpu_memory": instance["hardware"]["gpus"][0]["ram"], - "price_per_hour": instance["pricing"]["price"]["amount"], - "location": instance["location"]["region"], + "quantity": f"{instance.get('gpus_total', 1)} X", + "gpu_type": instance["hardware"]["gpus"][0]["model"], + "gpu_ram": f"GPU RAM: {instance['hardware']['gpus'][0]['ram']}GB", + "storage": ( + f"{instance['hardware']['storage'][0]['capacity']} TB" + if instance["hardware"].get("storage") + else "N/A" + ), + "system_ram": ( + f"RAM:\n{instance['hardware']['ram'][0]['capacity'] / 1024:.1f} TB" + if instance["hardware"].get("ram") + else "N/A" + ), + "price": ( + f"${float(instance['pricing']['price']['amount']):.2f}/hr" + if float(instance["pricing"]["price"]["amount"]) >= 1.00 + else f"{int(float(instance['pricing']['price']['amount']) * 100)}¢/hr" + ), + "price_float": float( + instance["pricing"]["price"]["amount"] + ), # For sorting + "status": ( + "Reserved" if 
instance["reserved"] else "Buy Credits" + ), "available": not instance["reserved"] and instance["gpus_reserved"] < instance["gpus_total"], } @@ -61,13 +80,88 @@ async def fetch_marketplace_data( if "gpus" in instance["hardware"] and instance["hardware"]["gpus"] ] - await result_callback({"instances": available_instances}) + + # Sort by price if requested + if args.get("sort_by") == "price_low_to_high": + available_instances.sort(key=lambda x: x["price_float"]) + + # Filter by price range if specified + price_range = args.get("price_range") + if price_range: + if price_range == "budget": + available_instances = [ + i for i in available_instances if i["price_float"] < 0.5 + ] + elif price_range == "mid": + available_instances = [ + i + for i in available_instances + if 0.5 <= i["price_float"] < 1.0 + ] + elif price_range == "high": + available_instances = [ + i + for i in available_instances + if i["price_float"] >= 1.0 + ] + + # Apply filters + filtered_instances = available_instances + + # Quantity filter + if args.get("quantity"): + if args["quantity"] == "8X+": + filtered_instances = [ + i + for i in filtered_instances + if int(i["quantity"].split(" ")[0]) >= 8 + ] + else: + target_quantity = int(args["quantity"].replace("X", "")) + filtered_instances = [ + i + for i in filtered_instances + if int(i["quantity"].split(" ")[0]) == target_quantity + ] + + # Storage filter + if args.get("storage"): + if args["storage"] == "0-500GB": + filtered_instances = [ + i + for i in filtered_instances + if i["storage"] != "N/A" + and float(i["storage"].split(" ")[0]) <= 500 + ] + elif args["storage"] == "500GB-1TB": + filtered_instances = [ + i + for i in filtered_instances + if i["storage"] != "N/A" + and 500 < float(i["storage"].split(" ")[0]) <= 1000 + ] + + # Sort instances + if args.get("sort_by"): + if args["sort_by"] == "price_low_to_high": + filtered_instances.sort(key=lambda x: x["price_float"]) + elif args["sort_by"] == "price_high_to_low": + filtered_instances.sort( 
+ key=lambda x: x["price_float"], reverse=True + ) + + return await result_callback( + {"tool_call_id": tool_call_id, "instances": filtered_instances} + ) else: - await result_callback( - {"error": f"API request failed with status {response.status}"} + return await result_callback( + { + "tool_call_id": tool_call_id, + "error": f"API request failed with status {response.status}", + } ) - except Exception as e: - await result_callback({"error": str(e)}) + except Exception as e: + return await result_callback({"tool_call_id": tool_call_id, "error": str(e)}) tools = [ @@ -83,7 +177,38 @@ async def fetch_marketplace_data( "type": "string", "enum": ["all", "available_only"], "description": "Filter type for GPU instances", - } + }, + "sort_by": { + "type": "string", + "enum": ["price_low_to_high", "price_high_to_low"], + "description": "Sort instances by price", + }, + "quantity": { + "type": "string", + "enum": [ + "Any", + "1X", + "2X", + "3X", + "4X", + "5X", + "6X", + "7X", + "8X", + "8X+", + ], + "description": "Filter by GPU quantity", + }, + "storage": { + "type": "string", + "enum": ["Any", "0-500GB", "500GB-1TB"], + "description": "Filter by storage capacity", + }, + "location": { + "type": "string", + "enum": ["Any", "US, North America"], + "description": "Filter by location", + }, }, "required": ["filter_type"], }, @@ -93,21 +218,30 @@ async def fetch_marketplace_data( ] system_instruction = """ -You are a helpful assistant for Hyperbolic Labs' GPU Marketplace. You can help users find and understand available GPU instances for rent. +You are a friendly GPU marketplace assistant. Keep responses concise and natural. After greeting, immediately call `get_available_gpus` to fetch the list of available GPUs. -You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, -or specifications, use this tool to get the most current information. 
+Available sorting and filtering options: +- Price: Low to High or High to Low +- Quantity: 1X to 8X+ GPUs +- Storage: 0-500GB or 500GB-1TB +- Location: Currently all in US, North America + +Price formatting rules: +- Under $1: Use cents (e.g., "13¢/hr") +- $1 and above: Use dollars (e.g., "$2.(POINT)30/hr") +- There are no GPUs for rent that are HUNDREDS of dollars per hour. +- If you see a price like $100/hr, it's likely a bug. Read it again and check if you mistook the PERIOD for a COMMA. +- If hundreds, divide by 100 so you revert it to cents. +- If you say hundred dollars again, you will be punished. -Always be professional and helpful. When listing GPUs: -1. Mention the GPU model, memory, and hourly price -2. Indicate if the instance is currently available -3. Include the location/region -4. When mentioning prices, use the format "X per hour" (e.g., "10.00 per hour") -4.1 Prices are not a HUNDRED dollars, they use dots not commas. Instead, one point fifty per hour, for example. +Start by greeting and showing available GPUs sorted by price (low to high). Help users filter using any combination of: +- Quantity (e.g., "show me 8X GPUs") +- Storage (e.g., "GPUs with 500GB+ storage") +- Price sorting (e.g., "show highest price first") -If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. +When filtering, still call `get_available_gpus` with the appropriate filters. 
""" From 94ba56ace2ef8430dab9d74c3dc217f13480462a Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 14 Feb 2025 23:59:10 -0800 Subject: [PATCH 07/23] feat(gpu-marketplace-bot): update README to reflect new GPU marketplace features and usage instructions --- multimodal-gpu_marketplace-bot/README.md | 63 +++++++++++++++--------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/README.md b/multimodal-gpu_marketplace-bot/README.md index cf9734c4..9acd5796 100644 --- a/multimodal-gpu_marketplace-bot/README.md +++ b/multimodal-gpu_marketplace-bot/README.md @@ -1,20 +1,32 @@ -# Multimodal Video Bot +# GPU Marketplace Voice Assistant Bot -A video conferencing bot that can analyze screen shares and camera feeds using Gemini's multimodal capabilities. +An interactive voice assistant bot that helps users explore and find GPU instances on the Hyperbolic GPU Marketplace. ## Features -- **Prioritized Screen Sharing**: Automatically attempts to capture screen sharing first, falling back to camera if unavailable -- **Voice Activation Detection (VAD)**: Uses Silero VAD for precise audio detection -- **Multimodal Analysis**: Processes both visual and audio inputs using Google's Gemini API -- **Interactive Response**: Provides real-time responses to user queries about visual content +- **Real-time GPU Availability**: Live access to Hyperbolic's GPU marketplace +- **Voice Interaction**: Natural conversation about GPU options and pricing +- **Smart Filtering**: + - Price ranges (budget to high-end) + - GPU quantities (1X to 8X+) + - Storage capacity + - Availability status +- **Dynamic Price Display**: + - Under $1: Shows in cents (e.g., "13¢/hr") + - $1 and above: Shows in dollars (e.g., "$1.50/hr") + +## Available GPU Types + +- Consumer GPUs (RTX 3070, 3080, 4090) +- Data Center GPUs (H100 SXM, NVIDIA H200) +- Various configurations (1X to 8X+) ## Requirements - Python 3.12+ -- Google 
API key with access to Gemini API +- Google API key (for Gemini) - Daily.co API key -- See `requirements.txt` for complete dependencies +- Access to Hyperbolic Marketplace API ## Environment Setup @@ -34,27 +46,32 @@ pip install -r requirements.txt ## Usage -Run the bot: +Start the bot: ```bash -python src/main.py +python main.py ``` -Or with explicit room URL: +Join the Daily.co room to interact with the bot. You can: -```bash -python src/main.py -u "https://your-domain.daily.co/room" -k "your-daily-api-key" -``` +- Ask about available GPUs +- Filter by price range +- Sort by price (low to high or high to low) +- Filter by GPU quantity +- Check storage options +- Get real-time availability updates -## Voice Options +## Example Queries -The bot supports multiple voice options: -- Aoede (default) -- Puck -- Charon -- Kore -- Fenrir +- "What GPUs are available?" +- "Show me budget options under 50 cents per hour" +- "What are your high-end GPUs?" +- "Do you have any 8X GPU configurations?" +- "Show me GPUs with over 500GB storage" +- "What's the price range for H100s?" -## Rate Limiting +## Notes -The service implements automatic rate limiting and retry mechanisms when interacting with Google's APIs to prevent quota exhaustion. 
+- All GPU instances are located in US, North America +- Prices are always displayed per hour +- The bot automatically refreshes data for the most current availability From 70474389eae14d1205834ef85d3c26edbad0bea5 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sat, 15 Feb 2025 00:08:24 -0800 Subject: [PATCH 08/23] REVERT feat(gpu-marketplace-bot): refactor fetch_marketplace_data for improved error handling and response structure --- multimodal-gpu_marketplace-bot/main.py | 180 +++---------------------- 1 file changed, 21 insertions(+), 159 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index aea6722d..0dfe0357 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -37,8 +37,8 @@ async def fetch_marketplace_data( function_name, tool_call_id, args, llm, context, result_callback ): - try: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession() as session: + try: url = "https://api.hyperbolic.xyz/v1/marketplace" headers = {"Content-Type": "application/json"} filters = {} if args["filter_type"] == "all" else {"available": True} @@ -49,30 +49,11 @@ async def fetch_marketplace_data( marketplace_data = await response.json() available_instances = [ { - "quantity": f"{instance.get('gpus_total', 1)} X", - "gpu_type": instance["hardware"]["gpus"][0]["model"], - "gpu_ram": f"GPU RAM: {instance['hardware']['gpus'][0]['ram']}GB", - "storage": ( - f"{instance['hardware']['storage'][0]['capacity']} TB" - if instance["hardware"].get("storage") - else "N/A" - ), - "system_ram": ( - f"RAM:\n{instance['hardware']['ram'][0]['capacity'] / 1024:.1f} TB" - if instance["hardware"].get("ram") - else "N/A" - ), - "price": ( - f"${float(instance['pricing']['price']['amount']):.2f}/hr" - if float(instance["pricing"]["price"]["amount"]) >= 1.00 - else f"{int(float(instance['pricing']['price']['amount']) * 
100)}¢/hr" - ), - "price_float": float( - instance["pricing"]["price"]["amount"] - ), # For sorting - "status": ( - "Reserved" if instance["reserved"] else "Buy Credits" - ), + "id": instance["id"], + "gpu_model": instance["hardware"]["gpus"][0]["model"], + "gpu_memory": instance["hardware"]["gpus"][0]["ram"], + "price_per_hour": instance["pricing"]["price"]["amount"], + "location": instance["location"]["region"], "available": not instance["reserved"] and instance["gpus_reserved"] < instance["gpus_total"], } @@ -80,88 +61,13 @@ async def fetch_marketplace_data( if "gpus" in instance["hardware"] and instance["hardware"]["gpus"] ] - - # Sort by price if requested - if args.get("sort_by") == "price_low_to_high": - available_instances.sort(key=lambda x: x["price_float"]) - - # Filter by price range if specified - price_range = args.get("price_range") - if price_range: - if price_range == "budget": - available_instances = [ - i for i in available_instances if i["price_float"] < 0.5 - ] - elif price_range == "mid": - available_instances = [ - i - for i in available_instances - if 0.5 <= i["price_float"] < 1.0 - ] - elif price_range == "high": - available_instances = [ - i - for i in available_instances - if i["price_float"] >= 1.0 - ] - - # Apply filters - filtered_instances = available_instances - - # Quantity filter - if args.get("quantity"): - if args["quantity"] == "8X+": - filtered_instances = [ - i - for i in filtered_instances - if int(i["quantity"].split(" ")[0]) >= 8 - ] - else: - target_quantity = int(args["quantity"].replace("X", "")) - filtered_instances = [ - i - for i in filtered_instances - if int(i["quantity"].split(" ")[0]) == target_quantity - ] - - # Storage filter - if args.get("storage"): - if args["storage"] == "0-500GB": - filtered_instances = [ - i - for i in filtered_instances - if i["storage"] != "N/A" - and float(i["storage"].split(" ")[0]) <= 500 - ] - elif args["storage"] == "500GB-1TB": - filtered_instances = [ - i - for i in 
filtered_instances - if i["storage"] != "N/A" - and 500 < float(i["storage"].split(" ")[0]) <= 1000 - ] - - # Sort instances - if args.get("sort_by"): - if args["sort_by"] == "price_low_to_high": - filtered_instances.sort(key=lambda x: x["price_float"]) - elif args["sort_by"] == "price_high_to_low": - filtered_instances.sort( - key=lambda x: x["price_float"], reverse=True - ) - - return await result_callback( - {"tool_call_id": tool_call_id, "instances": filtered_instances} - ) + await result_callback({"instances": available_instances}) else: - return await result_callback( - { - "tool_call_id": tool_call_id, - "error": f"API request failed with status {response.status}", - } + await result_callback( + {"error": f"API request failed with status {response.status}"} ) - except Exception as e: - return await result_callback({"tool_call_id": tool_call_id, "error": str(e)}) + except Exception as e: + await result_callback({"error": str(e)}) tools = [ @@ -177,38 +83,7 @@ async def fetch_marketplace_data( "type": "string", "enum": ["all", "available_only"], "description": "Filter type for GPU instances", - }, - "sort_by": { - "type": "string", - "enum": ["price_low_to_high", "price_high_to_low"], - "description": "Sort instances by price", - }, - "quantity": { - "type": "string", - "enum": [ - "Any", - "1X", - "2X", - "3X", - "4X", - "5X", - "6X", - "7X", - "8X", - "8X+", - ], - "description": "Filter by GPU quantity", - }, - "storage": { - "type": "string", - "enum": ["Any", "0-500GB", "500GB-1TB"], - "description": "Filter by storage capacity", - }, - "location": { - "type": "string", - "enum": ["Any", "US, North America"], - "description": "Filter by location", - }, + } }, "required": ["filter_type"], }, @@ -218,30 +93,17 @@ async def fetch_marketplace_data( ] system_instruction = """ -You are a friendly GPU marketplace assistant. Keep responses concise and natural. - -After greeting, immediately call `get_available_gpus` to fetch the list of available GPUs. 
- -Available sorting and filtering options: -- Price: Low to High or High to Low -- Quantity: 1X to 8X+ GPUs -- Storage: 0-500GB or 500GB-1TB -- Location: Currently all in US, North America +You are a helpful assistant for Hyperbolic Labs' GPU Marketplace. You can help users find and understand available GPU instances for rent. -Price formatting rules: -- Under $1: Use cents (e.g., "13¢/hr") -- $1 and above: Use dollars (e.g., "$2.(POINT)30/hr") -- There are no GPUs for rent that are HUNDREDS of dollars per hour. -- If you see a price like $100/hr, it's likely a bug. Read it again and check if you mistook the PERIOD for a COMMA. -- If hundreds, divide by 100 so you revert it to cents. -- If you say hundred dollars again, you will be punished. +You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, +or specifications, use this tool to get the most current information. -Start by greeting and showing available GPUs sorted by price (low to high). Help users filter using any combination of: -- Quantity (e.g., "show me 8X GPUs") -- Storage (e.g., "GPUs with 500GB+ storage") -- Price sorting (e.g., "show highest price first") +Always be professional and helpful. When listing GPUs: +1. Mention the GPU model, memory, and hourly price +2. Indicate if the instance is currently available +3. Include the location/region -When filtering, still call `get_available_gpus` with the appropriate filters. +If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. """ From 8d8fe7763c5c4a24c0e9b4bda21e02179e0b2fd6 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:33:43 +0800 Subject: [PATCH 09/23] Add region mapping and update price formatting in marketplace data fetch - Introduced REGION_MAP to convert region codes to friendly names. 
- Updated price formatting to convert from cents to dollars in the fetch_marketplace_data function. - Added new marketplace_output.txt file to store GPU marketplace data. - Created test.ipynb to demonstrate fetching and saving GPU data. --- multimodal-gpu_marketplace-bot/main.py | 10 +- .../marketplace_output.txt | 70 ++ multimodal-gpu_marketplace-bot/test.ipynb | 834 ++++++++++++++++++ 3 files changed, 912 insertions(+), 2 deletions(-) create mode 100644 multimodal-gpu_marketplace-bot/marketplace_output.txt create mode 100644 multimodal-gpu_marketplace-bot/test.ipynb diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 0dfe0357..5f809d15 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -37,6 +37,10 @@ async def fetch_marketplace_data( function_name, tool_call_id, args, llm, context, result_callback ): + REGION_MAP = { + "region-1": "US, North America", + # Add more mappings as needed + } async with aiohttp.ClientSession() as session: try: url = "https://api.hyperbolic.xyz/v1/marketplace" @@ -52,8 +56,10 @@ async def fetch_marketplace_data( "id": instance["id"], "gpu_model": instance["hardware"]["gpus"][0]["model"], "gpu_memory": instance["hardware"]["gpus"][0]["ram"], - "price_per_hour": instance["pricing"]["price"]["amount"], - "location": instance["location"]["region"], + # Convert price from cents to dollars and format as string + "price_per_hour": f"${instance['pricing']['price']['amount'] / 100:.2f}", + # Map region code to friendly name + "location": REGION_MAP.get(instance["location"]["region"], instance["location"]["region"]), "available": not instance["reserved"] and instance["gpus_reserved"] < instance["gpus_total"], } diff --git a/multimodal-gpu_marketplace-bot/marketplace_output.txt b/multimodal-gpu_marketplace-bot/marketplace_output.txt new file mode 100644 index 00000000..c331f40c --- /dev/null +++ b/multimodal-gpu_marketplace-bot/marketplace_output.txt 
@@ -0,0 +1,70 @@ +id gpu_model gpu_memory price_per_hour location available +ceti14 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +l-hgx-05 NVIDIA-H200 143771 220 region-1 True +sfc-016 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +l-hgx-01 NVIDIA-H200 143771 225 region-1 True +antalpha-super-server100132 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False +ses-a16 NVIDIA-H100-80GB-HBM3 81559 155 region-1 True +antalpha-super-server100154 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False +sfc-025 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +korea-amd9-17 NVIDIA-GeForce-RTX-3070 8192 16 region-1 False +sfc-010 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ceti16 NVIDIA-H100-80GB-HBM3 81559 160 region-1 True +sfc-008 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False +antalpha-super-server100116 NVIDIA-GeForce-RTX-4090 24564 30 region-1 False +sfc-026 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ns-ai-server010 NVIDIA-GeForce-RTX-4090 24564 25 region-1 False +sfc-007 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +antalpha-super-server100155 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False +sfc-003 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-005 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-018 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +antalpha-super-server-100194 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False +sfc-001 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ns-ai-server018 NVIDIA-GeForce-RTX-4090 24564 30 region-1 False +sfc-017 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False +antalpha-super-server100202 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False +sfc-030 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-002 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +gpu-cluster-helsinki-hyperbolic NVIDIA-H100-80GB-HBM3 81559 185 region-1 True +l-hgx-02 NVIDIA-H200 143771 230 region-1 True +antalpha-super-server100123 NVIDIA-GeForce-RTX-4090 24564 33 region-1 False +sfc-011 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +antalpha-super-server100152 
NVIDIA-GeForce-RTX-4090 24564 35 region-1 False +ceti13 NVIDIA-H100-80GB-HBM3 81559 170 region-1 True +sfc-022 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +antalpha-super-server100120 NVIDIA-GeForce-RTX-4090 24564 40 region-1 True +antalpha-super-server100130 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False +sfc-009 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-004 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-032 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ns-ai-server007 NVIDIA-GeForce-RTX-4090 24564 25 region-1 False +ceti12 NVIDIA-H100-80GB-HBM3 81559 170 region-1 True +antalpha-super-server100115 NVIDIA-GeForce-RTX-4090 24564 30 region-1 False +sfc-028 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False +ns-ai-server019 NVIDIA-GeForce-RTX-4090 24564 25 region-1 False +ses-a6 NVIDIA-H100-80GB-HBM3 81559 155 region-1 True +antalpha-super-server100153 NVIDIA-GeForce-RTX-4090 24564 50 region-1 True +antalpha-super-server100156 NVIDIA-GeForce-RTX-4090 24564 33 region-1 False +ses-a11 NVIDIA-H100-80GB-HBM3 81559 155 region-1 True +sfc-023 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-006 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +antalpha-super-server100162 NVIDIA-GeForce-RTX-4090 24564 40 region-1 False +sfc-027 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ceti8 NVIDIA-H100-80GB-HBM3 81559 180 region-1 True +korea-amd9-37 NVIDIA-GeForce-RTX-3080 10240 20 region-1 False +sfc-024 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ceti5 NVIDIA-H100-80GB-HBM3 81559 180 region-1 True +sfc-019 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ses-hyper-a1 NVIDIA-H100-80GB-HBM3 81559 155 region-1 False +sfc-031 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-021 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-029 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +sfc-012 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ceti11 NVIDIA-H100-80GB-HBM3 81559 200 region-1 True +ai-server NVIDIA-L40S 46068 60 region-1 True +antalpha-super-server100164 
NVIDIA-GeForce-RTX-4090 24564 30 region-1 False +sfc-020 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False +sfc-014 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True +ceti15 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False +ses-a5 NVIDIA-H100-80GB-HBM3 81559 175 region-1 False diff --git a/multimodal-gpu_marketplace-bot/test.ipynb b/multimodal-gpu_marketplace-bot/test.ipynb new file mode 100644 index 00000000..939cf6ac --- /dev/null +++ b/multimodal-gpu_marketplace-bot/test.ipynb @@ -0,0 +1,834 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "12fb6ef7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Ignoring invalid distribution ~ytest (/opt/homebrew/lib/python3.11/site-packages)\u001b[0m\u001b[33m\n", + "\u001b[0mRequirement already satisfied: aiohttp in /opt/homebrew/lib/python3.11/site-packages (3.11.11)\n", + "Requirement already satisfied: nest_asyncio in /opt/homebrew/lib/python3.11/site-packages (1.6.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (23.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (1.3.3)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (6.0.4)\n", + "Requirement already satisfied: propcache>=0.2.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (1.18.3)\n", + "Requirement already satisfied: idna>=2.0 in /opt/homebrew/lib/python3.11/site-packages (from 
yarl<2.0,>=1.17.0->aiohttp) (3.10)\n", + "\u001b[33mWARNING: Ignoring invalid distribution ~ytest (/opt/homebrew/lib/python3.11/site-packages)\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: Ignoring invalid distribution ~ytest (/opt/homebrew/lib/python3.11/site-packages)\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install aiohttp nest_asyncio" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "68e46622", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install aiohttp nest_asyncio pandas --quiet\n", + "\n", + "import aiohttp\n", + "import nest_asyncio\n", + "import asyncio\n", + "import json\n", + "\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b8e5ddec", + "metadata": {}, + "outputs": [], + "source": [ + "async def fetch_gpus(filter_type=\"all\"):\n", + " url = \"https://api.hyperbolic.xyz/v1/marketplace\"\n", + " headers = {\"Content-Type\": \"application/json\"}\n", + " filters = {} if filter_type == \"all\" else {\"available\": True}\n", + " data = {\"filters\": filters}\n", + " async with aiohttp.ClientSession() as session:\n", + " async with session.post(url, json=data, headers=headers) as response:\n", + " if response.status == 200:\n", + " return await response.json()\n", + " else:\n", + " print(f\"API request failed with status {response.status}\")\n", + " return None" + ] + }, + { + "cell_type": "code", + 
"execution_count": 10, + "id": "a4c006ff", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "id", + "rawType": "object", + "type": "string" + }, + { + "name": "gpu_model", + "rawType": "object", + "type": "string" + }, + { + "name": "gpu_memory", + "rawType": "int64", + "type": "integer" + }, + { + "name": "price_per_hour", + "rawType": "int64", + "type": "integer" + }, + { + "name": "location", + "rawType": "object", + "type": "string" + }, + { + "name": "available", + "rawType": "bool", + "type": "boolean" + } + ], + "conversionMethod": "pd.DataFrame", + "ref": "3258f801-509e-4f66-9b28-56dabf5adba1", + "rows": [ + [ + "0", + "ceti14", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "1", + "l-hgx-05", + "NVIDIA-H200", + "143771", + "220", + "region-1", + "True" + ], + [ + "2", + "sfc-016", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "3", + "l-hgx-01", + "NVIDIA-H200", + "143771", + "225", + "region-1", + "True" + ], + [ + "4", + "antalpha-super-server100132", + "NVIDIA-GeForce-RTX-4090", + "24564", + "35", + "region-1", + "False" + ], + [ + "5", + "ses-a16", + "NVIDIA-H100-80GB-HBM3", + "81559", + "155", + "region-1", + "True" + ], + [ + "6", + "antalpha-super-server100154", + "NVIDIA-GeForce-RTX-4090", + "24564", + "35", + "region-1", + "False" + ], + [ + "7", + "sfc-025", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "8", + "korea-amd9-17", + "NVIDIA-GeForce-RTX-3070", + "8192", + "16", + "region-1", + "False" + ], + [ + "9", + "sfc-010", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "10", + "ceti16", + "NVIDIA-H100-80GB-HBM3", + "81559", + "160", + "region-1", + "True" + ], + [ + "11", + "sfc-008", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + 
"False" + ], + [ + "12", + "antalpha-super-server100116", + "NVIDIA-GeForce-RTX-4090", + "24564", + "30", + "region-1", + "False" + ], + [ + "13", + "sfc-026", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "14", + "ns-ai-server010", + "NVIDIA-GeForce-RTX-4090", + "24564", + "25", + "region-1", + "False" + ], + [ + "15", + "sfc-007", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "16", + "antalpha-super-server100155", + "NVIDIA-GeForce-RTX-4090", + "24564", + "35", + "region-1", + "False" + ], + [ + "17", + "sfc-003", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "18", + "sfc-005", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "19", + "sfc-018", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "20", + "antalpha-super-server-100194", + "NVIDIA-GeForce-RTX-4090", + "24564", + "35", + "region-1", + "False" + ], + [ + "21", + "sfc-001", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "22", + "ns-ai-server018", + "NVIDIA-GeForce-RTX-4090", + "24564", + "30", + "region-1", + "False" + ], + [ + "23", + "sfc-017", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "False" + ], + [ + "24", + "antalpha-super-server100202", + "NVIDIA-GeForce-RTX-4090", + "24564", + "35", + "region-1", + "False" + ], + [ + "25", + "sfc-030", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "26", + "sfc-002", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "27", + "gpu-cluster-helsinki-hyperbolic", + "NVIDIA-H100-80GB-HBM3", + "81559", + "185", + "region-1", + "True" + ], + [ + "28", + "l-hgx-02", + "NVIDIA-H200", + "143771", + "230", + "region-1", + "True" + ], + [ + "29", + "antalpha-super-server100123", + "NVIDIA-GeForce-RTX-4090", + "24564", + "33", + "region-1", + "False" + ], + [ + "30", + "sfc-011", + 
"NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "31", + "antalpha-super-server100152", + "NVIDIA-GeForce-RTX-4090", + "24564", + "35", + "region-1", + "False" + ], + [ + "32", + "ceti13", + "NVIDIA-H100-80GB-HBM3", + "81559", + "170", + "region-1", + "True" + ], + [ + "33", + "sfc-022", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "34", + "antalpha-super-server100120", + "NVIDIA-GeForce-RTX-4090", + "24564", + "40", + "region-1", + "True" + ], + [ + "35", + "antalpha-super-server100130", + "NVIDIA-GeForce-RTX-4090", + "24564", + "35", + "region-1", + "False" + ], + [ + "36", + "sfc-009", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "37", + "sfc-004", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "38", + "sfc-032", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ], + [ + "39", + "ns-ai-server007", + "NVIDIA-GeForce-RTX-4090", + "24564", + "25", + "region-1", + "False" + ], + [ + "40", + "ceti12", + "NVIDIA-H100-80GB-HBM3", + "81559", + "170", + "region-1", + "True" + ], + [ + "41", + "antalpha-super-server100115", + "NVIDIA-GeForce-RTX-4090", + "24564", + "30", + "region-1", + "False" + ], + [ + "42", + "sfc-028", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "False" + ], + [ + "43", + "ns-ai-server019", + "NVIDIA-GeForce-RTX-4090", + "24564", + "25", + "region-1", + "False" + ], + [ + "44", + "ses-a6", + "NVIDIA-H100-80GB-HBM3", + "81559", + "155", + "region-1", + "True" + ], + [ + "45", + "antalpha-super-server100153", + "NVIDIA-GeForce-RTX-4090", + "24564", + "50", + "region-1", + "True" + ], + [ + "46", + "antalpha-super-server100156", + "NVIDIA-GeForce-RTX-4090", + "24564", + "33", + "region-1", + "False" + ], + [ + "47", + "ses-a11", + "NVIDIA-H100-80GB-HBM3", + "81559", + "155", + "region-1", + "True" + ], + [ + "48", + "sfc-023", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + 
"region-1", + "True" + ], + [ + "49", + "sfc-006", + "NVIDIA-H100-80GB-HBM3", + "81559", + "150", + "region-1", + "True" + ] + ], + "shape": { + "columns": 6, + "rows": 69 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgpu_modelgpu_memoryprice_per_hourlocationavailable
0ceti14NVIDIA-H100-80GB-HBM381559150region-1True
1l-hgx-05NVIDIA-H200143771220region-1True
2sfc-016NVIDIA-H100-80GB-HBM381559150region-1True
3l-hgx-01NVIDIA-H200143771225region-1True
4antalpha-super-server100132NVIDIA-GeForce-RTX-40902456435region-1False
.....................
64antalpha-super-server100164NVIDIA-GeForce-RTX-40902456430region-1False
65sfc-020NVIDIA-H100-80GB-HBM381559150region-1False
66sfc-014NVIDIA-H100-80GB-HBM381559150region-1True
67ceti15NVIDIA-H100-80GB-HBM381559150region-1False
68ses-a5NVIDIA-H100-80GB-HBM381559175region-1False
\n", + "

69 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " id gpu_model gpu_memory \\\n", + "0 ceti14 NVIDIA-H100-80GB-HBM3 81559 \n", + "1 l-hgx-05 NVIDIA-H200 143771 \n", + "2 sfc-016 NVIDIA-H100-80GB-HBM3 81559 \n", + "3 l-hgx-01 NVIDIA-H200 143771 \n", + "4 antalpha-super-server100132 NVIDIA-GeForce-RTX-4090 24564 \n", + ".. ... ... ... \n", + "64 antalpha-super-server100164 NVIDIA-GeForce-RTX-4090 24564 \n", + "65 sfc-020 NVIDIA-H100-80GB-HBM3 81559 \n", + "66 sfc-014 NVIDIA-H100-80GB-HBM3 81559 \n", + "67 ceti15 NVIDIA-H100-80GB-HBM3 81559 \n", + "68 ses-a5 NVIDIA-H100-80GB-HBM3 81559 \n", + "\n", + " price_per_hour location available \n", + "0 150 region-1 True \n", + "1 220 region-1 True \n", + "2 150 region-1 True \n", + "3 225 region-1 True \n", + "4 35 region-1 False \n", + ".. ... ... ... \n", + "64 30 region-1 False \n", + "65 150 region-1 False \n", + "66 150 region-1 True \n", + "67 150 region-1 False \n", + "68 175 region-1 False \n", + "\n", + "[69 rows x 6 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "async def show_gpus(filter_type=\"all\"):\n", + " data = await fetch_gpus(filter_type)\n", + " if data and \"instances\" in data:\n", + " instances = [\n", + " {\n", + " \"id\": inst[\"id\"],\n", + " \"gpu_model\": inst[\"hardware\"][\"gpus\"][0][\"model\"],\n", + " \"gpu_memory\": inst[\"hardware\"][\"gpus\"][0][\"ram\"],\n", + " \"price_per_hour\": inst[\"pricing\"][\"price\"][\"amount\"],\n", + " \"location\": inst[\"location\"][\"region\"],\n", + " \"available\": not inst[\"reserved\"]\n", + " and inst[\"gpus_reserved\"] < inst[\"gpus_total\"],\n", + " }\n", + " for inst in data[\"instances\"]\n", + " if \"gpus\" in inst[\"hardware\"] and inst[\"hardware\"][\"gpus\"]\n", + " ]\n", + " df = pd.DataFrame(instances)\n", + " display(df)\n", + " return df\n", + " else:\n", + " print(\"No data found or API error.\")\n", + "\n", + "\n", + "# Example usage:\n", + "df = 
asyncio.run(show_gpus(\"available_only\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "151d71c6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saved to marketplace_output.txt\n" + ] + } + ], + "source": [ + "if df is not None:\n", + " df.to_csv(\"marketplace_output.txt\", index=False, sep=\"\\t\")\n", + " print(\"Saved to marketplace_output.txt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db3b1d93", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d68fbe7e40db6709675c13029b996b264e7d59eb Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:39:56 +0800 Subject: [PATCH 10/23] Add memory formatting function and update GPU data structure in marketplace fetch --- multimodal-gpu_marketplace-bot/main.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 5f809d15..069c2a3d 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -41,6 +41,15 @@ async def fetch_marketplace_data( "region-1": "US, North America", # Add more mappings as needed } + + def format_memory(mb): + if mb >= 1024 * 1024: + return f"{mb / (1024 * 1024):.2f} TB" + elif mb >= 1024: + return f"{mb / 1024:.2f} GB" + else: + return f"{mb} MB" + async with aiohttp.ClientSession() as session: try: url = "https://api.hyperbolic.xyz/v1/marketplace" @@ 
-53,12 +62,11 @@ async def fetch_marketplace_data( marketplace_data = await response.json() available_instances = [ { - "id": instance["id"], + # Only mention GPU model, memory, price, location, and availability + # "id": instance["id"], "gpu_model": instance["hardware"]["gpus"][0]["model"], - "gpu_memory": instance["hardware"]["gpus"][0]["ram"], - # Convert price from cents to dollars and format as string + "gpu_memory": format_memory(instance["hardware"]["gpus"][0]["ram"]), "price_per_hour": f"${instance['pricing']['price']['amount'] / 100:.2f}", - # Map region code to friendly name "location": REGION_MAP.get(instance["location"]["region"], instance["location"]["region"]), "available": not instance["reserved"] and instance["gpus_reserved"] < instance["gpus_total"], @@ -105,8 +113,8 @@ async def fetch_marketplace_data( or specifications, use this tool to get the most current information. Always be professional and helpful. When listing GPUs: -1. Mention the GPU model, memory, and hourly price -2. Indicate if the instance is currently available +1. Mention if the instance is currently available first +2. Then mention the GPU model, memory, and hourly price 3. Include the location/region If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. From cdb292ce0561fda5303251875ee1047151f49a34 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:40:36 +0800 Subject: [PATCH 11/23] Enhance system instructions for GPU Marketplace assistant to include user use case inquiries and personalized GPU recommendations. 
--- multimodal-gpu_marketplace-bot/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 069c2a3d..dcee1f8c 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -109,14 +109,15 @@ def format_memory(mb): system_instruction = """ You are a helpful assistant for Hyperbolic Labs' GPU Marketplace. You can help users find and understand available GPU instances for rent. -You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, -or specifications, use this tool to get the most current information. +You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, or specifications, use this tool to get the most current information. Always be professional and helpful. When listing GPUs: 1. Mention if the instance is currently available first 2. Then mention the GPU model, memory, and hourly price 3. Include the location/region +Encourage users to ask about their use case (e.g., "If you're doing XYZ, I recommend...") and offer expert advice as a pro GPU specialist. If a user describes their workload, suggest the best GPU for their needs and explain why. + If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. """ @@ -153,7 +154,7 @@ async def main(): [ { "role": "user", - "content": "Start by greeting me warmly and introducing me to GPU Rentals by Hyperbolic Labs and mention that you can do everything verbally. Encourage me to start by asking available GPU.", + "content": "Start by greeting me warmly and introducing me to GPU Rentals by Hyperbolic Labs and mention that you can do everything verbally. Encourage me to start by asking available GPU. 
Also mention that you can help me with my use case and suggest the best GPU for my needs.", } ], ) From f898d5616bb46cec440efd7c7307074361595809 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 18:17:05 +0800 Subject: [PATCH 12/23] Refine GPU instance listing and user guidance in marketplace data fetch --- multimodal-gpu_marketplace-bot/main.py | 29 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index dcee1f8c..f902fe6b 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -62,18 +62,27 @@ def format_memory(mb): marketplace_data = await response.json() available_instances = [ { - # Only mention GPU model, memory, price, location, and availability - # "id": instance["id"], + # Main summary fields for default listing "gpu_model": instance["hardware"]["gpus"][0]["model"], "gpu_memory": format_memory(instance["hardware"]["gpus"][0]["ram"]), "price_per_hour": f"${instance['pricing']['price']['amount'] / 100:.2f}", "location": REGION_MAP.get(instance["location"]["region"], instance["location"]["region"]), - "available": not instance["reserved"] - and instance["gpus_reserved"] < instance["gpus_total"], + "available": not instance["reserved"] and instance["gpus_reserved"] < instance["gpus_total"], + # # All technical details for deep-dive queries + # "id": instance["id"], + # "status": instance.get("status"), + # "hardware": instance.get("hardware"), + # "instances": instance.get("instances"), + # "network": instance.get("network"), + # "gpus_total": instance.get("gpus_total"), + # "gpus_reserved": instance.get("gpus_reserved"), + # "has_persistent_storage": instance.get("has_persistent_storage"), + # "supplier_id": instance.get("supplier_id"), + # "cluster_name": instance.get("cluster_name"), + # "pricing": instance.get("pricing"), } for 
instance in marketplace_data["instances"] - if "gpus" in instance["hardware"] - and instance["hardware"]["gpus"] + if "gpus" in instance["hardware"] and instance["hardware"]["gpus"] ] await result_callback({"instances": available_instances}) else: @@ -112,11 +121,13 @@ def format_memory(mb): You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, or specifications, use this tool to get the most current information. Always be professional and helpful. When listing GPUs: -1. Mention if the instance is currently available first -2. Then mention the GPU model, memory, and hourly price +1. Mention the GPU model, memory, and hourly price +2. Indicate if the instance is currently available 3. Include the location/region -Encourage users to ask about their use case (e.g., "If you're doing XYZ, I recommend...") and offer expert advice as a pro GPU specialist. If a user describes their workload, suggest the best GPU for their needs and explain why. +By default, only mention GPU model, memory, price, location, and availability. If a user wants to learn more about a specific instance, invite them to ask for details using the instance's GPU model or ID. When asked, provide all available technical details (CPU, storage, RAM, network, etc) for that instance in a clear, friendly, and expert manner. + +Encourage users to ask about their use case (e.g., 'If you're doing XYZ, I recommend...') and offer expert advice as a pro GPU specialist. If a user describes their workload, suggest the best GPU for their needs and explain why. If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. 
""" From 56c1b42423c5fa90e718ce69e36ee392932058f0 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 18:17:45 +0800 Subject: [PATCH 13/23] Add validator notebook for GPU marketplace data processing and output --- .../marketplace_output.txt | 70 ------------------- .../{test.ipynb => validator.ipynb} | 41 ----------- 2 files changed, 111 deletions(-) delete mode 100644 multimodal-gpu_marketplace-bot/marketplace_output.txt rename multimodal-gpu_marketplace-bot/{test.ipynb => validator.ipynb} (88%) diff --git a/multimodal-gpu_marketplace-bot/marketplace_output.txt b/multimodal-gpu_marketplace-bot/marketplace_output.txt deleted file mode 100644 index c331f40c..00000000 --- a/multimodal-gpu_marketplace-bot/marketplace_output.txt +++ /dev/null @@ -1,70 +0,0 @@ -id gpu_model gpu_memory price_per_hour location available -ceti14 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -l-hgx-05 NVIDIA-H200 143771 220 region-1 True -sfc-016 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -l-hgx-01 NVIDIA-H200 143771 225 region-1 True -antalpha-super-server100132 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False -ses-a16 NVIDIA-H100-80GB-HBM3 81559 155 region-1 True -antalpha-super-server100154 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False -sfc-025 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -korea-amd9-17 NVIDIA-GeForce-RTX-3070 8192 16 region-1 False -sfc-010 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ceti16 NVIDIA-H100-80GB-HBM3 81559 160 region-1 True -sfc-008 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False -antalpha-super-server100116 NVIDIA-GeForce-RTX-4090 24564 30 region-1 False -sfc-026 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ns-ai-server010 NVIDIA-GeForce-RTX-4090 24564 25 region-1 False -sfc-007 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -antalpha-super-server100155 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False -sfc-003 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-005 NVIDIA-H100-80GB-HBM3 
81559 150 region-1 True -sfc-018 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -antalpha-super-server-100194 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False -sfc-001 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ns-ai-server018 NVIDIA-GeForce-RTX-4090 24564 30 region-1 False -sfc-017 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False -antalpha-super-server100202 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False -sfc-030 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-002 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -gpu-cluster-helsinki-hyperbolic NVIDIA-H100-80GB-HBM3 81559 185 region-1 True -l-hgx-02 NVIDIA-H200 143771 230 region-1 True -antalpha-super-server100123 NVIDIA-GeForce-RTX-4090 24564 33 region-1 False -sfc-011 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -antalpha-super-server100152 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False -ceti13 NVIDIA-H100-80GB-HBM3 81559 170 region-1 True -sfc-022 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -antalpha-super-server100120 NVIDIA-GeForce-RTX-4090 24564 40 region-1 True -antalpha-super-server100130 NVIDIA-GeForce-RTX-4090 24564 35 region-1 False -sfc-009 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-004 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-032 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ns-ai-server007 NVIDIA-GeForce-RTX-4090 24564 25 region-1 False -ceti12 NVIDIA-H100-80GB-HBM3 81559 170 region-1 True -antalpha-super-server100115 NVIDIA-GeForce-RTX-4090 24564 30 region-1 False -sfc-028 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False -ns-ai-server019 NVIDIA-GeForce-RTX-4090 24564 25 region-1 False -ses-a6 NVIDIA-H100-80GB-HBM3 81559 155 region-1 True -antalpha-super-server100153 NVIDIA-GeForce-RTX-4090 24564 50 region-1 True -antalpha-super-server100156 NVIDIA-GeForce-RTX-4090 24564 33 region-1 False -ses-a11 NVIDIA-H100-80GB-HBM3 81559 155 region-1 True -sfc-023 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-006 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -antalpha-super-server100162 
NVIDIA-GeForce-RTX-4090 24564 40 region-1 False -sfc-027 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ceti8 NVIDIA-H100-80GB-HBM3 81559 180 region-1 True -korea-amd9-37 NVIDIA-GeForce-RTX-3080 10240 20 region-1 False -sfc-024 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ceti5 NVIDIA-H100-80GB-HBM3 81559 180 region-1 True -sfc-019 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ses-hyper-a1 NVIDIA-H100-80GB-HBM3 81559 155 region-1 False -sfc-031 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-021 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-029 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -sfc-012 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ceti11 NVIDIA-H100-80GB-HBM3 81559 200 region-1 True -ai-server NVIDIA-L40S 46068 60 region-1 True -antalpha-super-server100164 NVIDIA-GeForce-RTX-4090 24564 30 region-1 False -sfc-020 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False -sfc-014 NVIDIA-H100-80GB-HBM3 81559 150 region-1 True -ceti15 NVIDIA-H100-80GB-HBM3 81559 150 region-1 False -ses-a5 NVIDIA-H100-80GB-HBM3 81559 175 region-1 False diff --git a/multimodal-gpu_marketplace-bot/test.ipynb b/multimodal-gpu_marketplace-bot/validator.ipynb similarity index 88% rename from multimodal-gpu_marketplace-bot/test.ipynb rename to multimodal-gpu_marketplace-bot/validator.ipynb index 939cf6ac..b3edc045 100644 --- a/multimodal-gpu_marketplace-bot/test.ipynb +++ b/multimodal-gpu_marketplace-bot/validator.ipynb @@ -1,38 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "12fb6ef7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING: Ignoring invalid distribution ~ytest (/opt/homebrew/lib/python3.11/site-packages)\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: aiohttp in /opt/homebrew/lib/python3.11/site-packages (3.11.11)\n", - "Requirement already satisfied: nest_asyncio in /opt/homebrew/lib/python3.11/site-packages (1.6.0)\n", - "Requirement already 
satisfied: aiohappyeyeballs>=2.3.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (23.1.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (1.3.3)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (6.0.4)\n", - "Requirement already satisfied: propcache>=0.2.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/homebrew/lib/python3.11/site-packages (from aiohttp) (1.18.3)\n", - "Requirement already satisfied: idna>=2.0 in /opt/homebrew/lib/python3.11/site-packages (from yarl<2.0,>=1.17.0->aiohttp) (3.10)\n", - "\u001b[33mWARNING: Ignoring invalid distribution ~ytest (/opt/homebrew/lib/python3.11/site-packages)\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: Ignoring invalid distribution ~ytest (/opt/homebrew/lib/python3.11/site-packages)\u001b[0m\u001b[33m\n", - "\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "!pip install aiohttp nest_asyncio" - ] - }, { "cell_type": "code", "execution_count": 8, @@ -800,14 +767,6 @@ " df.to_csv(\"marketplace_output.txt\", index=False, sep=\"\\t\")\n", " print(\"Saved to marketplace_output.txt\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"db3b1d93", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 6254423b0040b1fd969aea4c20e4792bbf126329 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 18:29:49 +0800 Subject: [PATCH 14/23] Add configuration and marketplace data fetching functionality for GPU Marketplace --- multimodal-gpu_marketplace-bot/config.py | 43 +++++++ multimodal-gpu_marketplace-bot/main.py | 105 +----------------- multimodal-gpu_marketplace-bot/marketplace.py | 51 +++++++++ 3 files changed, 98 insertions(+), 101 deletions(-) create mode 100644 multimodal-gpu_marketplace-bot/config.py create mode 100644 multimodal-gpu_marketplace-bot/marketplace.py diff --git a/multimodal-gpu_marketplace-bot/config.py b/multimodal-gpu_marketplace-bot/config.py new file mode 100644 index 00000000..9617fc29 --- /dev/null +++ b/multimodal-gpu_marketplace-bot/config.py @@ -0,0 +1,43 @@ +SYSTEM_INSTRUCTION = """ +You are a helpful assistant for Hyperbolic Labs' GPU Marketplace. You can help users find and understand available GPU instances for rent. + +You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, or specifications, use this tool to get the most current information. + +Always be professional and helpful. When listing GPUs: +1. Mention the GPU model, memory, and hourly price +2. Indicate if the instance is currently available +3. Include the location/region + +By default, only mention GPU model, memory, price, location, and availability. If a user wants to learn more about a specific instance, invite them to ask for details using the instance's GPU model or ID. When asked, provide all available technical details (CPU, storage, RAM, network, etc) for that instance in a clear, friendly, and expert manner. + +Encourage users to ask about their use case (e.g., 'If you're doing XYZ, I recommend...') and offer expert advice as a pro GPU specialist. 
If a user describes their workload, suggest the best GPU for their needs and explain why. + +If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. +""" + +TOOLS = [ + { + "function_declarations": [ + { + "name": "get_available_gpus", + "description": "Get the list of available GPU instances in the marketplace", + "parameters": { + "type": "object", + "properties": { + "filter_type": { + "type": "string", + "enum": ["all", "available_only"], + "description": "Filter type for GPU instances", + } + }, + "required": ["filter_type"], + }, + } + ] + } +] + +REGION_MAP = { + "region-1": "US, North America", + # Add more mappings as needed +} \ No newline at end of file diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index f902fe6b..224b7932 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -27,6 +27,8 @@ GeminiMultimodalLiveLLMService, ) from pipecat.transports.services.daily import DailyParams, DailyTransport +from marketplace import fetch_marketplace_data +from config import SYSTEM_INSTRUCTION, TOOLS load_dotenv(override=True) @@ -34,105 +36,6 @@ logger.add(sys.stderr, level="DEBUG") -async def fetch_marketplace_data( - function_name, tool_call_id, args, llm, context, result_callback -): - REGION_MAP = { - "region-1": "US, North America", - # Add more mappings as needed - } - - def format_memory(mb): - if mb >= 1024 * 1024: - return f"{mb / (1024 * 1024):.2f} TB" - elif mb >= 1024: - return f"{mb / 1024:.2f} GB" - else: - return f"{mb} MB" - - async with aiohttp.ClientSession() as session: - try: - url = "https://api.hyperbolic.xyz/v1/marketplace" - headers = {"Content-Type": "application/json"} - filters = {} if args["filter_type"] == "all" else {"available": True} - data = {"filters": filters} - - async with session.post(url, json=data, headers=headers) as response: - if response.status == 200: - marketplace_data = 
await response.json() - available_instances = [ - { - # Main summary fields for default listing - "gpu_model": instance["hardware"]["gpus"][0]["model"], - "gpu_memory": format_memory(instance["hardware"]["gpus"][0]["ram"]), - "price_per_hour": f"${instance['pricing']['price']['amount'] / 100:.2f}", - "location": REGION_MAP.get(instance["location"]["region"], instance["location"]["region"]), - "available": not instance["reserved"] and instance["gpus_reserved"] < instance["gpus_total"], - # # All technical details for deep-dive queries - # "id": instance["id"], - # "status": instance.get("status"), - # "hardware": instance.get("hardware"), - # "instances": instance.get("instances"), - # "network": instance.get("network"), - # "gpus_total": instance.get("gpus_total"), - # "gpus_reserved": instance.get("gpus_reserved"), - # "has_persistent_storage": instance.get("has_persistent_storage"), - # "supplier_id": instance.get("supplier_id"), - # "cluster_name": instance.get("cluster_name"), - # "pricing": instance.get("pricing"), - } - for instance in marketplace_data["instances"] - if "gpus" in instance["hardware"] and instance["hardware"]["gpus"] - ] - await result_callback({"instances": available_instances}) - else: - await result_callback( - {"error": f"API request failed with status {response.status}"} - ) - except Exception as e: - await result_callback({"error": str(e)}) - - -tools = [ - { - "function_declarations": [ - { - "name": "get_available_gpus", - "description": "Get the list of available GPU instances in the marketplace", - "parameters": { - "type": "object", - "properties": { - "filter_type": { - "type": "string", - "enum": ["all", "available_only"], - "description": "Filter type for GPU instances", - } - }, - "required": ["filter_type"], - }, - } - ] - } -] - -system_instruction = """ -You are a helpful assistant for Hyperbolic Labs' GPU Marketplace. You can help users find and understand available GPU instances for rent. 
- -You have access to the marketplace data through the get_available_gpus tool. When users ask about available GPUs, pricing, or specifications, use this tool to get the most current information. - -Always be professional and helpful. When listing GPUs: -1. Mention the GPU model, memory, and hourly price -2. Indicate if the instance is currently available -3. Include the location/region - -By default, only mention GPU model, memory, price, location, and availability. If a user wants to learn more about a specific instance, invite them to ask for details using the instance's GPU model or ID. When asked, provide all available technical details (CPU, storage, RAM, network, etc) for that instance in a clear, friendly, and expert manner. - -Encourage users to ask about their use case (e.g., 'If you're doing XYZ, I recommend...') and offer expert advice as a pro GPU specialist. If a user describes their workload, suggest the best GPU for their needs and explain why. - -If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. 
-""" - - async def main(): async with aiohttp.ClientSession() as session: (room_url, token) = await configure(session) @@ -155,8 +58,8 @@ async def main(): llm = GeminiMultimodalLiveLLMService( api_key=os.getenv("GOOGLE_API_KEY"), - system_instruction=system_instruction, - tools=tools, + system_instruction=SYSTEM_INSTRUCTION, + tools=TOOLS, ) llm.register_function("get_available_gpus", fetch_marketplace_data) diff --git a/multimodal-gpu_marketplace-bot/marketplace.py b/multimodal-gpu_marketplace-bot/marketplace.py new file mode 100644 index 00000000..8b2998c7 --- /dev/null +++ b/multimodal-gpu_marketplace-bot/marketplace.py @@ -0,0 +1,51 @@ +import aiohttp +from config import REGION_MAP + + +def format_memory(mb: int) -> str: + if mb >= 1024 * 1024: + return f"{mb / (1024 * 1024):.2f} TB" + elif mb >= 1024: + return f"{mb / 1024:.2f} GB" + else: + return f"{mb} MB" + +def format_price(amount_cents: int) -> str: + if amount_cents < 100: + return f"{amount_cents}¢/hr" + else: + return f"${amount_cents / 100:.2f}/hr" + +def extract_instance_summary(instance: dict) -> dict: + gpu = instance["hardware"]["gpus"][0] + return { + "gpu_model": gpu["model"], + "gpu_memory": format_memory(gpu["ram"]), + "price_per_hour": format_price(instance["pricing"]["price"]["amount"]), + "location": REGION_MAP.get(instance["location"]["region"], instance["location"]["region"]), + "available": not instance["reserved"] and instance["gpus_reserved"] < instance["gpus_total"], + } + +async def fetch_marketplace_data( + function_name, tool_call_id, args, llm, context, result_callback +): + async with aiohttp.ClientSession() as session: + try: + url = "https://api.hyperbolic.xyz/v1/marketplace" + headers = {"Content-Type": "application/json"} + filters = {} if args["filter_type"] == "all" else {"available": True} + data = {"filters": filters} + + async with session.post(url, json=data, headers=headers) as response: + if response.status == 200: + marketplace_data = await response.json() + 
available_instances = [ + extract_instance_summary(instance) + for instance in marketplace_data["instances"] + if "gpus" in instance["hardware"] and instance["hardware"]["gpus"] + ] + await result_callback({"instances": available_instances}) + else: + await result_callback({"error": f"API request failed with status {response.status}"}) + except Exception as e: + await result_callback({"error": str(e)}) From 1df419a424aeaa8f46f009f362658948eb24fa18 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 18:32:14 +0800 Subject: [PATCH 15/23] Add tool declarations and registration for GPU marketplace functions --- multimodal-gpu_marketplace-bot/main.py | 5 ++-- multimodal-gpu_marketplace-bot/tools.py | 38 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 multimodal-gpu_marketplace-bot/tools.py diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 224b7932..700daf6c 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -29,6 +29,7 @@ from pipecat.transports.services.daily import DailyParams, DailyTransport from marketplace import fetch_marketplace_data from config import SYSTEM_INSTRUCTION, TOOLS +from tools import get_tool_declarations, register_all_tools load_dotenv(override=True) @@ -59,10 +60,10 @@ async def main(): llm = GeminiMultimodalLiveLLMService( api_key=os.getenv("GOOGLE_API_KEY"), system_instruction=SYSTEM_INSTRUCTION, - tools=TOOLS, + tools=get_tool_declarations(), ) - llm.register_function("get_available_gpus", fetch_marketplace_data) + register_all_tools(llm) context = OpenAILLMContext( [ diff --git a/multimodal-gpu_marketplace-bot/tools.py b/multimodal-gpu_marketplace-bot/tools.py new file mode 100644 index 00000000..afec6708 --- /dev/null +++ b/multimodal-gpu_marketplace-bot/tools.py @@ -0,0 +1,38 @@ +from typing import Callable, Dict, Any, Awaitable +from 
marketplace import fetch_marketplace_data + +# Tool metadata definitions +TOOL_DEFINITIONS = [ + { + "name": "get_available_gpus", + "description": "Get the list of available GPU instances in the marketplace", + "parameters": { + "type": "object", + "properties": { + "filter_type": { + "type": "string", + "enum": ["all", "available_only"], + "description": "Filter type for GPU instances", + } + }, + "required": ["filter_type"], + }, + }, + # Add more tool definitions here as needed +] + +# Tool registry: maps tool name to handler function +TOOL_REGISTRY: Dict[str, Callable[..., Awaitable[Any]]] = { + "get_available_gpus": fetch_marketplace_data, + # Add more tool handlers here as needed +} + +def get_tool_declarations() -> list: + """Return tool declarations in the format expected by the LLM service.""" + return [{"function_declarations": TOOL_DEFINITIONS}] + + +def register_all_tools(llm): + """Register all tools in the registry with the LLM service.""" + for tool_name, handler in TOOL_REGISTRY.items(): + llm.register_function(tool_name, handler) From 8f9b67fa28d683de7d61d899397a6019edf9180d Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 18:52:58 +0800 Subject: [PATCH 16/23] IMPORTANT: inference_on_context_initialization=True Refactor GPU marketplace bot configuration and main logic to enhance user interaction and context awareness --- multimodal-gpu_marketplace-bot/config.py | 6 +- multimodal-gpu_marketplace-bot/main.py | 22 +++- multimodal-live_search-bot/.env.example | 8 -- multimodal-live_search-bot/README.md | 90 --------------- multimodal-live_search-bot/main.py | 110 ------------------- multimodal-live_search-bot/requirements.txt | 90 --------------- multimodal-live_search-bot/runner.py | 77 ------------- multimodal-video-bot/.env.example | 8 -- multimodal-video-bot/README.md | 60 ---------- multimodal-video-bot/main.py | 116 -------------------- multimodal-video-bot/requirements.txt | 90 
--------------- multimodal-video-bot/runner.py | 77 ------------- 12 files changed, 24 insertions(+), 730 deletions(-) delete mode 100644 multimodal-live_search-bot/.env.example delete mode 100644 multimodal-live_search-bot/README.md delete mode 100644 multimodal-live_search-bot/main.py delete mode 100644 multimodal-live_search-bot/requirements.txt delete mode 100644 multimodal-live_search-bot/runner.py delete mode 100644 multimodal-video-bot/.env.example delete mode 100644 multimodal-video-bot/README.md delete mode 100644 multimodal-video-bot/main.py delete mode 100644 multimodal-video-bot/requirements.txt delete mode 100644 multimodal-video-bot/runner.py diff --git a/multimodal-gpu_marketplace-bot/config.py b/multimodal-gpu_marketplace-bot/config.py index 9617fc29..82d36211 100644 --- a/multimodal-gpu_marketplace-bot/config.py +++ b/multimodal-gpu_marketplace-bot/config.py @@ -13,8 +13,12 @@ Encourage users to ask about their use case (e.g., 'If you're doing XYZ, I recommend...') and offer expert advice as a pro GPU specialist. If a user describes their workload, suggest the best GPU for their needs and explain why. If users ask about specific GPU models or price ranges, filter and highlight the relevant options from the data. + +You can also see and analyze video feeds (screen share or camera) in real time. If a user shares their screen or camera, you can describe what is visible and help with on-screen tasks. Encourage users to share their screen for more detailed help. """ +LLMCONTEXT_CONTENT = "Start by greeting me warmly and introducing me to GPU Rentals by Hyperbolic Labs and mention that you can do everything verbally. Encourage me to start by asking available GPU. Also mention that you can help me with my use case and suggest the best GPU for my needs. You can also see my screen or camera if I share it, and help with what you see!" 
+ TOOLS = [ { "function_declarations": [ @@ -40,4 +44,4 @@ REGION_MAP = { "region-1": "US, North America", # Add more mappings as needed -} \ No newline at end of file +} diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 700daf6c..c23564ec 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -27,8 +27,7 @@ GeminiMultimodalLiveLLMService, ) from pipecat.transports.services.daily import DailyParams, DailyTransport -from marketplace import fetch_marketplace_data -from config import SYSTEM_INSTRUCTION, TOOLS +from config import SYSTEM_INSTRUCTION, LLMCONTEXT_CONTENT from tools import get_tool_declarations, register_all_tools load_dotenv(override=True) @@ -61,6 +60,9 @@ async def main(): api_key=os.getenv("GOOGLE_API_KEY"), system_instruction=SYSTEM_INSTRUCTION, tools=get_tool_declarations(), + transcribe_user_audio=True, + transcribe_model_audio=True, + inference_on_context_initialization=True, ) register_all_tools(llm) @@ -69,7 +71,7 @@ async def main(): [ { "role": "user", - "content": "Start by greeting me warmly and introducing me to GPU Rentals by Hyperbolic Labs and mention that you can do everything verbally. Encourage me to start by asking available GPU. 
Also mention that you can help me with my use case and suggest the best GPU for my needs.", + "content": LLMCONTEXT_CONTENT, } ], ) @@ -97,6 +99,20 @@ async def main(): @transport.event_handler("on_first_participant_joined") async def on_first_participant_joined(transport, participant): await task.queue_frames([context_aggregator.user().get_context_frame()]) + await asyncio.sleep(3) + await transport.capture_participant_video( + participant["id"], framerate=1, video_source="screenVideo" + ) + await transport.capture_participant_video( + participant["id"], framerate=1, video_source="camera" + ) + + + logger.debug("Unpausing audio and video") + llm.set_audio_input_paused(False) + llm.set_video_input_paused(False) + + runner = PipelineRunner() diff --git a/multimodal-live_search-bot/.env.example b/multimodal-live_search-bot/.env.example deleted file mode 100644 index aa71ec2a..00000000 --- a/multimodal-live_search-bot/.env.example +++ /dev/null @@ -1,8 +0,0 @@ -# Might need to replace w Hyperbolic's API Keys / Credentials - -# Daily.co API credentials -DAILY_API_KEY=your_daily_api_key_here -DAILY_SAMPLE_ROOM_URL=your_daily_room_url_here # Optional: URL of an existing Daily room - -# Google Gemini API credentials -GOOGLE_API_KEY=your_gemini_api_key_here \ No newline at end of file diff --git a/multimodal-live_search-bot/README.md b/multimodal-live_search-bot/README.md deleted file mode 100644 index e3009046..00000000 --- a/multimodal-live_search-bot/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Gemini Search Bot - -A conversational AI bot that provides the latest news and information using Google's Gemini model and search capabilities. 
- -## Features - -- Real-time voice interaction using Daily.co -- Latest news retrieval using Google Search API -- Natural conversation with Gemini AI model -- Voice synthesis for bot responses -- Voice activity detection for smooth interaction - -## Prerequisites - -- Python 3.8+ -- A Daily.co API key -- A Google Gemini API key - -## Setup - -1. Clone the repository -2. Create a virtual environment: - ```bash - python -m venv venv - source venv/bin/activate # On Windows: venv\Scripts\activate - ``` -3. Install dependencies: - ```bash - pip install -r requirements.txt - ``` -4. Copy `.env.example` to `.env` and fill in your API keys: - ```bash - cp .env.example .env - ``` - -## Environment Variables - -Create a `.env` file with the following variables: - -``` -# Required -DAILY_API_KEY=your_daily_api_key -GEMINI_API_KEY=your_gemini_api_key - -# Optional -DAILY_SAMPLE_ROOM_URL=your_daily_room_url # URL of an existing Daily room -DAILY_API_URL=https://api.daily.co/v1 # Custom Daily API URL -``` - -## Usage - -Run the bot with default settings (will create a new Daily room): -```bash -python src/main.py -``` - -Or specify an existing Daily room: -```bash -python src/main.py --url https://your-domain.daily.co/room-name -``` - -Command line options: -- `-u, --url`: URL of the Daily room to join -- `-k, --apikey`: Daily API Key (can also be set in .env) - -The bot will: -1. Connect to the specified Daily room (or create a new one) -2. Print the room URL -3. Wait for a participant to join -4. 
Start the conversation with news-related queries - -## Project Structure - -``` -gemini-search-bot/ -├── src/ -│ ├── config/ -│ │ └── settings.py # Configuration settings -│ ├── services/ -│ │ └── daily.py # Daily.co service setup -│ ├── utils/ -│ │ └── logger.py # Logging configuration -│ └── main.py # Main application -├── requirements.txt # Python dependencies -└── README.md # This file -``` - -## License - -BSD 2-Clause License \ No newline at end of file diff --git a/multimodal-live_search-bot/main.py b/multimodal-live_search-bot/main.py deleted file mode 100644 index 502ec842..00000000 --- a/multimodal-live_search-bot/main.py +++ /dev/null @@ -1,110 +0,0 @@ -# -# Copyright (c) 2024, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import asyncio -import os -import sys -from pathlib import Path - -import aiohttp -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.services.gemini_multimodal_live.gemini import ( - GeminiMultimodalLiveLLMService, -) -from pipecat.transports.services.daily import DailyParams, DailyTransport - -sys.path.append(str(Path(__file__).parent.parent)) -from runner import configure - -load_dotenv(override=True) - -logger.remove(0) -logger.add(sys.stderr, level="DEBUG") - -# Function handlers for the LLM -search_tool = {"google_search": {}} -tools = [search_tool] - -system_instruction = """ -You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting. - -Always use the google search API to retrieve the latest news. You must also use it to check which day is today. 
- -You can: -- Use the Google search API to check the current date. -- Provide the most recent and relevant news from any place by using the google search API. -- Answer any questions the user may have, ensuring your responses are accurate and concise. - -Start each interaction by asking the user about which place they would like to know the information. -""" - - -async def main(): - async with aiohttp.ClientSession() as session: - (room_url, token) = await configure(session) - - transport = DailyTransport( - room_url, - token, - "Latest news!", - DailyParams( - audio_out_enabled=True, - vad_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - vad_audio_passthrough=True, - ), - ) - - # Initialize the Gemini Multimodal Live model - llm = GeminiMultimodalLiveLLMService( - api_key=os.getenv("GOOGLE_API_KEY"), - voice_id="Puck", # Aoede, Charon, Fenrir, Kore, Puck - transcribe_user_audio=True, - transcribe_model_audio=True, - system_instruction=system_instruction, - tools=tools, - ) - - context = OpenAILLMContext( - [ - { - "role": "user", - "content": "Start by greeting me warmly and introducing me to live search by Hyperbolic Labs and mention the current day. Tell me that I can ask you about any place in the world and you will provide me with the latest news. 
Then, ask me which place I would like to know about.", - } - ], - ) - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - context_aggregator.user(), # User responses - llm, # LLM - transport.output(), # Transport bot output - context_aggregator.assistant(), # Assistant spoken responses - ] - ) - - task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True)) - - @transport.event_handler("on_first_participant_joined") - async def on_first_participant_joined(transport, participant): - await transport.capture_participant_transcription(participant["id"]) - await task.queue_frames([context_aggregator.user().get_context_frame()]) - - runner = PipelineRunner() - await runner.run(task) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/multimodal-live_search-bot/requirements.txt b/multimodal-live_search-bot/requirements.txt deleted file mode 100644 index 9f298cc5..00000000 --- a/multimodal-live_search-bot/requirements.txt +++ /dev/null @@ -1,90 +0,0 @@ -# Core Dependencies -aiohttp>=3.8.0 -fastapi>=0.68.0 -loguru>=0.6.0 -pydantic>=2.0.0 -python-dotenv>=0.19.0 -websockets>=10.0 - -# AI and Machine Learning -anthropic>=0.3.0 -google-generativeai>=0.2.0 -langchain>=0.0.200 -openai>=1.0.0 -openpipe>=0.1.0 -pipecat-ai[google]>=0.1.0 -together>=0.1.0 -transformers>=4.30.0 - -# Audio Processing -azure-cognitiveservices-speech>=1.25.0 -deepgram-sdk>=2.3.0 -faster-whisper>=0.5.0 -google-cloud-texttospeech>=2.12.0 -lmnt>=0.1.0 -pyaudio>=0.2.11 -pyloudnorm>=0.1.0 -pyht>=0.1.0 -silero-vad>=0.3.0 -soxr>=0.3.0 - -# Video/Image Processing -Pillow>=9.0.0 - -# Communication Services -daily>=0.7.0 -daily-python>=0.5.0 -fal-client>=0.5.0 -livekit>=0.8.0 - -# Utils -boto3>=1.26.0 -numpy>=1.21.0 -python-dateutil>=2.8.2 - - -# # If buggy, remove versions and try again (Carl - 2025 Feb 10) - -# # Core Dependencies -# aiohttp -# fastapi -# loguru -# pydantic -# python-dotenv -# websockets 
- -# # AI and Machine Learning -# anthropic -# google-generativeai -# langchain -# openai -# openpipe -# pipecat-ai[google] -# together -# transformers - -# # Audio Processing -# azure-cognitiveservices-speech -# deepgram-sdk -# faster-whisper -# google-cloud-texttospeech -# lmnt -# pyaudio -# pyloudnorm -# pyht -# silero-vad -# soxr - -# # Video/Image Processing -# Pillow - -# # Communication Services -# daily -# daily-python -# fal-client -# livekit - -# # Utils -# boto3 -# numpy -# python-dateutil diff --git a/multimodal-live_search-bot/runner.py b/multimodal-live_search-bot/runner.py deleted file mode 100644 index 2400217d..00000000 --- a/multimodal-live_search-bot/runner.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# Copyright (c) 2024–2025, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -# Carl's note: we borrowed this code from the Daily SDK, which is licensed under the BSD 2-Clause License. -# The Daily SDK is available at https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/runner.py - -import argparse -import os -from typing import Optional -from dotenv import load_dotenv - -import aiohttp - -from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper - -load_dotenv(override=True) - - -async def configure(aiohttp_session: aiohttp.ClientSession): - (url, token, _) = await configure_with_args(aiohttp_session) - return (url, token) - - -async def configure_with_args( - aiohttp_session: aiohttp.ClientSession, - parser: Optional[argparse.ArgumentParser] = None, -): - if not parser: - parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") - parser.add_argument( - "-u", "--url", type=str, required=False, help="URL of the Daily room to join" - ) - parser.add_argument( - "-k", - "--apikey", - type=str, - required=False, - help="Daily API Key (needed to create an owner token for the room)", - ) - - args, unknown = parser.parse_known_args() - - url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") - key = 
args.apikey or os.getenv("DAILY_API_KEY") - - if not url: - raise Exception( - "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL." - ) - - if not key: - raise Exception( - "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers." - ) - - daily_api_key = os.environ.get("DAILY_API_KEY") - if not daily_api_key: - raise Exception( - "The environment variable 'DAILY_API_KEY' must be set to run this example." - ) - - daily_rest_helper = DailyRESTHelper( - daily_api_key=key, - daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), - aiohttp_session=aiohttp_session, - ) - - # Create a meeting token for the given room with an expiration 1 hour in - # the future. - expiry_time: float = 60 * 60 - - token = await daily_rest_helper.get_token(url, expiry_time) - - return (url, token, args) diff --git a/multimodal-video-bot/.env.example b/multimodal-video-bot/.env.example deleted file mode 100644 index aa71ec2a..00000000 --- a/multimodal-video-bot/.env.example +++ /dev/null @@ -1,8 +0,0 @@ -# Might need to replace w Hyperbolic's API Keys / Credentials - -# Daily.co API credentials -DAILY_API_KEY=your_daily_api_key_here -DAILY_SAMPLE_ROOM_URL=your_daily_room_url_here # Optional: URL of an existing Daily room - -# Google Gemini API credentials -GOOGLE_API_KEY=your_gemini_api_key_here \ No newline at end of file diff --git a/multimodal-video-bot/README.md b/multimodal-video-bot/README.md deleted file mode 100644 index cf9734c4..00000000 --- a/multimodal-video-bot/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# Multimodal Video Bot - -A video conferencing bot that can analyze screen shares and camera feeds using Gemini's multimodal capabilities. 
- -## Features - -- **Prioritized Screen Sharing**: Automatically attempts to capture screen sharing first, falling back to camera if unavailable -- **Voice Activation Detection (VAD)**: Uses Silero VAD for precise audio detection -- **Multimodal Analysis**: Processes both visual and audio inputs using Google's Gemini API -- **Interactive Response**: Provides real-time responses to user queries about visual content - -## Requirements - -- Python 3.12+ -- Google API key with access to Gemini API -- Daily.co API key -- See `requirements.txt` for complete dependencies - -## Environment Setup - -Create a `.env` file with: - -``` -GOOGLE_API_KEY=your_google_api_key -DAILY_API_KEY=your_daily_api_key -DAILY_SAMPLE_ROOM_URL=your_daily_room_url -``` - -## Installation - -```bash -pip install -r requirements.txt -``` - -## Usage - -Run the bot: - -```bash -python src/main.py -``` - -Or with explicit room URL: - -```bash -python src/main.py -u "https://your-domain.daily.co/room" -k "your-daily-api-key" -``` - -## Voice Options - -The bot supports multiple voice options: -- Aoede (default) -- Puck -- Charon -- Kore -- Fenrir - -## Rate Limiting - -The service implements automatic rate limiting and retry mechanisms when interacting with Google's APIs to prevent quota exhaustion. diff --git a/multimodal-video-bot/main.py b/multimodal-video-bot/main.py deleted file mode 100644 index f0d72a93..00000000 --- a/multimodal-video-bot/main.py +++ /dev/null @@ -1,116 +0,0 @@ -# -# Copyright (c) 2024–2025, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -# Carl's note: we extended this code from the Daily SDK, which is licensed under the BSD 2-Clause License. 
-# # The Daily SDK is available at https://github.com/pipecat-ai/pipecat/tree/main - -import asyncio -import os -import sys - -import aiohttp -from dotenv import load_dotenv -from loguru import logger -from runner import configure - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.services.gemini_multimodal_live.gemini import ( - GeminiMultimodalLiveLLMService, -) -from pipecat.transports.services.daily import DailyParams, DailyTransport - -load_dotenv(override=True) - -logger.remove(0) -logger.add(sys.stderr, level="DEBUG") - - -async def main(): - google_api_key = os.getenv("GOOGLE_API_KEY") - if not google_api_key: - raise ValueError("GOOGLE_API_KEY environment variable is not set") - - async with aiohttp.ClientSession() as session: - (room_url, token) = await configure(session) - - transport = DailyTransport( - room_url, - token, - "Respond bot", - DailyParams( - audio_out_enabled=True, - vad_enabled=True, - vad_audio_passthrough=True, - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), - ), - ) - - llm = GeminiMultimodalLiveLLMService( - api_key=google_api_key, # Use validated key - voice_id="Aoede", # Puck, Charon, Kore, Fenrir, Aoede - system_instruction="Refer directly to screen elements when the user asks for help.", - transcribe_user_audio=True, - transcribe_model_audio=True, - inference_on_context_initialization=False, - ) - - context = OpenAILLMContext( - [ - { - "role": "user", - "content": "Welcome me to Hyperbolic Labs first. Tell me that I can see your camera feed, but tell me I have to click 'Share Screen' below first so I can help with your screen. Then I can tell you what's on your camera or where to click things!" 
- }, - ], - ) - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), - context_aggregator.user(), - llm, - transport.output(), - context_aggregator.assistant(), - ] - ) - - task = PipelineTask( - pipeline, - PipelineParams( - allow_interruptions=True, - enable_metrics=True, - enable_usage_metrics=True, - ), - ) - - @transport.event_handler("on_first_participant_joined") - async def on_first_participant_joined(transport, participant): - # Enable both camera and screenshare. From the client side - # send just one. - await transport.capture_participant_video( - participant["id"], framerate=1, video_source="screenVideo" - ) - await transport.capture_participant_video( - participant["id"], framerate=1, video_source="camera" - ) - await task.queue_frames([context_aggregator.user().get_context_frame()]) - await asyncio.sleep(3) - logger.debug("Unpausing audio and video") - llm.set_audio_input_paused(False) - llm.set_video_input_paused(False) - - runner = PipelineRunner() - - await runner.run(task) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/multimodal-video-bot/requirements.txt b/multimodal-video-bot/requirements.txt deleted file mode 100644 index 9f298cc5..00000000 --- a/multimodal-video-bot/requirements.txt +++ /dev/null @@ -1,90 +0,0 @@ -# Core Dependencies -aiohttp>=3.8.0 -fastapi>=0.68.0 -loguru>=0.6.0 -pydantic>=2.0.0 -python-dotenv>=0.19.0 -websockets>=10.0 - -# AI and Machine Learning -anthropic>=0.3.0 -google-generativeai>=0.2.0 -langchain>=0.0.200 -openai>=1.0.0 -openpipe>=0.1.0 -pipecat-ai[google]>=0.1.0 -together>=0.1.0 -transformers>=4.30.0 - -# Audio Processing -azure-cognitiveservices-speech>=1.25.0 -deepgram-sdk>=2.3.0 -faster-whisper>=0.5.0 -google-cloud-texttospeech>=2.12.0 -lmnt>=0.1.0 -pyaudio>=0.2.11 -pyloudnorm>=0.1.0 -pyht>=0.1.0 -silero-vad>=0.3.0 -soxr>=0.3.0 - -# Video/Image Processing -Pillow>=9.0.0 - -# Communication Services -daily>=0.7.0 -daily-python>=0.5.0 
-fal-client>=0.5.0 -livekit>=0.8.0 - -# Utils -boto3>=1.26.0 -numpy>=1.21.0 -python-dateutil>=2.8.2 - - -# # If buggy, remove versions and try again (Carl - 2025 Feb 10) - -# # Core Dependencies -# aiohttp -# fastapi -# loguru -# pydantic -# python-dotenv -# websockets - -# # AI and Machine Learning -# anthropic -# google-generativeai -# langchain -# openai -# openpipe -# pipecat-ai[google] -# together -# transformers - -# # Audio Processing -# azure-cognitiveservices-speech -# deepgram-sdk -# faster-whisper -# google-cloud-texttospeech -# lmnt -# pyaudio -# pyloudnorm -# pyht -# silero-vad -# soxr - -# # Video/Image Processing -# Pillow - -# # Communication Services -# daily -# daily-python -# fal-client -# livekit - -# # Utils -# boto3 -# numpy -# python-dateutil diff --git a/multimodal-video-bot/runner.py b/multimodal-video-bot/runner.py deleted file mode 100644 index 2400217d..00000000 --- a/multimodal-video-bot/runner.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# Copyright (c) 2024–2025, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -# Carl's note: we borrowed this code from the Daily SDK, which is licensed under the BSD 2-Clause License. 
-# The Daily SDK is available at https://github.com/pipecat-ai/pipecat/blob/main/examples/foundational/runner.py - -import argparse -import os -from typing import Optional -from dotenv import load_dotenv - -import aiohttp - -from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper - -load_dotenv(override=True) - - -async def configure(aiohttp_session: aiohttp.ClientSession): - (url, token, _) = await configure_with_args(aiohttp_session) - return (url, token) - - -async def configure_with_args( - aiohttp_session: aiohttp.ClientSession, - parser: Optional[argparse.ArgumentParser] = None, -): - if not parser: - parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") - parser.add_argument( - "-u", "--url", type=str, required=False, help="URL of the Daily room to join" - ) - parser.add_argument( - "-k", - "--apikey", - type=str, - required=False, - help="Daily API Key (needed to create an owner token for the room)", - ) - - args, unknown = parser.parse_known_args() - - url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") - key = args.apikey or os.getenv("DAILY_API_KEY") - - if not url: - raise Exception( - "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL." - ) - - if not key: - raise Exception( - "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers." - ) - - daily_api_key = os.environ.get("DAILY_API_KEY") - if not daily_api_key: - raise Exception( - "The environment variable 'DAILY_API_KEY' must be set to run this example." - ) - - daily_rest_helper = DailyRESTHelper( - daily_api_key=key, - daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), - aiohttp_session=aiohttp_session, - ) - - # Create a meeting token for the given room with an expiration 1 hour in - # the future. 
- expiry_time: float = 60 * 60 - - token = await daily_rest_helper.get_token(url, expiry_time) - - return (url, token, args) From 72a08b50287f14809070a49c528bee32830a812a Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sun, 27 Apr 2025 22:40:17 +0800 Subject: [PATCH 17/23] Cloud Run. Add Dockerfile and health check server; update requirements and .dockerignore --- multimodal-gpu_marketplace-bot/.dockerignore | 10 ++ multimodal-gpu_marketplace-bot/Dockerfile | 29 +++++ multimodal-gpu_marketplace-bot/main.py | 18 ++- .../requirements.txt | 103 +++--------------- 4 files changed, 70 insertions(+), 90 deletions(-) create mode 100644 multimodal-gpu_marketplace-bot/.dockerignore create mode 100644 multimodal-gpu_marketplace-bot/Dockerfile diff --git a/multimodal-gpu_marketplace-bot/.dockerignore b/multimodal-gpu_marketplace-bot/.dockerignore new file mode 100644 index 00000000..22f6ed6b --- /dev/null +++ b/multimodal-gpu_marketplace-bot/.dockerignore @@ -0,0 +1,10 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.env +.env.* +*.db +*.sqlite3 +*.ipynb +marketplace_output.txt diff --git a/multimodal-gpu_marketplace-bot/Dockerfile b/multimodal-gpu_marketplace-bot/Dockerfile new file mode 100644 index 00000000..4413dc0e --- /dev/null +++ b/multimodal-gpu_marketplace-bot/Dockerfile @@ -0,0 +1,29 @@ +# Use official Python image +FROM python:3.12-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Set work directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ffmpeg \ + portaudio19-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --upgrade pip && pip install -r requirements.txt + +# Copy project files +COPY . . 
+ +# Expose port (Cloud Run expects 8080) +EXPOSE 8080 + +# Default command +CMD ["python", "main.py"] diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index c23564ec..914996d2 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -10,12 +10,15 @@ import asyncio import os import sys +import threading from datetime import datetime import aiohttp from dotenv import load_dotenv +from fastapi import FastAPI from loguru import logger from runner import configure +import uvicorn from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -89,7 +92,7 @@ async def main(): task = PipelineTask( pipeline, - PipelineParams( + params=PipelineParams( allow_interruptions=True, enable_metrics=True, enable_usage_metrics=True, @@ -119,5 +122,18 @@ async def on_first_participant_joined(transport, participant): await runner.run(task) +def start_healthcheck_server(): + app = FastAPI() + + @app.get("/healthz") + async def healthz(): + return {"status": "ok"} + + uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), log_level="warning") + +# Start FastAPI health check server in a background thread +threading.Thread(target=start_healthcheck_server, daemon=True).start() + + if __name__ == "__main__": asyncio.run(main()) diff --git a/multimodal-gpu_marketplace-bot/requirements.txt b/multimodal-gpu_marketplace-bot/requirements.txt index 9f298cc5..ebdfd0c1 100644 --- a/multimodal-gpu_marketplace-bot/requirements.txt +++ b/multimodal-gpu_marketplace-bot/requirements.txt @@ -1,90 +1,15 @@ -# Core Dependencies -aiohttp>=3.8.0 -fastapi>=0.68.0 -loguru>=0.6.0 -pydantic>=2.0.0 -python-dotenv>=0.19.0 -websockets>=10.0 +# Minimal requirements for main.py and core bot logic +aiohttp +python-dotenv +loguru +pydantic +openai +websockets +daily-python +google-generativeai +pipecat-ai[google,silero] +openpipe +onnxruntime +uvicorn +fastapi -# AI 
and Machine Learning -anthropic>=0.3.0 -google-generativeai>=0.2.0 -langchain>=0.0.200 -openai>=1.0.0 -openpipe>=0.1.0 -pipecat-ai[google]>=0.1.0 -together>=0.1.0 -transformers>=4.30.0 - -# Audio Processing -azure-cognitiveservices-speech>=1.25.0 -deepgram-sdk>=2.3.0 -faster-whisper>=0.5.0 -google-cloud-texttospeech>=2.12.0 -lmnt>=0.1.0 -pyaudio>=0.2.11 -pyloudnorm>=0.1.0 -pyht>=0.1.0 -silero-vad>=0.3.0 -soxr>=0.3.0 - -# Video/Image Processing -Pillow>=9.0.0 - -# Communication Services -daily>=0.7.0 -daily-python>=0.5.0 -fal-client>=0.5.0 -livekit>=0.8.0 - -# Utils -boto3>=1.26.0 -numpy>=1.21.0 -python-dateutil>=2.8.2 - - -# # If buggy, remove versions and try again (Carl - 2025 Feb 10) - -# # Core Dependencies -# aiohttp -# fastapi -# loguru -# pydantic -# python-dotenv -# websockets - -# # AI and Machine Learning -# anthropic -# google-generativeai -# langchain -# openai -# openpipe -# pipecat-ai[google] -# together -# transformers - -# # Audio Processing -# azure-cognitiveservices-speech -# deepgram-sdk -# faster-whisper -# google-cloud-texttospeech -# lmnt -# pyaudio -# pyloudnorm -# pyht -# silero-vad -# soxr - -# # Video/Image Processing -# Pillow - -# # Communication Services -# daily -# daily-python -# fal-client -# livekit - -# # Utils -# boto3 -# numpy -# python-dateutil From 2959ce6785abadca6295fd93f2c15029c51e02d6 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 13 Jun 2025 19:32:34 -0500 Subject: [PATCH 18/23] Refactor WebSocket event handler and improve error handling in main function --- multimodal-gpu_marketplace-bot/.dockerignore | 10 ---------- multimodal-gpu_marketplace-bot/main.py | 15 ++++++++++----- 2 files changed, 10 insertions(+), 15 deletions(-) delete mode 100644 multimodal-gpu_marketplace-bot/.dockerignore diff --git a/multimodal-gpu_marketplace-bot/.dockerignore b/multimodal-gpu_marketplace-bot/.dockerignore deleted file mode 100644 index 22f6ed6b..00000000 --- 
a/multimodal-gpu_marketplace-bot/.dockerignore +++ /dev/null @@ -1,10 +0,0 @@ -__pycache__/ -*.pyc -*.pyo -*.pyd -.env -.env.* -*.db -*.sqlite3 -*.ipynb -marketplace_output.txt diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 914996d2..d68d3d80 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -19,6 +19,7 @@ from loguru import logger from runner import configure import uvicorn +from websockets.exceptions import ConnectionClosedError from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams @@ -99,7 +100,7 @@ async def main(): ), ) - @transport.event_handler("on_first_participant_joined") + @transport.event_handler("on_participant_joined") async def on_first_participant_joined(transport, participant): await task.queue_frames([context_aggregator.user().get_context_frame()]) await asyncio.sleep(3) @@ -110,16 +111,20 @@ async def on_first_participant_joined(transport, participant): participant["id"], framerate=1, video_source="camera" ) - logger.debug("Unpausing audio and video") llm.set_audio_input_paused(False) llm.set_video_input_paused(False) - - runner = PipelineRunner() - await runner.run(task) + try: + await runner.run(task) + except ConnectionClosedError as e: + logger.error(f"WebSocket connection closed unexpectedly: {e}") + logger.error("This might be a temporary issue with the Gemini service. 
Please try running the script again later.") + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + # Optionally re-raise or handle other errors as needed def start_healthcheck_server(): From 9937bfdbc6437100b733575adecc746574c4e5eb Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 13 Jun 2025 19:32:55 -0500 Subject: [PATCH 19/23] Refactor Dockerfile for clarity and consistency in comments and formatting --- multimodal-gpu_marketplace-bot/Dockerfile | 28 ++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/Dockerfile b/multimodal-gpu_marketplace-bot/Dockerfile index 4413dc0e..a13e2ddb 100644 --- a/multimodal-gpu_marketplace-bot/Dockerfile +++ b/multimodal-gpu_marketplace-bot/Dockerfile @@ -1,29 +1,31 @@ -# Use official Python image +# Use an official Python runtime as a parent image FROM python:3.12-slim -# Set environment variables -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONUNBUFFERED=1 +# Set environment variables to prevent Python from writing .pyc files +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 -# Set work directory +# Set the working directory in the container WORKDIR /app -# Install system dependencies +# Install system dependencies needed for audio processing (for pipecat's Silero VAD) RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ ffmpeg \ - portaudio19-dev \ && rm -rf /var/lib/apt/lists/* -# Install Python dependencies +# Copy the requirements file into the container COPY requirements.txt . -RUN pip install --upgrade pip && pip install -r requirements.txt -# Copy project files +# Install any needed packages specified in requirements.txt +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +# Copy the rest of your application code into the container COPY . . 
-# Expose port (Cloud Run expects 8080) +# Expose the port your healthcheck server runs on EXPOSE 8080 -# Default command -CMD ["python", "main.py"] +# The command to run your application +CMD ["python", "main.py"] \ No newline at end of file From 08de21f646e9ad7cfe7119713666fdbc436edc95 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Fri, 13 Jun 2025 20:15:17 -0500 Subject: [PATCH 20/23] Add launcher script and update Dockerfile to use it as the entry point --- multimodal-gpu_marketplace-bot/Dockerfile | 2 +- multimodal-gpu_marketplace-bot/launcher.py | 88 ++++++++++++++++++++++ multimodal-gpu_marketplace-bot/main.py | 9 ++- 3 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 multimodal-gpu_marketplace-bot/launcher.py diff --git a/multimodal-gpu_marketplace-bot/Dockerfile b/multimodal-gpu_marketplace-bot/Dockerfile index a13e2ddb..d38414d3 100644 --- a/multimodal-gpu_marketplace-bot/Dockerfile +++ b/multimodal-gpu_marketplace-bot/Dockerfile @@ -28,4 +28,4 @@ COPY . . EXPOSE 8080 # The command to run your application -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "launcher.py"] \ No newline at end of file diff --git a/multimodal-gpu_marketplace-bot/launcher.py b/multimodal-gpu_marketplace-bot/launcher.py new file mode 100644 index 00000000..11c7149e --- /dev/null +++ b/multimodal-gpu_marketplace-bot/launcher.py @@ -0,0 +1,88 @@ +import asyncio +import threading +from fastapi import FastAPI +from fastapi.responses import HTMLResponse +import uvicorn +from loguru import logger + +# We will import your bot's main function +from main import main as run_bot_pipeline + +# Keep track of the bot's thread so we only run one at a time +bot_thread = None + +app = FastAPI() + + +@app.get("/", response_class=HTMLResponse) +async def root(): + """Serves the simple HTML page with the start button.""" + return """ + + + + + + Hyperbolic Bot Launcher + + + +
+

GPU Marketplace Voice Assistant

+
+ +
+

+
+ + + + """ + + +def run_bot_in_thread(): + """Runs the asyncio bot in a separate thread.""" + logger.info("Starting bot pipeline in a background thread.") + try: + asyncio.run(run_bot_pipeline()) + logger.info("Bot pipeline thread finished.") + except Exception as e: + logger.error(f"Error in bot thread: {e}") + + +@app.post("/start-bot") +async def start_bot_endpoint(): + """API endpoint to start the bot.""" + global bot_thread + if bot_thread and bot_thread.is_alive(): + return {"message": "Bot is already running."} + + # Run the main bot function in a background thread + bot_thread = threading.Thread(target=run_bot_in_thread) + bot_thread.start() + + return {"message": "Bot has been started. It will join the Daily room shortly."} + + +if __name__ == "__main__": + # Note: Use the port Render provides through the PORT environment variable. + port = int(os.environ.get("PORT", 8080)) + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index d68d3d80..9b131d4d 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -121,10 +121,10 @@ async def on_first_participant_joined(transport, participant): await runner.run(task) except ConnectionClosedError as e: logger.error(f"WebSocket connection closed unexpectedly: {e}") - logger.error("This might be a temporary issue with the Gemini service. 
Please try running the script again later.") + except TimeoutError: + logger.info("Pipeline task timed out after 1 hour.") except Exception as e: logger.error(f"An unexpected error occurred: {e}") - # Optionally re-raise or handle other errors as needed def start_healthcheck_server(): @@ -134,7 +134,10 @@ def start_healthcheck_server(): async def healthz(): return {"status": "ok"} - uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), log_level="warning") + uvicorn.run( + app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), log_level="warning" + ) + # Start FastAPI health check server in a background thread threading.Thread(target=start_healthcheck_server, daemon=True).start() From 1b417e9b5caf7940d0e406b2da091c7b1f4ff199 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sat, 14 Jun 2025 10:48:25 -0700 Subject: [PATCH 21/23] Add missing import for os module in launcher.py --- multimodal-gpu_marketplace-bot/launcher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/multimodal-gpu_marketplace-bot/launcher.py b/multimodal-gpu_marketplace-bot/launcher.py index 11c7149e..32f1b902 100644 --- a/multimodal-gpu_marketplace-bot/launcher.py +++ b/multimodal-gpu_marketplace-bot/launcher.py @@ -3,6 +3,7 @@ from fastapi import FastAPI from fastapi.responses import HTMLResponse import uvicorn +import os from loguru import logger # We will import your bot's main function From a7fb35ec0d6e19b1d2e4fade2c9387650d105012 Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sat, 14 Jun 2025 10:48:30 -0700 Subject: [PATCH 22/23] Refactor bot launch process to use subprocess for better isolation and error handling; update HTML response for improved user experience. 
--- multimodal-gpu_marketplace-bot/launcher.py | 112 ++++++++++++--------- 1 file changed, 65 insertions(+), 47 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/launcher.py b/multimodal-gpu_marketplace-bot/launcher.py index 32f1b902..cd510d89 100644 --- a/multimodal-gpu_marketplace-bot/launcher.py +++ b/multimodal-gpu_marketplace-bot/launcher.py @@ -1,24 +1,22 @@ -import asyncio -import threading +# launcher.py +import os +import subprocess +import sys from fastapi import FastAPI -from fastapi.responses import HTMLResponse +from fastapi.responses import HTMLResponse, JSONResponse import uvicorn -import os from loguru import logger -# We will import your bot's main function -from main import main as run_bot_pipeline - -# Keep track of the bot's thread so we only run one at a time -bot_thread = None - app = FastAPI() +# Get the URL from environment variables to pass to the frontend +DAILY_ROOM_URL = "https://hyperbolic.daily.co/MkJPeAMVfgruvM1zVGg4" @app.get("/", response_class=HTMLResponse) async def root(): """Serves the simple HTML page with the start button.""" - return """ + # We will now pass the room URL to the HTML so the button can open it. + return f""" @@ -26,64 +24,84 @@ async def root(): Hyperbolic Bot Launcher

GPU Marketplace Voice Assistant

-
- -
+

""" +# Variable to track if the bot process is running +bot_process = None + +@app.post("/start-bot") +async def start_bot_endpoint(): + """API endpoint to launch the bot script as a separate process.""" + global bot_process + + # Check if the bot process is already running + if bot_process and bot_process.poll() is None: + logger.info("Bot process is already running.") + return JSONResponse({"success": False, "message": "Bot is already running."}) -def run_bot_in_thread(): - """Runs the asyncio bot in a separate thread.""" - logger.info("Starting bot pipeline in a background thread.") try: - asyncio.run(run_bot_pipeline()) - logger.info("Bot pipeline thread finished.") - except Exception as e: - logger.error(f"Error in bot thread: {e}") + # Find the path to the main.py script + script_path = os.path.join(os.path.dirname(__file__), "main.py") + # We need to use the same Python interpreter that is running this launcher + python_executable = sys.executable -@app.post("/start-bot") -async def start_bot_endpoint(): - """API endpoint to start the bot.""" - global bot_thread - if bot_thread and bot_thread.is_alive(): - return {"message": "Bot is already running."} + logger.info(f"Launching bot script: {python_executable} {script_path}") + + # Launch main.py as a new, independent process + # This completely avoids threading/multiprocessing issues with asyncio + bot_process = subprocess.Popen([python_executable, script_path]) - # Run the main bot function in a background thread - bot_thread = threading.Thread(target=run_bot_in_thread) - bot_thread.start() + return JSONResponse({"success": True, "message": "Bot started successfully."}) - return {"message": "Bot has been started. 
It will join the Daily room shortly."} + except Exception as e: + logger.error(f"Failed to launch bot process: {e}") + return JSONResponse({"success": False, "message": f"Failed to start bot: {e}"}, status_code=500) if __name__ == "__main__": - # Note: Use the port Render provides through the PORT environment variable. port = int(os.environ.get("PORT", 8080)) - uvicorn.run(app, host="0.0.0.0", port=port) + logger.info(f"Starting launcher web server on port {port}") + uvicorn.run(app, host="0.0.0.0", port=port) \ No newline at end of file From 45e16bec4e176103bd63bad6690469984615f60b Mon Sep 17 00:00:00 2001 From: Carl Kho <106736711+CarlKho-Minerva@users.noreply.github.com> Date: Sat, 14 Jun 2025 11:11:53 -0700 Subject: [PATCH 23/23] Refactor bot launcher to manage state and process lifecycle; remove health check server and improve HTML response for bot status. --- multimodal-gpu_marketplace-bot/launcher.py | 191 +++++++++++++-------- multimodal-gpu_marketplace-bot/main.py | 19 -- 2 files changed, 115 insertions(+), 95 deletions(-) diff --git a/multimodal-gpu_marketplace-bot/launcher.py b/multimodal-gpu_marketplace-bot/launcher.py index cd510d89..891fdee3 100644 --- a/multimodal-gpu_marketplace-bot/launcher.py +++ b/multimodal-gpu_marketplace-bot/launcher.py @@ -1,107 +1,146 @@ -# launcher.py import os import subprocess import sys -from fastapi import FastAPI -from fastapi.responses import HTMLResponse, JSONResponse +from fastapi import FastAPI, Request +from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse import uvicorn from loguru import logger +import signal +import time app = FastAPI() -# Get the URL from environment variables to pass to the frontend -DAILY_ROOM_URL = "https://hyperbolic.daily.co/MkJPeAMVfgruvM1zVGg4" +# --- Global State Management --- +# This dictionary holds the state of our bot process. +# This simple in-memory state is perfect for a single Render instance. 
+bot_state = { + "process": None, + "status": "STOPPED", # Can be: STOPPED, STARTING, RUNNING, STOPPING +} + +DAILY_ROOM_URL = os.getenv( + "DAILY_SAMPLE_ROOM_URL", "https://your-room.daily.co/default-room" +) + + +def get_page_html(): + """Generates the HTML for the control page based on the current bot state.""" + status = bot_state["status"] + + if status == "RUNNING": + return f""" +

GPU Bot is RUNNING

+

The bot is active. You can now join the call.

+ Join Daily Room +
+ +
+ """ + elif status == "STOPPED": + return """ +

GPU Bot is STOPPED

+

Click the button to start a new demo session.

+
+ +
+ """ + elif status == "STARTING": + return """ +

GPU Bot is STARTING...

+

Please wait, this can take up to 30 seconds.

+ + """ + elif status == "STOPPING": + return """ +

GPU Bot is STOPPING...

+

Please wait while the session is terminated.

+ + """ + @app.get("/", response_class=HTMLResponse) async def root(): - """Serves the simple HTML page with the start button.""" - # We will now pass the room URL to the HTML so the button can open it. + """Serves the main control page, which changes based on the bot's status.""" return f""" - - - - - - Hyperbolic Bot Launcher - - - -
-

GPU Marketplace Voice Assistant

- -

+ Bot Control + + + +
+ {get_page_html()}
- - - + """ -# Variable to track if the bot process is running -bot_process = None @app.post("/start-bot") async def start_bot_endpoint(): - """API endpoint to launch the bot script as a separate process.""" - global bot_process + """Endpoint to launch the bot. Prevents starting if not STOPPED.""" + if bot_state["status"] != "STOPPED": + logger.warning(f"Attempted to start bot while in state: {bot_state['status']}") + return RedirectResponse(url="/", status_code=303) - # Check if the bot process is already running - if bot_process and bot_process.poll() is None: - logger.info("Bot process is already running.") - return JSONResponse({"success": False, "message": "Bot is already running."}) + bot_state["status"] = "STARTING" + logger.info("Bot state changed to STARTING.") try: - # Find the path to the main.py script script_path = os.path.join(os.path.dirname(__file__), "main.py") - - # We need to use the same Python interpreter that is running this launcher python_executable = sys.executable + process = subprocess.Popen([python_executable, script_path]) - logger.info(f"Launching bot script: {python_executable} {script_path}") - - # Launch main.py as a new, independent process - # This completely avoids threading/multiprocessing issues with asyncio - bot_process = subprocess.Popen([python_executable, script_path]) - - return JSONResponse({"success": True, "message": "Bot started successfully."}) + bot_state["process"] = process + # Give it a moment to stabilize before changing state to RUNNING + time.sleep(5) # A small delay to let the process actually start + bot_state["status"] = "RUNNING" + logger.info( + f"Bot process started with PID {process.pid}. State is now RUNNING." 
+ ) except Exception as e: logger.error(f"Failed to launch bot process: {e}") - return JSONResponse({"success": False, "message": f"Failed to start bot: {e}"}, status_code=500) + bot_state["status"] = "STOPPED" + + return RedirectResponse(url="/", status_code=303) + + +@app.post("/stop-bot") +async def stop_bot_endpoint(): + """Endpoint to stop the bot. Prevents stopping if not RUNNING.""" + if bot_state["status"] != "RUNNING": + logger.warning(f"Attempted to stop bot while in state: {bot_state['status']}") + return RedirectResponse(url="/", status_code=303) + + bot_state["status"] = "STOPPING" + logger.info("Bot state changed to STOPPING.") + + process = bot_state["process"] + if process and process.poll() is None: + logger.info(f"Sending SIGTERM to bot process with PID: {process.pid}") + process.send_signal(signal.SIGTERM) + try: + process.wait(timeout=15) + logger.info("Bot process terminated gracefully.") + except subprocess.TimeoutExpired: + logger.warning("Bot did not terminate in time, sending SIGKILL.") + process.kill() + + bot_state["process"] = None + bot_state["status"] = "STOPPED" + logger.info("Bot state changed to STOPPED.") + + return RedirectResponse(url="/", status_code=303) if __name__ == "__main__": port = int(os.environ.get("PORT", 8080)) - logger.info(f"Starting launcher web server on port {port}") - uvicorn.run(app, host="0.0.0.0", port=port) \ No newline at end of file + logger.info(f"Starting bot launcher on http://0.0.0.0:{port}") + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/multimodal-gpu_marketplace-bot/main.py b/multimodal-gpu_marketplace-bot/main.py index 9b131d4d..96ba2fc2 100644 --- a/multimodal-gpu_marketplace-bot/main.py +++ b/multimodal-gpu_marketplace-bot/main.py @@ -10,15 +10,12 @@ import asyncio import os import sys -import threading from datetime import datetime import aiohttp from dotenv import load_dotenv -from fastapi import FastAPI from loguru import logger from runner import configure -import uvicorn from 
websockets.exceptions import ConnectionClosedError from pipecat.audio.vad.silero import SileroVADAnalyzer @@ -127,21 +124,5 @@ async def on_first_participant_joined(transport, participant): logger.error(f"An unexpected error occurred: {e}") -def start_healthcheck_server(): - app = FastAPI() - - @app.get("/healthz") - async def healthz(): - return {"status": "ok"} - - uvicorn.run( - app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), log_level="warning" - ) - - -# Start FastAPI health check server in a background thread -threading.Thread(target=start_healthcheck_server, daemon=True).start() - - if __name__ == "__main__": asyncio.run(main())