diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 020af2e..847c6af 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -44,6 +44,9 @@ jobs: SECRET_KEY: ${{ secrets.SECRET_KEY }} ALGORITHM: ${{ secrets.ALGORITHM }} MODEL_S3_BUCKET: ${{ secrets.MODEL_S3_BUCKET }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_CONFIG_ENGINE_URL: ${{ secrets.GITHUB_CONFIG_ENGINE_URL }} + GITHUB_REPO_SUBDIR: ${{ secrets.GITHUB_REPO_SUBDIR }} HLS_TRANSFORM_DIRECTORY: ${{ secrets.HLS_TRANSFORM_DIRECTORY }} FPGA_DEV_AMI: ${{ secrets.FPGA_DEV_AMI }} GPU_DEV_AMI: ${{ secrets.GPU_DEV_AMI }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a77ed1d..9800571 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks default_language_version: - python: python3.11 + python: python3.10 repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 diff --git a/app/api/v1/endpoints/machine_endpoints.py b/app/api/v1/endpoints/machine_endpoints.py index 1476f82..0bd8401 100644 --- a/app/api/v1/endpoints/machine_endpoints.py +++ b/app/api/v1/endpoints/machine_endpoints.py @@ -15,8 +15,6 @@ from models.machine import ( Machine, MachineCreate, - ModelInferenceRequest, - ModelInferenceResponse, ModelSelectionRequest, ) from models.user import UserResponse @@ -156,9 +154,12 @@ async def create_fpga_machine( ) -> Machine: try: user_data = generate_hlstransform_setup_script( + github_token=settings.GITHUB_TOKEN, user_name=current_user.user_name, s3_bucket=settings.MODEL_S3_BUCKET, s3_directory=settings.HLS_TRANSFORM_DIRECTORY, + github_repo_url=settings.GITHUB_CONFIG_ENGINE_URL, + repo_subdir=settings.GITHUB_REPO_SUBDIR, ) logger.debug(f"Generated user data: {user_data}") @@ -235,38 +236,6 @@ async def terminate_machine( ) -@router.post( - "/machines/{machine_id}/inference", - 
response_model=ModelInferenceResponse, - tags=[fpga_tag], -) -async def run_model_inference( - machine_id: str, - request: ModelInferenceRequest, - current_user: Annotated[UserResponse, Depends(get_current_active_user)], - ec2_service: EC2Service = Depends(get_ec2_service), -): - try: - output = ec2_service.run_model_inference( - machine_id, request, current_user.user_id, current_user.user_name - ) - return ModelInferenceResponse(output=output) - except EC2Error as e: - logger.error(f"An error occurred: {e}") - raise EC2Error( - status_code=e.status_code, - detail=f"An unexpected error occurred while running model inference: {e.detail}", - error_code=e.error_code, - ) - except Exception as e: - logger.error(f"An internal server error occurred: {e}") - raise EC2Error( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="A fatal server error occurred while running model inference", - error_code="INTERNAL_SERVER_ERROR", - ) - - @router.post("/machine/gpu/pull_model", tags=[gpu_tag]) async def pull_gpu_model( current_user: Annotated[UserResponse, Depends(get_current_active_user)], @@ -388,6 +357,42 @@ async def get_gpu_inference_url( ) +@router.get("/machine/fpga/{machine_id}/inference_url", tags=[fpga_tag]) +async def get_fpga_inference_url( + machine_id: str, + current_user: Annotated[UserResponse, Depends(get_current_active_user)], + ec2_service: EC2Service = Depends(get_ec2_service), +): + try: + isOwner = ec2_service.is_user_owner_of_instance( + user_id=current_user.user_id, instance_id=machine_id + ) + if not isOwner: + raise EC2Error( + status_code=status.HTTP_403_FORBIDDEN, + detail="User not the owner of this machine", + error_code="FORBIDDEN", + ) + public_ip = ec2_service.get_instance_public_ip(machine_id) + ollama_url = f"http://{public_ip}:8000/api/generate" + + return {"inference_url": ollama_url} + except EC2Error as e: + logger.error(f"An error occurred: {e}") + raise EC2Error( + status_code=e.status_code, + detail=f"An EC2 Error occurred: 
{e.detail}", + error_code=e.error_code, + ) + except Exception as e: + logger.error(f"An internal server error occurred: {e}") + raise EC2Error( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"A server error occurred while getting the inference URL: {e}", + error_code="INTERNAL_SERVER_ERROR", + ) + + @router.post( "/machine/cpu", response_model=Machine, diff --git a/app/core/config.py b/app/core/config.py index 4407546..4d9be77 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -16,6 +16,11 @@ class Settings(BaseSettings): MODEL_S3_BUCKET: str HLS_TRANSFORM_DIRECTORY: str + # Github Credentials + GITHUB_TOKEN: str + GITHUB_CONFIG_ENGINE_URL: str + GITHUB_REPO_SUBDIR: str + # Machine Management FPGA_DEV_AMI: str GPU_DEV_AMI: str diff --git a/app/models/machine.py b/app/models/machine.py index 1694a83..d37f599 100644 --- a/app/models/machine.py +++ b/app/models/machine.py @@ -3,7 +3,6 @@ """ from pydantic import BaseModel, Field -from typing import Optional class Machine(BaseModel): @@ -20,21 +19,6 @@ class MachineCreate(BaseModel): machine_type: str -class ModelInferenceRequest(BaseModel): - prompt: str = Field(..., description="Input prompt for the model.") - temperature: Optional[float] = Field(0.8, description="Sampling temperature.") - max_tokens: Optional[int] = Field( - 256, description="Maximum number of tokens to generate." - ) - llm_model: Optional[str] = Field( - "llama2", description="Name of the model executable." 
- ) - - -class ModelInferenceResponse(BaseModel): - output: str - - class ModelSelectionRequest(BaseModel): machine_id: str = Field(..., description="ID of the machine to select.") model_name: str = Field(..., description="Name of the model to select.") diff --git a/app/scripts/ec2_setup.py b/app/scripts/ec2_setup.py index f368f5a..e986688 100644 --- a/app/scripts/ec2_setup.py +++ b/app/scripts/ec2_setup.py @@ -1,8 +1,10 @@ -from core.config import settings - - def generate_hlstransform_setup_script( - user_name: str, s3_bucket: str, s3_directory: str + github_token: str, + user_name: str, + s3_bucket: str, + s3_directory: str, + github_repo_url: str, + repo_subdir: str, ) -> str: return f"""#!/bin/bash @@ -10,17 +12,30 @@ def generate_hlstransform_setup_script( exec > /var/log/user-data.log 2>&1 set -x -# Update package list and install required packages -yum update -y # Use 'apt-get' if using Ubuntu or Debian -yum install -y aws-cli git # Install AWS CLI and Git, required for the script +############################## +# 1) System Updates & Basic Tools +############################## -# Install SSM Agent -echo "Installing SSM Agent..." 
-yum install -y https://s3.{settings.AWS_DEFAULT_REGION}.amazonaws.com/amazon-ssm-{settings.AWS_DEFAULT_REGION}/latest/linux_amd64/amazon-ssm-agent.rpm +sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo +sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo +sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo +sudo yum -y install centos-release-scl -# Start SSM Agent -systemctl enable amazon-ssm-agent -systemctl start amazon-ssm-agent +sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo +sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo +sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo +sudo yum -y install devtoolset-9 + +sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo +sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo +sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo +sudo yum -y install boost-devel + +scl enable devtoolset-9 bash + +############################## +# 2) Create User and Set Permissions +############################## # Create '{user_name}' user with a home directory echo "Creating '{user_name}' user with a home directory..." @@ -32,6 +47,10 @@ def generate_hlstransform_setup_script( # Set correct permissions on the directory chmod 755 /home/{user_name} +############################## +# 3) Fetch Files from S3 +############################## + # Static values for S3 bucket and directory S3_BUCKET="{s3_bucket}" S3_DIRECTORY="{s3_directory}" @@ -47,6 +66,10 @@ def generate_hlstransform_setup_script( chmod 755 /home/{user_name}/llama2 chmod 755 /home/{user_name}/forward.hw.awsxclbin +############################## +# 4) Setup AWS FPGA Environment +############################## + # Clone the AWS FPGA repository and set up the environment echo "Cloning AWS FPGA repository and setting up the environment..." 
export AWS_FPGA_REPO_DIR=/home/{user_name}/aws-fpga @@ -55,6 +78,10 @@ def generate_hlstransform_setup_script( source $AWS_FPGA_REPO_DIR/vitis_runtime_setup.sh export LC_ALL=C +############################## +# 5) Ensure XRT MPD (Message Proxy Daemon) is running +############################## + # Start the Xilinx XRT Message Proxy Daemon (MPD) if not running echo "Checking MPD service status..." if systemctl is-active --quiet mpd; then @@ -69,6 +96,82 @@ def generate_hlstransform_setup_script( systemctl status mpd echo "Setup complete. You can now run your application with './llama2' as the '{user_name}' user." + +############################## +# 5.5) Python3.8 environment +############################## + +# Install development tools and dependencies +echo "Installing development tools and dependencies..." +sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo +sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo +sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo +sudo yum groupinstall -y "Development Tools" + +sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo +sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo +sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo +sudo yum install -y gcc gcc-c++ make zlib-devel bzip2 bzip2-devel readline-devel sqlite sqlite-devel openssl-devel xz xz-devel libffi-devel wget + +# Download and build Python 3.8 +cd /usr/src +echo "Downloading Python 3.8 source code..." +sudo wget https://www.python.org/ftp/python/3.8.16/Python-3.8.16.tgz +echo "Extracting Python 3.8..." +sudo tar xzf Python-3.8.16.tgz +cd Python-3.8.16 +echo "Building Python 3.8..." +sudo ./configure --enable-optimizations +sudo make altinstall + +# Verify Python installation +echo "Verifying Python 3.8 installation..." +python3.8 --version + +# Create symbolic link for python3 +echo "Creating symbolic link for python3..." 
+sudo ln -sf /usr/local/bin/python3.8 /usr/bin/python3 +python3 --version + +# Upgrade pip +echo "Upgrading pip..." +python3 -m ensurepip --upgrade +python3 -m pip install --upgrade pip + +############################## +# 6) Clone GitHub Repo +############################## +echo "Cloning the GitHub repository '{github_repo_url}'..." +cd /home/{user_name} +sudo git clone https://{github_token}@{github_repo_url} apis +cd /home/{user_name}/apis/{repo_subdir} + +# If repo has a requirements file: +if [ -f "requirements.txt" ]; then + sudo python3 -m pip install -r requirements.txt +else + # Install minimal dependencies if no requirements.txt + sudo python3 -m pip install fastapi uvicorn boto3 +fi + +############################## +# 7) Launch FastAPI Server +############################## +# We'll run `f1_server.py` in the background using Uvicorn on port 8000 +# Adjust the path to match where 'f1_server.py' actually resides in your repo + +echo "Starting FastAPI server..." +cd /home/{user_name}/apis/{repo_subdir} +export LC_ALL=en_US.UTF-8 +export LANG=en_US.UTF-8 +nohup python3 -m uvicorn f1_server:app --host 0.0.0.0 --port 8000 & + +echo "---------------------------------------------------" +echo " Setup complete. FastAPI is running on port 8000. " +echo " Use 'http://<public-ip>:8000/inference' " +echo " to stream LLM model responses." +echo "---------------------------------------------------" + """ @@ -82,14 +185,6 @@ def generate_ollama_setup_script(user_name: str) -> str: yum update -y # Use 'apt-get' if using Ubuntu or Debian yum install -y aws-cli git # Install AWS CLI and Git, required for the script -# Install SSM Agent -echo "→ Installing SSM Agent..." 
-yum install -y https://s3.{settings.AWS_DEFAULT_REGION}.amazonaws.com/amazon-ssm-{settings.AWS_DEFAULT_REGION}/latest/linux_amd64/amazon-ssm-agent.rpm - -# Start SSM Agent -systemctl enable amazon-ssm-agent -systemctl start amazon-ssm-agent - # Create '{user_name}' user with a home directory echo "→ Creating '{user_name}' user with a home directory..." useradd -m -d /home/{user_name} {user_name} @@ -110,7 +205,7 @@ def generate_ollama_setup_script(user_name: str) -> str: # 2. Update the ollama.service echo "→ Updating ollama.service..." -if sudo systemctl status ollama.service > /dev/null 2>&1; then +if sudo systemctl status ollama.service > /dev/null 2>&1; then # Create the override directory echo "→→ Create override dir..." sudo mkdir -p /etc/systemd/system/ollama.service.d diff --git a/app/services/ec2_service.py b/app/services/ec2_service.py index 1eb3c5c..ccef06c 100644 --- a/app/services/ec2_service.py +++ b/app/services/ec2_service.py @@ -2,15 +2,13 @@ This module contains the logic for the machine service. 
""" -import shlex -import time from typing import List import boto3 from botocore.exceptions import ClientError from core.config import settings -from models.machine import Machine, ModelInferenceRequest +from models.machine import Machine from repositories.dynamodb_price_repository import DynamoDBPriceRepository from services.price_service import PriceService from utils.exceptions import ( @@ -342,144 +340,6 @@ def terminate_machine(self, machine_id: str) -> bool: status_code=400, detail=str(e), error_code="AWS_CLIENT_ERROR" ) - def run_model_inference( - self, - instance_id: str, - request: ModelInferenceRequest, - user_id: str, - user_name: str, - ) -> str: - ssm_client = boto3.client("ssm") - machine = self.get_machine_details(instance_id) - owner_tag = next( - (tag for tag in machine.machine_desc if tag["Key"] == "user_id"), None - ) - if owner_tag is None: - raise EC2Error( - status_code=400, - detail="Machine does not have a 'user_id' tag.", - error_code="MISSING_USER_ID_TAG", - ) - if ( - user_name.lower() != "admin" - and owner_tag["Value"].lower() != user_id.lower() - ): - raise EC2Error( - status_code=403, - detail="Instance does not belong to the user.", - error_code="INSTANCE_OWNERSHIP_FORBIDDEN", - ) - if not request.prompt: - raise EC2Error( - status_code=400, - detail="The request prompt is empty.", - error_code="INVALID_REQUEST_PARAMETERS", - ) - if not request.llm_model: - raise EC2Error( - status_code=400, - detail="The model name is not provided.", - error_code="INVALID_REQUEST_PARAMETERS", - ) - try: - prompt_escaped = shlex.quote(request.prompt) - temperature = request.temperature - max_tokens = request.max_tokens - model_name = request.llm_model - command = ( - f"export LD_LIBRARY_PATH=/opt/xilinx/xrt/lib:$LD_LIBRARY_PATH && " - f"export XILINX_XRT=/opt/xilinx/xrt && " - f"cd /home/{user_name} && " - f"./{model_name} ./weights.bin -z ./tokenizer.bin " - f"-t {temperature} " - f"-n {max_tokens} " - f"-i {prompt_escaped} " - f"-k 
./forward.hw.awsxclbin" - ) - - response = ssm_client.send_command( - InstanceIds=[instance_id], - DocumentName="AWS-RunShellScript", - Parameters={"commands": [command]}, - ) - command_id = response["Command"]["CommandId"] - output = self.get_command_output(instance_id, command_id) - if "Copying data to buffer" in output: - output = output.split("Copying data to buffer", 1)[1] - return output.strip() - except ClientError as e: - error_code = e.response.get("Error", {}).get("Code", "UnknownClientError") - if error_code == "AccessDeniedException": - detail_msg = ( - f"Access denied when calling SSM SendCommand. " - f"Check IAM policies. Original error: {str(e)}" - ) - custom_error_code = "ACCESS_DENIED" - raise EC2Error( - status_code=403, detail=detail_msg, error_code=custom_error_code - ) - elif error_code == "InvalidInstanceId": - detail_msg = ( - f"Invalid instance ID or FPGA not found. " - f"Ensure the instance is in a valid state. Original error: {str(e)}" - ) - custom_error_code = "INVALID_INSTANCE_ID" - raise EC2Error( - status_code=400, detail=detail_msg, error_code=custom_error_code - ) - else: - detail_msg = f"Failed to run model inference: {str(e)}" - custom_error_code = "COMMAND_EXECUTION_FAILED" - raise EC2Error( - status_code=400, detail=detail_msg, error_code=custom_error_code - ) - - def get_command_output(self, instance_id: str, command_id: str) -> str: - ssm_client = boto3.client("ssm") - try: - # Poll for command execution status - while True: - time.sleep(1) - output = ssm_client.get_command_invocation( - CommandId=command_id, - InstanceId=instance_id, - ) - if output["Status"] in ["Success", "Failed", "TimedOut", "Cancelled"]: - break - if output["Status"] != "Success": - raise EC2Error( - status_code=400, - detail=f"Command execution failed with status: {output['Status']}", - error_code="COMMAND_EXECUTION_FAILED", - ) - return output["StandardOutputContent"] - except ClientError as e: - error_code = e.response.get("Error", {}).get("Code", 
"UnknownClientError") - if error_code == "AccessDeniedException": - detail_msg = ( - f"Access denied when retrieving SSM command output. " - f"Check IAM policies. Original error: {str(e)}" - ) - custom_error_code = "ACCESS_DENIED" - raise EC2Error( - status_code=403, detail=detail_msg, error_code=custom_error_code - ) - elif error_code == "InvalidInstanceId": - detail_msg = ( - f"Invalid instance ID when retrieving command output. " - f"Ensure the instance is in a valid state. Original error: {str(e)}" - ) - custom_error_code = "INVALID_INSTANCE_ID" - raise EC2Error( - status_code=400, detail=detail_msg, error_code=custom_error_code - ) - else: - detail_msg = f"Failed to get command output: {str(e)}" - custom_error_code = "COMMAND_OUTPUT_FAILED" - raise EC2Error( - status_code=400, detail=detail_msg, error_code=custom_error_code - ) - def get_instance_public_ip(self, instance_id: str) -> str: try: response = self.ec2.describe_instances(InstanceIds=[instance_id]) diff --git a/docker-compose.yml b/docker-compose.yml index 79f6eb1..929a5a2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,10 @@ services: - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} + # Github + - GITHUB_CONFIG_ENGINE_URL=${GITHUB_CONFIG_ENGINE_URL} + - GITHUB_TOKEN=${GITHUB_TOKEN} + - GITHUB_REPO_SUBDIR=${GITHUB_REPO_SUBDIR} # Authentication - SECRET_KEY=${SECRET_KEY} - ALGORITHM=${ALGORITHM}