Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ jobs:
SECRET_KEY: ${{ secrets.SECRET_KEY }}
ALGORITHM: ${{ secrets.ALGORITHM }}
MODEL_S3_BUCKET: ${{ secrets.MODEL_S3_BUCKET }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_CONFIG_ENGINE_URL: ${{ secrets.GITHUB_CONFIG_ENGINE_URL }}
GITHUB_REPO_SUBDIR: ${{ secrets.GITHUB_REPO_SUBDIR }}
HLS_TRANSFORM_DIRECTORY: ${{ secrets.HLS_TRANSFORM_DIRECTORY }}
FPGA_DEV_AMI: ${{ secrets.FPGA_DEV_AMI }}
GPU_DEV_AMI: ${{ secrets.GPU_DEV_AMI }}
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
default_language_version:
python: python3.11
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
Expand Down
73 changes: 39 additions & 34 deletions app/api/v1/endpoints/machine_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
from models.machine import (
Machine,
MachineCreate,
ModelInferenceRequest,
ModelInferenceResponse,
ModelSelectionRequest,
)
from models.user import UserResponse
Expand Down Expand Up @@ -156,9 +154,12 @@ async def create_fpga_machine(
) -> Machine:
try:
user_data = generate_hlstransform_setup_script(
github_token=settings.GITHUB_TOKEN,
user_name=current_user.user_name,
s3_bucket=settings.MODEL_S3_BUCKET,
s3_directory=settings.HLS_TRANSFORM_DIRECTORY,
github_repo_url=settings.GITHUB_CONFIG_ENGINE_URL,
repo_subdir=settings.GITHUB_REPO_SUBDIR,
)

logger.debug(f"Generated user data: {user_data}")
Expand Down Expand Up @@ -235,38 +236,6 @@ async def terminate_machine(
)


@router.post(
    "/machines/{machine_id}/inference",
    response_model=ModelInferenceResponse,
    tags=[fpga_tag],
)
async def run_model_inference(
    machine_id: str,
    request: ModelInferenceRequest,
    current_user: Annotated[UserResponse, Depends(get_current_active_user)],
    ec2_service: EC2Service = Depends(get_ec2_service),
):
    """Run a model inference job on an FPGA machine.

    Args:
        machine_id: EC2 instance ID of the target FPGA machine.
        request: Inference parameters (prompt, temperature, etc.).
        current_user: Authenticated caller, resolved via dependency injection.
        ec2_service: Service wrapper around EC2 operations.

    Returns:
        ModelInferenceResponse wrapping the raw model output string.

    Raises:
        EC2Error: On any EC2 failure (original status/error code preserved)
            or on an unexpected internal error (500).
    """
    try:
        output = ec2_service.run_model_inference(
            machine_id, request, current_user.user_id, current_user.user_name
        )
        return ModelInferenceResponse(output=output)
    except EC2Error as e:
        logger.error(f"An error occurred: {e}")
        # Re-wrap with added context; `from e` preserves the traceback chain
        # so the root cause is not lost in logs.
        raise EC2Error(
            status_code=e.status_code,
            detail=f"An unexpected error occurred while running model inference: {e.detail}",
            error_code=e.error_code,
        ) from e
    except Exception as e:
        logger.error(f"An internal server error occurred: {e}")
        # Deliberately hide internal details from the client on unknown errors.
        raise EC2Error(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="A fatal server error occurred while running model inference",
            error_code="INTERNAL_SERVER_ERROR",
        ) from e


@router.post("/machine/gpu/pull_model", tags=[gpu_tag])
async def pull_gpu_model(
current_user: Annotated[UserResponse, Depends(get_current_active_user)],
Expand Down Expand Up @@ -388,6 +357,42 @@ async def get_gpu_inference_url(
)


@router.get("/machine/fpga/{machine_id}/inference_url", tags=[fpga_tag])
async def get_fpga_inference_url(
    machine_id: str,
    current_user: Annotated[UserResponse, Depends(get_current_active_user)],
    ec2_service: EC2Service = Depends(get_ec2_service),
):
    """Return the public inference URL for a user's FPGA machine.

    Args:
        machine_id: EC2 instance ID of the FPGA machine.
        current_user: Authenticated caller, resolved via dependency injection.
        ec2_service: Service wrapper around EC2 operations.

    Returns:
        Dict with key ``inference_url`` pointing at the instance's
        generate endpoint on port 8000.

    Raises:
        EC2Error: 403 if the caller does not own the machine, the wrapped
            EC2 error on service failure, or 500 on unexpected errors.
    """
    # Ownership check is performed OUTSIDE the try block below: previously a
    # 403 raised inside the try was caught by our own `except EC2Error` and
    # re-wrapped, garbling the client-facing detail message.
    is_owner = ec2_service.is_user_owner_of_instance(
        user_id=current_user.user_id, instance_id=machine_id
    )
    if not is_owner:
        raise EC2Error(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="User not the owner of this machine",
            error_code="FORBIDDEN",
        )
    try:
        public_ip = ec2_service.get_instance_public_ip(machine_id)
        # FPGA FastAPI server listens on port 8000 (see ec2_setup user-data).
        inference_url = f"http://{public_ip}:8000/api/generate"
        return {"inference_url": inference_url}
    except EC2Error as e:
        logger.error(f"An error occurred: {e}")
        # `from e` preserves the traceback chain for debugging.
        raise EC2Error(
            status_code=e.status_code,
            detail=f"An EC2 Error occurred: {e.detail}",
            error_code=e.error_code,
        ) from e
    except Exception as e:
        logger.error(f"An internal server error occurred: {e}")
        raise EC2Error(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"A server error occurred while getting the inference URL: {e}",
            error_code="INTERNAL_SERVER_ERROR",
        ) from e


@router.post(
"/machine/cpu",
response_model=Machine,
Expand Down
5 changes: 5 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ class Settings(BaseSettings):
MODEL_S3_BUCKET: str
HLS_TRANSFORM_DIRECTORY: str

# Github Credentials
GITHUB_TOKEN: str
GITHUB_CONFIG_ENGINE_URL: str
GITHUB_REPO_SUBDIR: str

# Machine Management
FPGA_DEV_AMI: str
GPU_DEV_AMI: str
Expand Down
16 changes: 0 additions & 16 deletions app/models/machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""

from pydantic import BaseModel, Field
from typing import Optional


class Machine(BaseModel):
Expand All @@ -20,21 +19,6 @@ class MachineCreate(BaseModel):
machine_type: str


class ModelInferenceRequest(BaseModel):
    """Request body for running an LLM inference on an FPGA machine."""

    prompt: str = Field(..., description="Input prompt for the model.")
    temperature: Optional[float] = Field(0.8, description="Sampling temperature.")
    max_tokens: Optional[int] = Field(
        256, description="Maximum number of tokens to generate."
    )
    llm_model: Optional[str] = Field(
        "llama2", description="Name of the model executable."
    )


class ModelInferenceResponse(BaseModel):
    """Response body wrapping the raw text output of a model inference run."""

    output: str


class ModelSelectionRequest(BaseModel):
machine_id: str = Field(..., description="ID of the machine to select.")
model_name: str = Field(..., description="Name of the model to select.")
Expand Down
139 changes: 117 additions & 22 deletions app/scripts/ec2_setup.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,41 @@
from core.config import settings


def generate_hlstransform_setup_script(
user_name: str, s3_bucket: str, s3_directory: str
github_token: str,
user_name: str,
s3_bucket: str,
s3_directory: str,
github_repo_url: str,
repo_subdir: str,
) -> str:
return f"""#!/bin/bash

# Log output to file for debugging purposes
exec > /var/log/user-data.log 2>&1
set -x

# Update package list and install required packages
yum update -y # Use 'apt-get' if using Ubuntu or Debian
yum install -y aws-cli git # Install AWS CLI and Git, required for the script
##############################
# 1) System Updates & Basic Tools
##############################

# Install SSM Agent
echo "Installing SSM Agent..."
yum install -y https://s3.{settings.AWS_DEFAULT_REGION}.amazonaws.com/amazon-ssm-{settings.AWS_DEFAULT_REGION}/latest/linux_amd64/amazon-ssm-agent.rpm
sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo
sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo
sudo yum -y install centos-release-scl

# Start SSM Agent
systemctl enable amazon-ssm-agent
systemctl start amazon-ssm-agent
sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo
sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo
sudo yum -y install devtoolset-9

sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo
sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo
sudo yum -y install boost-devel

scl enable devtoolset-9 bash

##############################
# 2) Create User and Set Permissions
##############################

# Create '{user_name}' user with a home directory
echo "Creating '{user_name}' user with a home directory..."
Expand All @@ -32,6 +47,10 @@ def generate_hlstransform_setup_script(
# Set correct permissions on the directory
chmod 755 /home/{user_name}

##############################
# 3) Fetch Files from S3
##############################

# Static values for S3 bucket and directory
S3_BUCKET="{s3_bucket}"
S3_DIRECTORY="{s3_directory}"
Expand All @@ -47,6 +66,10 @@ def generate_hlstransform_setup_script(
chmod 755 /home/{user_name}/llama2
chmod 755 /home/{user_name}/forward.hw.awsxclbin

##############################
# 4) Setup AWS FPGA Environment
##############################

# Clone the AWS FPGA repository and set up the environment
echo "Cloning AWS FPGA repository and setting up the environment..."
export AWS_FPGA_REPO_DIR=/home/{user_name}/aws-fpga
Expand All @@ -55,6 +78,10 @@ def generate_hlstransform_setup_script(
source $AWS_FPGA_REPO_DIR/vitis_runtime_setup.sh
export LC_ALL=C

##############################
# 5) Ensure XRT MPD (Message Proxy Daemon) is running
##############################

# Start the Xilinx XRT Message Proxy Daemon (MPD) if not running
echo "Checking MPD service status..."
if systemctl is-active --quiet mpd; then
Expand All @@ -69,6 +96,82 @@ def generate_hlstransform_setup_script(
systemctl status mpd

echo "Setup complete. You can now run your application with './llama2' as the '{user_name}' user."

##############################
# 5.5) Python3.8 environment
##############################

# Install development tools and dependencies
echo "Installing development tools and dependencies..."
sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo
sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo
sudo yum groupinstall -y "Development Tools"

sudo sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sudo sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo
sudo sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo
sudo yum install -y gcc gcc-c++ make zlib-devel bzip2 bzip2-devel readline-devel sqlite sqlite-devel openssl-devel xz xz-devel libffi-devel wget

# Download and build Python 3.8
cd /usr/src
echo "Downloading Python 3.8 source code..."
sudo wget https://www.python.org/ftp/python/3.8.16/Python-3.8.16.tgz
echo "Extracting Python 3.8..."
sudo tar xzf Python-3.8.16.tgz
cd Python-3.8.16
echo "Building Python 3.8..."
sudo ./configure --enable-optimizations
sudo make altinstall

# Verify Python installation
echo "Verifying Python 3.8 installation..."
python3.8 --version

# Create symbolic link for python3
echo "Creating symbolic link for python3..."
sudo ln -sf /usr/local/bin/python3.8 /usr/bin/python3
python3 --version

# Upgrade pip
echo "Upgrading pip..."
python3 -m ensurepip --upgrade
python3 -m pip install --upgrade pip

##############################
# 6) Clone GitHub Repo
##############################
echo "Cloning the GitHub repository '{github_repo_url}'..."
cd /home/{user_name}
sudo git clone https://{github_token}@{github_repo_url} apis
cd /home/{user_name}/apis/{repo_subdir}

# If repo has a requirements file:
if [ -f "requirements.txt" ]; then
sudo python3 -m pip install -r requirements.txt
else
# Install minimal dependencies if no requirements.txt
sudo python3 -m pip install fastapi uvicorn boto3
fi

##############################
# 7) Launch FastAPI Server
##############################
# We'll run `f1_server.py` in the background using Uvicorn on port 80
# Adjust the path to match where 'f1_server.py' actually resides in your repo

echo "Cloning config engine..."
cd /home/{user_name}/apis/{repo_subdir}
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
nohup python3 -m uvicorn f1_server:app --host 0.0.0.0 --port 8000 &

echo "---------------------------------------------------"
echo " Setup complete. FastAPI is running on port 80. "
echo " Use 'http://<public-EC2-IP>/inference' "
echo " to stream LLM model responses."
echo "---------------------------------------------------"

"""


Expand All @@ -82,14 +185,6 @@ def generate_ollama_setup_script(user_name: str) -> str:
yum update -y # Use 'apt-get' if using Ubuntu or Debian
yum install -y aws-cli git # Install AWS CLI and Git, required for the script

# Install SSM Agent
echo "→ Installing SSM Agent..."
yum install -y https://s3.{settings.AWS_DEFAULT_REGION}.amazonaws.com/amazon-ssm-{settings.AWS_DEFAULT_REGION}/latest/linux_amd64/amazon-ssm-agent.rpm

# Start SSM Agent
systemctl enable amazon-ssm-agent
systemctl start amazon-ssm-agent

# Create '{user_name}' user with a home directory
echo "→ Creating '{user_name}' user with a home directory..."
useradd -m -d /home/{user_name} {user_name}
Expand All @@ -110,7 +205,7 @@ def generate_ollama_setup_script(user_name: str) -> str:

# 2. Update the ollama.service
echo "→ Updating ollama.service..."
if sudo systemctl status ollama.service > /dev/null 2>&1; then
if sudo systemctl status ollama.service > /dev/null 2>&1; then
# Create the override directory
echo "→→ Create override dir..."
sudo mkdir -p /etc/systemd/system/ollama.service.d
Expand Down
Loading
Loading