From 795f693a43e29268358d93a9c019aaab4042d35f Mon Sep 17 00:00:00 2001
From: Alejandro
Date: Mon, 16 Sep 2024 23:23:22 -0700
Subject: [PATCH] Add Inference Spec & CI

---
Reviewer note: file contents were revised during review; the blob ids on the
`index` lines below were not regenerated.

 .github/workflows/deploy_inference_apps.yaml |  90 +++++++++++++++
 app_inference_spec.py                        | 146 ++++++++++++++++++++++++
 example_inference.json                       |  23 ++++
 3 files changed, 259 insertions(+)
 create mode 100644 .github/workflows/deploy_inference_apps.yaml
 create mode 100644 app_inference_spec.py
 create mode 100644 example_inference.json

diff --git a/.github/workflows/deploy_inference_apps.yaml b/.github/workflows/deploy_inference_apps.yaml
new file mode 100644
index 0000000..f973c55
--- /dev/null
+++ b/.github/workflows/deploy_inference_apps.yaml
@@ -0,0 +1,90 @@
+# Deploys the Ray Serve inference apps: development first, production only after dev succeeds.
+name: Deploy Inference Apps
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  deploy_dev:
+    name: Deploy Inference Apps (Development)
+    runs-on: ubuntu-latest
+    env:
+      ENV: dev
+      AWS_REGION: us-east-1
+      AWS_CI_ROLE: ${{ secrets.AWS_INFER_CI_ROLE__DEV }}
+    steps:
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: ${{ env.AWS_REGION }}
+          role-to-assume: ${{ env.AWS_CI_ROLE}}
+
+      - name: Deploy Ray Serve
+        shell: bash
+        run: |
+          # NOTE(review): AWS_DEFAULT_REGION is not defined in this job's env; presumably
+          # the configure-aws-credentials step exports it - confirm.
+          RAY_CLUSTER_NAME=ray-cluster-$ENV
+          RAY_PRIVATE_IP=$(aws ec2 describe-instances --region $AWS_DEFAULT_REGION --filters "Name=tag:Name,Values=ray-cluster-$ENV-head" --query "Reservations[*].Instances[*].PrivateIpAddress" --output text)
+          RAY_BASTION_PUBLIC_IP=$(aws ec2 describe-instances --region $AWS_DEFAULT_REGION --filters "Name=tag:Name,Values=ray-cluster-$ENV-bastion" --query "Reservations[*].Instances[*].PublicIpAddress" --output text)
+          RAY_CLUSTER_KEY_PAIR_FILE=$RAY_CLUSTER_NAME
+          RAY_CLUSTER_SECRET_KEY_PAIR_NAME=$RAY_CLUSTER_NAME-key-pair-secret
+
+          # ssh rejects private keys that other users can read, hence the chmod 400.
+          aws secretsmanager get-secret-value --region $AWS_REGION --secret-id $RAY_CLUSTER_SECRET_KEY_PAIR_NAME --query SecretString --output text > ./${RAY_CLUSTER_KEY_PAIR_FILE}.pem
+          chmod 400 ./${RAY_CLUSTER_KEY_PAIR_FILE}.pem
+
+          echo "Deploying Ray Serve on $RAY_CLUSTER_NAME..."
+          # Remote output is discarded (>/dev/null 2>&1); only the ssh exit status decides success.
+          if ssh -o StrictHostKeyChecking=no -i ./${RAY_CLUSTER_KEY_PAIR_FILE}.pem ubuntu@$RAY_BASTION_PUBLIC_IP "source ~/.profile && bash bastion-ray-serve-deploy.sh $ENV" >/dev/null 2>&1; then
+            echo "Deployment succeeded."
+          else
+            echo "Deployment failed."
+            exit 1
+          fi
+
+  deploy_prod:
+    name: Deploy Inference Apps (Production)
+    runs-on: ubuntu-latest
+    # `needs` is the GitHub Actions key for job ordering; prod runs only after dev succeeds.
+    needs: deploy_dev
+    env:
+      ENV: prod
+      AWS_REGION: us-east-1
+      AWS_CI_ROLE: ${{ secrets.AWS_INFER_CI_ROLE__PROD }}
+    steps:
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: ${{ env.AWS_REGION }}
+          role-to-assume: ${{ env.AWS_CI_ROLE}}
+
+      - name: Deploy Ray Serve
+        shell: bash
+        run: |
+          # Same deploy sequence as deploy_dev, pointed at the prod cluster via $ENV.
+          RAY_CLUSTER_NAME=ray-cluster-$ENV
+          RAY_PRIVATE_IP=$(aws ec2 describe-instances --region $AWS_DEFAULT_REGION --filters "Name=tag:Name,Values=ray-cluster-$ENV-head" --query "Reservations[*].Instances[*].PrivateIpAddress" --output text)
+          RAY_BASTION_PUBLIC_IP=$(aws ec2 describe-instances --region $AWS_DEFAULT_REGION --filters "Name=tag:Name,Values=ray-cluster-$ENV-bastion" --query "Reservations[*].Instances[*].PublicIpAddress" --output text)
+          RAY_CLUSTER_KEY_PAIR_FILE=$RAY_CLUSTER_NAME
+          RAY_CLUSTER_SECRET_KEY_PAIR_NAME=$RAY_CLUSTER_NAME-key-pair-secret
+
+          aws secretsmanager get-secret-value --region $AWS_REGION --secret-id $RAY_CLUSTER_SECRET_KEY_PAIR_NAME --query SecretString --output text > ./${RAY_CLUSTER_KEY_PAIR_FILE}.pem
+          chmod 400 ./${RAY_CLUSTER_KEY_PAIR_FILE}.pem
+
+          echo "Deploying Ray Serve on $RAY_CLUSTER_NAME..."
+          if ssh -o StrictHostKeyChecking=no -i ./${RAY_CLUSTER_KEY_PAIR_FILE}.pem ubuntu@$RAY_BASTION_PUBLIC_IP "source ~/.profile && bash bastion-ray-serve-deploy.sh $ENV" >/dev/null 2>&1; then
+            echo "Deployment succeeded."
+          else
+            echo "Deployment failed."
+            exit 1
+          fi
diff --git a/app_inference_spec.py b/app_inference_spec.py
new file mode 100644
index 0000000..c88bdac
--- /dev/null
+++ b/app_inference_spec.py
@@ -0,0 +1,146 @@
+from typing import List, Tuple
+
+from fastapi import FastAPI, HTTPException
+from presidio_analyzer import AnalyzerEngine
+from presidio_anonymizer import AnonymizerEngine
+from pydantic import BaseModel
+
+from models_host.base_inference_spec import BaseInferenceSpec
+
+app = FastAPI()
+
+
+class InferenceData(BaseModel):
+    """One named tensor in a request or response payload."""
+
+    name: str
+    shape: List[int]
+    data: List
+    datatype: str
+
+
+class InputRequest(BaseModel):
+    """Request body: the list of named input tensors."""
+
+    inputs: List[InferenceData]
+
+
+class OutputResponse(BaseModel):
+    """Response body: model name/version plus the output tensors."""
+
+    modelname: str
+    modelversion: str
+    outputs: List[InferenceData]
+
+
+class InferenceSpec(BaseInferenceSpec):
+    """Anonymize PII in text using Presidio's analyzer and anonymizer.
+
+    NOTE(review): load/process_request/infer look like hooks called by the
+    BaseInferenceSpec host - confirm against models_host.
+    """
+
+    model_name = "presidio-pii"
+
+    # Group aliases accepted in the "pii_entities" input, expanded to entity names.
+    PII_ENTITIES_MAP = {
+        "pii": [
+            "EMAIL_ADDRESS",
+            "PHONE_NUMBER",
+            "DOMAIN_NAME",
+            "IP_ADDRESS",
+            "DATE_TIME",
+            "LOCATION",
+            "PERSON",
+            "URL",
+        ],
+        "spi": [
+            "CREDIT_CARD",
+            "CRYPTO",
+            "IBAN_CODE",
+            "NRP",
+            "MEDICAL_LICENSE",
+            "US_BANK_NUMBER",
+            "US_DRIVER_LICENSE",
+            "US_ITIN",
+            "US_PASSPORT",
+            "US_SSN",
+        ],
+    }
+
+    def load(self):
+        """Create the Presidio analyzer and anonymizer engines."""
+        self.pii_analyzer = AnalyzerEngine()
+        self.pii_anonymizer = AnonymizerEngine()
+
+    def process_request(self, input_request: InputRequest) -> Tuple[Tuple, dict]:
+        """Convert a request into the ``(args, kwargs)`` pair for ``infer``.
+
+        Reads the ``text`` input (texts to anonymize) and the ``pii_entities``
+        input (entity names and/or ``PII_ENTITIES_MAP`` group aliases).
+
+        Returns:
+            ``((text_vals, entities_to_filter), {})``.
+
+        Raises:
+            HTTPException: 400 if either input is missing, a bare alias is
+                unknown, or ``pii_entities`` is not a string or list of strings.
+        """
+        text_vals = None
+        pii_entities = None
+
+        for inp in input_request.inputs:
+            if inp.name == "text":
+                text_vals = inp.data
+            elif inp.name == "pii_entities":
+                pii_entities = inp.data
+
+        if text_vals is None or pii_entities is None:
+            raise HTTPException(status_code=400, detail="Invalid input format")
+
+        if isinstance(pii_entities, str):
+            # InferenceData.data is declared as a list, so validated requests do
+            # not reach this branch; kept for callers passing a bare alias.
+            entities_to_filter = self.PII_ENTITIES_MAP.get(pii_entities)
+            if entities_to_filter is None:
+                raise HTTPException(status_code=400, detail="Invalid PII entity type")
+        elif isinstance(pii_entities, list):
+            entities_to_filter = self._expand_entities(pii_entities)
+        else:
+            raise HTTPException(status_code=400, detail="Invalid PII entity format")
+
+        return (text_vals, entities_to_filter), {}
+
+    def _expand_entities(self, names: List) -> List[str]:
+        """Expand group aliases in ``names`` and drop duplicates, keeping order."""
+        expanded = []
+        for name in names:
+            if not isinstance(name, str):
+                raise HTTPException(status_code=400, detail="Invalid PII entity format")
+            expanded.extend(self.PII_ENTITIES_MAP.get(name, [name]))
+        return list(dict.fromkeys(expanded))
+
+    def infer(self, text_vals: List[str], entities: List[str]) -> OutputResponse:
+        """Anonymize each text and return one ``result{i}`` output per input text."""
+        outputs = [
+            InferenceData(
+                name=f"result{idx}",
+                datatype="BYTES",
+                # Each output carries one string, so its shape is [1]; this matches
+                # example_inference.json, where shape is the element count of data.
+                shape=[1],
+                data=[self.get_anonymized_text(text, entities)],
+            )
+            for idx, text in enumerate(text_vals)
+        ]
+
+        return OutputResponse(
+            modelname=self.model_name,
+            modelversion="1",
+            outputs=outputs,
+        )
+
+    def get_anonymized_text(self, text: str, entities: List[str]) -> str:
+        """Return ``text`` with the detected ``entities`` anonymized (English only)."""
+        results = self.pii_analyzer.analyze(text=text, entities=entities, language="en")
+        return self.pii_anonymizer.anonymize(text=text, analyzer_results=results).text
diff --git a/example_inference.json b/example_inference.json
new file mode 100644
index 0000000..70cb488
--- /dev/null
+++ b/example_inference.json
@@ -0,0 +1,23 @@
+{
+  "inputs": [
+    {
+      "name": "text",
+      "shape": [1],
+      "data": ["My phone number is 555-555-5555 and my email is ae@example.com"],
+      "datatype": "BYTES"
+    },
+    {
+      "name": "pii_entities",
+      "shape": [6],
+      "data": [
+        "PHONE_NUMBER",
+        "EMAIL_ADDRESS",
+        "DATE_TIME",
+        "LOCATION",
+        "PERSON",
+        "URL"
+      ],
+      "datatype": "BYTES"
+    }
+  ]
+}
\ No newline at end of file