diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..1d8288e --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,497 @@ +# ============================================================================ +# GitLab CI/CD Pipeline for Building and Pushing Container Images to AWS ECR +# ============================================================================ +# +# This pipeline automatically builds Docker containers and pushes them to AWS ECR. +# It features intelligent change detection, parallel builds, and secure OIDC authentication. +# +# Features: +# - Automatic service discovery from src/agentic_platform/agent/ directories +# - Intelligent change detection (builds only what changed) +# - Parallel builds for faster execution +# - Secure AWS authentication via OIDC (no credentials stored) +# - Automatic ECR repository creation +# +# Setup Required: +# 1. Run: ./script/setup_aws_oidc.sh YOUR_GITLAB_PROJECT_PATH +# 2. Add GitLab CI/CD variables: AWS_ROLE_ARN and AWS_REGION +# +# ============================================================================ + +# Pipeline Trigger Configuration +workflow: + rules: + # Automatic triggers + - if: '$CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_BRANCH == "main"' + when: always + - if: '$CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_BRANCH == "develop"' + when: always + - if: '$CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_TAG =~ /^v.*/' + when: always + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + when: always + + # Manual trigger with optional MANUAL_SERVICES parameter + # Options: "all" (build all), "changed" (default), or "service1,service2" (specific services) + - if: '$CI_PIPELINE_SOURCE == "web"' + when: always + variables: + MANUAL_SERVICES: "${MANUAL_SERVICES:-changed}" + + # Do not run for any other pipeline sources + - when: never + +# ============================================================================ +# Pipeline Configuration +# ============================================================================ + +# Pipeline stages execute in this order +stages: + - detect-changes # Discover services and determine what to build + - build # Build and push Docker images in parallel + - summary # Display build results + +# Global variables (can be overridden in GitLab CI/CD settings) +variables: + AWS_REGION: "us-east-1" + +# Default configuration for all jobs +default: + image: ubuntu:22.04 + before_script: + - apt-get update -qq + - apt-get install -y -qq git jq curl unzip + - | + # Install AWS CLI + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" + unzip -q awscliv2.zip + ./aws/install + rm -rf aws awscliv2.zip + +# ============================================================================ +# Stage 1: Detect Changes +# ============================================================================ +# This job discovers services, detects changes, and determines what to build + +detect-changes: + stage: detect-changes + script: + # Authenticate with AWS using OIDC + - echo "Authenticating with AWS using OIDC..." 
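+      # The block below exchanges the GitLab-issued ID token (CI_JOB_JWT_V2, declared under
+      # id_tokens at the end of this job) for temporary STS credentials via
+      # sts:AssumeRoleWithWebIdentity, so no long-lived AWS keys are stored in GitLab.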
+ - | + # Use GitLab's OIDC token to authenticate with AWS + CREDENTIALS=$(aws sts assume-role-with-web-identity \ + --role-arn ${AWS_ROLE_ARN} \ + --role-session-name "gitlab-ci-${CI_PROJECT_NAME}-${CI_PIPELINE_ID}" \ + --web-identity-token ${CI_JOB_JWT_V2} \ + --duration-seconds 3600 \ + --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken]' \ + --output text) + + export AWS_ACCESS_KEY_ID=$(echo $CREDENTIALS | awk '{print $1}') + export AWS_SECRET_ACCESS_KEY=$(echo $CREDENTIALS | awk '{print $2}') + export AWS_SESSION_TOKEN=$(echo $CREDENTIALS | awk '{print $3}') + + aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID + aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY + aws configure set aws_session_token $AWS_SESSION_TOKEN + aws configure set region ${AWS_REGION} + + echo "AWS authentication successful" + + # Discover services by scanning for Dockerfiles + - echo "Discovering services from src/agentic_platform/agent/ directory..." + - | + ALL_SERVICES=() + + # Scan for services with Dockerfiles + if [ -d "src/agentic_platform/agent" ]; then + for service_dir in src/agentic_platform/agent/*/; do + if [ -f "${service_dir}Dockerfile" ]; then + service_name=$(basename "$service_dir") + ALL_SERVICES+=("$service_name") + fi + done + fi + + # Fallback: check docker/ directory + if [ -d "docker" ]; then + for service_dir in docker/*/; do + if [ -f "${service_dir}Dockerfile" ]; then + service_name=$(basename "$service_dir") + ALL_SERVICES+=("$service_name") + fi + done + fi + + # Sort for consistent output + IFS=$'\n' ALL_SERVICES=($(sort <<<"${ALL_SERVICES[*]}")) + unset IFS + + echo "Discovered ${#ALL_SERVICES[@]} services:" + for service in "${ALL_SERVICES[@]}"; do + echo " - $service" + done + + if [ ${#ALL_SERVICES[@]} -eq 0 ]; then + echo "No services found with Dockerfiles." + fi + + # Handle manual trigger parameters + - | + SERVICES_TO_BUILD=() + + if [ -n "${MANUAL_SERVICES:-}" ]; then + echo "Manual trigger detected with parameter: ${MANUAL_SERVICES}" + + if [ "${MANUAL_SERVICES}" = "all" ]; then + echo "Building all discovered services" + SERVICES_TO_BUILD=("${ALL_SERVICES[@]}") + elif [ "${MANUAL_SERVICES}" = "changed" ]; then + echo "Using change detection for manual trigger" + else + echo "Parsing comma-separated service list" + IFS=',' read -ra MANUAL_SERVICE_LIST <<< "${MANUAL_SERVICES}" + for service in "${MANUAL_SERVICE_LIST[@]}"; do + service=$(echo "$service" | xargs) + if [[ " ${ALL_SERVICES[@]} " =~ " ${service} " ]]; then + SERVICES_TO_BUILD+=("$service") + else + echo "Warning: Service '$service' not found, skipping" + fi + done + fi + fi + + # Detect changes to determine what to build + - | + if [ ${#SERVICES_TO_BUILD[@]} -eq 0 ]; then + echo "Running change detection..." 
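+        # Base commit selection: merge requests diff against the target branch SHA,
+        # ordinary pushes diff against CI_COMMIT_BEFORE_SHA, and brand-new branches
+        # (all-zero "before" SHA) fall back to listing every tracked file.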
+ + # Determine what changed based on trigger type + if [ -n "${CI_MERGE_REQUEST_IID:-}" ]; then + echo "Trigger type: Merge Request" + BASE_SHA="${CI_MERGE_REQUEST_TARGET_BRANCH_SHA}" + HEAD_SHA="${CI_COMMIT_SHA}" + elif [ "${CI_COMMIT_BEFORE_SHA}" = "0000000000000000000000000000000000000000" ]; then + echo "Trigger type: New branch (building all services)" + BASE_SHA="" + HEAD_SHA="${CI_COMMIT_SHA}" + else + echo "Trigger type: Push" + BASE_SHA="${CI_COMMIT_BEFORE_SHA}" + HEAD_SHA="${CI_COMMIT_SHA}" + fi + + echo "Comparing commits: ${BASE_SHA:-} -> ${HEAD_SHA}" + + # Get list of changed files + if [ -n "${BASE_SHA}" ]; then + CHANGED_FILES=$(git diff --name-only ${BASE_SHA} ${HEAD_SHA} || echo "") + else + CHANGED_FILES=$(git ls-files || echo "") + fi + + echo "Changed files:" + echo "$CHANGED_FILES" + + # Check if core dependencies changed (rebuild all services if true) + CORE_CHANGED=false + CORE_PATTERNS=( + "pyproject.toml" + "requirements.txt" + "src/agentic_platform/core/" + "package.json" + "yarn.lock" + ) + + for pattern in "${CORE_PATTERNS[@]}"; do + if echo "$CHANGED_FILES" | grep -q "$pattern"; then + echo "Core dependency change detected: $pattern" + CORE_CHANGED=true + break + fi + done + + if [ "$CORE_CHANGED" = true ]; then + echo "Core dependencies changed - marking all services for rebuild" + SERVICES_TO_BUILD=("${ALL_SERVICES[@]}") + else + # Check for service-specific changes + for service in "${ALL_SERVICES[@]}"; do + service_underscore=$(echo "$service" | tr '-' '_') + service_hyphen=$(echo "$service" | tr '_' '-') + + if echo "$CHANGED_FILES" | grep -q "docker/${service}/\|docker/${service_underscore}/\|docker/${service_hyphen}/"; then + echo "Changes detected in docker/${service}/" + SERVICES_TO_BUILD+=("$service") + elif echo "$CHANGED_FILES" | grep -q "src/agentic_platform/service/${service}/\|src/agentic_platform/service/${service_underscore}/\|src/agentic_platform/service/${service_hyphen}/"; then + echo "Changes detected in src/agentic_platform/service/${service}/" + SERVICES_TO_BUILD+=("$service") + elif echo "$CHANGED_FILES" | grep -q "src/agentic_platform/agent/${service}/\|src/agentic_platform/agent/${service_underscore}/\|src/agentic_platform/agent/${service_hyphen}/"; then + echo "Changes detected in src/agentic_platform/agent/${service}/" + SERVICES_TO_BUILD+=("$service") + fi + done + fi + fi + + # Check for missing ECR repositories + - | + echo "Validating ECR repositories..." + MISSING_REPO_SERVICES=() + + for service in "${ALL_SERVICES[@]}"; do + repo_name="agentic-platform-${service}" + + if aws ecr describe-repositories --repository-names "$repo_name" --region ${AWS_REGION} >/dev/null 2>&1; then + echo "ECR repository exists: $repo_name" + else + echo "ECR repository missing: $repo_name" + MISSING_REPO_SERVICES+=("$service") + fi + done + + if [ ${#MISSING_REPO_SERVICES[@]} -gt 0 ]; then + echo "Services with missing ECR repositories:" + for service in "${MISSING_REPO_SERVICES[@]}"; do + echo " - $service" + done + fi + + # Generate final build list + - | + echo "Generating final build list..." 
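+        # Final list = union of services with detected changes and services whose ECR
+        # repository is missing, de-duplicated with `sort -u` and written to build.env as a
+        # JSON array so the build stage can consume it through the dotenv artifact.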
+ + # Combine changed services and services with missing repos + COMBINED_SERVICES=("${SERVICES_TO_BUILD[@]}" "${MISSING_REPO_SERVICES[@]}") + FINAL_SERVICES=($(printf '%s\n' "${COMBINED_SERVICES[@]}" | sort -u)) + + echo "Final build list (${#FINAL_SERVICES[@]} services):" + for service in "${FINAL_SERVICES[@]}"; do + reasons=() + if [[ " ${SERVICES_TO_BUILD[@]} " =~ " ${service} " ]]; then + reasons+=("code changes") + fi + if [[ " ${MISSING_REPO_SERVICES[@]} " =~ " ${service} " ]]; then + reasons+=("missing ECR repository") + fi + echo " - $service (${reasons[*]})" + done + + # Create JSON array for next stage + if [ ${#FINAL_SERVICES[@]} -gt 0 ]; then + SERVICES_JSON=$(printf '%s\n' "${FINAL_SERVICES[@]}" | jq -R . | jq -s -c .) + else + SERVICES_JSON="[]" + fi + + echo "SERVICES_TO_BUILD=$SERVICES_JSON" > build.env + echo "BUILD_COUNT=${#FINAL_SERVICES[@]}" >> build.env + + echo "Build configuration saved to build.env" + cat build.env + + artifacts: + reports: + dotenv: build.env + paths: + - build.env + expire_in: 1 hour + + id_tokens: + CI_JOB_JWT_V2: + aud: https://gitlab.com + +# ============================================================================ +# Stage 2: Build +# ============================================================================ +# This job builds Docker images in parallel and pushes them to AWS ECR + +build: + stage: build + image: docker:24-dind + services: + - docker:24-dind + variables: + DOCKER_HOST: tcp://docker:2376 + DOCKER_TLS_CERTDIR: "/certs" + DOCKER_TLS_VERIFY: 1 + DOCKER_CERT_PATH: "$DOCKER_TLS_CERTDIR/client" + before_script: + - apk add --no-cache python3 py3-pip bash jq curl aws-cli + needs: + - job: detect-changes + artifacts: true + id_tokens: + CI_JOB_JWT_V2: + aud: https://gitlab.com + rules: + - if: '$BUILD_COUNT == "0"' + when: never + - when: on_success + script: + # Build all services in parallel + - | + cat > /tmp/build.sh << 'EOFSCRIPT' + #!/bin/bash + set -e + + echo "Authenticating with AWS using OIDC..." + + CREDENTIALS=$(aws sts assume-role-with-web-identity \ + --role-arn ${AWS_ROLE_ARN} \ + --role-session-name "gitlab-ci-${CI_PROJECT_NAME}-${CI_PIPELINE_ID}" \ + --web-identity-token ${CI_JOB_JWT_V2} \ + --duration-seconds 3600 \ + --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken]' \ + --output text) + + export AWS_ACCESS_KEY_ID=$(echo $CREDENTIALS | awk '{print $1}') + export AWS_SECRET_ACCESS_KEY=$(echo $CREDENTIALS | awk '{print $2}') + export AWS_SESSION_TOKEN=$(echo $CREDENTIALS | awk '{print $3}') + + aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID + aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY + aws configure set aws_session_token $AWS_SESSION_TOKEN + aws configure set region ${AWS_REGION} + + echo "AWS authentication successful" + + echo "Parsing services to build..." + echo "SERVICES_TO_BUILD: $SERVICES_TO_BUILD" + + if [ "$SERVICES_TO_BUILD" = "[]" ] || [ -z "$SERVICES_TO_BUILD" ]; then + echo "No services to build. Exiting successfully." + exit 0 + fi + + SERVICES=($(echo "$SERVICES_TO_BUILD" | jq -r '.[]')) + echo "Services to build in parallel: ${SERVICES[@]}" + + declare -a BUILD_PIDS + declare -a BUILD_SERVICES + + # Function to build a single service + build_service() { + local service=$1 + echo "=========================================" + echo "Building service: $service" + echo "=========================================" + + chmod +x deploy/build-container-gitlab.sh + ./deploy/build-container-gitlab.sh "$service" + + local exit_code=$? 
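+        # The build script's exit status is captured and returned so the parent loop can
+        # report per-service results; with `set -e` active, a failing build also terminates
+        # this background subshell, and the parent still detects it via `wait`.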
+ if [ $exit_code -eq 0 ]; then + echo "โœ“ Build completed successfully for: $service" + else + echo "โœ— Build failed for: $service (exit code: $exit_code)" + fi + return $exit_code + } + + # Launch parallel builds + for service in "${SERVICES[@]}"; do + echo "Launching parallel build for: $service" + (build_service "$service"; exit $?) & + BUILD_PIDS+=($!) + BUILD_SERVICES+=("$service") + done + + # Wait for all builds and collect results + echo "" + echo "Waiting for all parallel builds to complete..." + FAILED_BUILDS=() + SUCCESS_BUILDS=() + + for i in "${!BUILD_PIDS[@]}"; do + pid=${BUILD_PIDS[$i]} + service=${BUILD_SERVICES[$i]} + + if wait $pid; then + echo "โœ“ Build succeeded: $service" + SUCCESS_BUILDS+=("$service") + else + echo "โœ— Build failed: $service" + FAILED_BUILDS+=("$service") + fi + done + + # Report results + echo "" + echo "=========================================" + echo "Build Results Summary" + echo "=========================================" + echo "Total services: ${#SERVICES[@]}" + echo "Successful: ${#SUCCESS_BUILDS[@]}" + echo "Failed: ${#FAILED_BUILDS[@]}" + + if [ ${#FAILED_BUILDS[@]} -gt 0 ]; then + echo "" + echo "Failed services:" + for service in "${FAILED_BUILDS[@]}"; do + echo " - $service" + done + exit 1 + else + echo "" + echo "All builds completed successfully!" + exit 0 + fi + EOFSCRIPT + - chmod +x /tmp/build.sh + - /bin/bash /tmp/build.sh + +# ============================================================================ +# Stage 3: Summary +# ============================================================================ +# This job displays the final build results + +summary: + stage: summary + needs: + - job: detect-changes + artifacts: true + - job: build + optional: true + when: always + script: + - | + #!/bin/bash + set -e + + echo "=========================================" + echo "Pipeline Build Summary" + echo "=========================================" + echo "" + + if [ -f "build.env" ]; then + source build.env + + if [ "$BUILD_COUNT" = "0" ] || [ "$SERVICES_TO_BUILD" = "[]" ]; then + echo "๐Ÿ“‹ Result: No services were built" + echo "" + echo "Reason: No code changes detected and all ECR repositories exist." + echo "" + echo "The pipeline completed successfully with no build actions required." + else + echo "๐Ÿ“ฆ Services Built: $BUILD_COUNT" + echo "" + echo "The following services were processed:" + printf '%s\n' "$SERVICES_TO_BUILD" | jq -r '.[]' | while read -r service; do + echo " โœ“ $service" + done + echo "" + echo "All container images have been built and pushed to AWS ECR." + fi + else + echo "โš ๏ธ Warning: build.env artifact not found" + echo "" + echo "Unable to generate detailed summary." + fi + + echo "" + echo "=========================================" + echo "Pipeline execution completed" + echo "=========================================" diff --git a/GITLAB_SETUP_GUIDE.md b/GITLAB_SETUP_GUIDE.md new file mode 100644 index 0000000..4f904dc --- /dev/null +++ b/GITLAB_SETUP_GUIDE.md @@ -0,0 +1,178 @@ +# GitLab CI/CD Pipeline Setup Guide + +This guide will help you set up the GitLab CI/CD pipeline for building and pushing Docker containers to AWS ECR. + +## Files Included + +1. `.gitlab-ci.yml` - GitLab CI/CD pipeline configuration +2. `deploy/build-container-gitlab.sh` - Docker build script +3. `script/setup_aws_oidc.sh` - AWS OIDC setup script +4. 
`docs/TESTING_GUIDE.md` - Detailed testing guide + +## Prerequisites + +- GitLab repository with your code +- AWS account with permissions to create IAM roles and ECR repositories +- AWS CLI installed and configured locally + +## Setup Steps + +### 1. Add Files to Your Repository + +Copy these files to your GitLab repository: +- `.gitlab-ci.yml` (root directory) +- `deploy/build-container-gitlab.sh` +- `script/setup_aws_oidc.sh` + +Commit and push to GitLab: +```bash +git add .gitlab-ci.yml deploy/build-container-gitlab.sh script/setup_aws_oidc.sh +git commit -m "Add GitLab CI/CD pipeline for container builds" +git push origin main +``` + +### 2. Configure AWS OIDC Trust + +Run the setup script locally (requires AWS CLI configured): + +```bash +chmod +x script/setup_aws_oidc.sh +./script/setup_aws_oidc.sh YOUR_GITLAB_PROJECT_PATH +``` + +**Example:** +```bash +./script/setup_aws_oidc.sh mycompany/myproject +``` + +The script will output an `AWS_ROLE_ARN` - **save this value**. + +### 3. Configure GitLab CI/CD Variables + +Go to your GitLab project: +1. Navigate to: **Settings โ†’ CI/CD โ†’ Variables** +2. Click **"Add variable"** +3. Add these two variables: + +**Variable 1:** +- Key: `AWS_ROLE_ARN` +- Value: `arn:aws:iam::YOUR_ACCOUNT_ID:role/GitLabCIRole` (from step 2) +- Type: Variable +- Protect variable: โ˜ (unchecked) +- Mask variable: โ˜‘ (checked) + +**Variable 2:** +- Key: `AWS_REGION` +- Value: `us-east-1` (or your preferred region) +- Type: Variable +- Protect variable: โ˜ (unchecked) +- Mask variable: โ˜ (unchecked) + +### 4. Test the Pipeline + +Push a commit to trigger the pipeline: +```bash +git commit --allow-empty -m "Test pipeline" +git push origin main +``` + +Or manually trigger it: +1. Go to: **CI/CD โ†’ Pipelines** +2. Click **"Run pipeline"** +3. Select branch: `main` +4. Click **"Run pipeline"** + +## How It Works + +### Automatic Triggers + +The pipeline runs automatically on: +- Push to `main` branch +- Push to `develop` branch +- Push of version tags (v*) +- Merge requests + +### Service Discovery + +The pipeline automatically discovers services by scanning: +- `src/agentic_platform/agent/*/Dockerfile` +- `docker/*/Dockerfile` (fallback) + +### Change Detection + +The pipeline intelligently builds only changed services: +- **Core changes**: Rebuilds all services if core dependencies change +- **Service changes**: Rebuilds only affected services +- **New services**: Automatically builds services with missing ECR repositories + +### Parallel Builds + +All services build in parallel for faster execution. + +### ECR Repository Management + +The pipeline automatically: +- Creates ECR repositories if they don't exist +- Uses naming pattern: `agentic-platform-{service-name}` +- Tags images with commit SHA and `latest` + +## Pipeline Stages + +1. **detect-changes**: Discovers services and determines what to build +2. **build**: Builds and pushes Docker images to ECR in parallel +3. 
**summary**: Shows build results + +## Manual Triggering Options + +When manually triggering the pipeline, you can set the `MANUAL_SERVICES` variable: + +- `all` - Build all services +- `changed` - Build only changed services (default) +- `service1,service2` - Build specific services (comma-separated) + +## Monitoring + +View pipeline execution: +- Go to: **CI/CD โ†’ Pipelines** +- Click on a pipeline to see detailed logs +- Each service builds in parallel with its own log section + +## Troubleshooting + +### Pipeline fails with "Incorrect token audience" +- Verify the OIDC provider audience in AWS IAM is set to `https://gitlab.com` +- Re-run the setup script: `./script/setup_aws_oidc.sh YOUR_PROJECT_PATH` + +### Pipeline fails with "Access Denied" +- Verify `AWS_ROLE_ARN` variable is set correctly in GitLab +- Check the IAM role trust policy includes your GitLab project path + +### No services discovered +- Verify Dockerfiles exist in `src/agentic_platform/agent/{service}/Dockerfile` +- Check the detect-changes job logs for discovery output + +### Docker build fails +- Verify the Dockerfile paths are correct +- Check that all source files referenced in Dockerfile exist +- Review the build job logs for specific errors + +## AWS Resources Created + +The setup script creates: +1. **OIDC Identity Provider**: `gitlab.com` +2. **IAM Role**: `GitLabCIRole` (or custom name) +3. **IAM Policy**: Permissions for ECR operations +4. **ECR Repositories**: Created automatically during first build + +## Security Notes + +- No AWS credentials are stored in GitLab +- Uses OIDC for secure, temporary authentication +- IAM role is scoped to your specific GitLab project +- ECR permissions are limited to `agentic-platform-*` repositories + +## Support + +For detailed testing instructions, see `docs/TESTING_GUIDE.md` + +For issues or questions, contact your DevOps team. diff --git a/bootstrap/README-gitlab.md b/bootstrap/README-gitlab.md new file mode 100644 index 0000000..888d1da --- /dev/null +++ b/bootstrap/README-gitlab.md @@ -0,0 +1,210 @@ +# GitLab CI/CD OIDC Integration for AWS ECR + +This CloudFormation template enables secure, credential-free authentication between GitLab CI/CD pipelines and AWS using OpenID Connect (OIDC). It allows your GitLab pipelines to push container images to Amazon ECR without storing long-lived AWS credentials. 
+ +## ๐ŸŽฏ What This Template Creates + +- **OIDC Identity Provider**: Establishes trust between AWS and GitLab.com +- **IAM Role**: Provides temporary credentials to GitLab CI/CD pipelines +- **IAM Policy**: Grants permissions to create ECR repositories and push container images + +## ๐Ÿ“‹ Prerequisites + +- AWS account with permissions to create IAM resources +- GitLab project hosted on gitlab.com +- AWS CLI installed and configured (for deployment) + +## ๐Ÿš€ Deployment + +### Step 1: Gather Required Information + +From your GitLab project URL `https://gitlab.com/GROUP/PROJECT`: +- **GitLabGroup**: The group or namespace (e.g., `my-company`) +- **GitLabProject**: The project name (e.g., `my-app`) + +### Step 2: Deploy the CloudFormation Stack + +```bash +aws cloudformation create-stack \ + --stack-name gitlab-ci-oidc \ + --template-body file://gitlab-bootstrap.yaml \ + --parameters \ + ParameterKey=GitLabGroup,ParameterValue=YOUR_GROUP \ + ParameterKey=GitLabProject,ParameterValue=YOUR_PROJECT \ + ParameterKey=ECRRepositoryPrefix,ParameterValue=my-prefix \ + --capabilities CAPABILITY_NAMED_IAM \ + --region us-east-1 +``` + +**Parameters:** +- `GitLabGroup` (Required): Your GitLab group/namespace +- `GitLabProject` (Required): Your GitLab project name +- `ECRRepositoryPrefix` (Optional): Prefix for ECR repositories. Leave empty to use project name. + +### Step 3: Get Stack Outputs + +```bash +aws cloudformation describe-stacks \ + --stack-name gitlab-ci-oidc \ + --query 'Stacks[0].Outputs' \ + --region us-east-1 +``` + +Copy the `GitLabCIRoleArn` value. + +## โš™๏ธ GitLab Configuration + +### Add CI/CD Variables + +1. Go to your GitLab project: **Settings > CI/CD > Variables** +2. Add these variables: + +| Variable | Value | Protected | Masked | +|----------|-------|-----------|--------| +| `AWS_ROLE_ARN` | (ARN from stack outputs) | โœ… | โŒ | +| `AWS_REGION` | Your AWS region (e.g., `us-east-1`) | โŒ | โŒ | +| `AWS_ACCOUNT_ID` | Your 12-digit AWS account ID | โŒ | โŒ | + +### Create `.gitlab-ci.yml` + +Add this to your GitLab project: + +```yaml +image: docker:latest + +services: + - docker:dind + +variables: + AWS_DEFAULT_REGION: $AWS_REGION + ECR_REGISTRY: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com + IMAGE_NAME: my-prefix/my-app + +before_script: + - apk add --no-cache aws-cli + +stages: + - build + - push + +build-image: + stage: build + script: + - docker build -t $IMAGE_NAME:$CI_COMMIT_SHORT_SHA . 
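+    # The image is also tagged `latest` below for convenience. Note that with a per-job
+    # docker:dind service, images built in this job are not automatically visible to later
+    # jobs unless they are pushed or exported as artifacts.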
+ - docker tag $IMAGE_NAME:$CI_COMMIT_SHORT_SHA $IMAGE_NAME:latest + +push-to-ecr: + stage: push + id_tokens: + GITLAB_OIDC_TOKEN: + aud: sts.amazonaws.com + script: + # Assume the IAM role using OIDC token + - | + export $(printf "AWS_ACCESS_KEY_ID=%s AWS_SECRET_ACCESS_KEY=%s AWS_SESSION_TOKEN=%s" \ + $(aws sts assume-role-with-web-identity \ + --role-arn ${AWS_ROLE_ARN} \ + --role-session-name "gitlab-${CI_PROJECT_ID}-${CI_PIPELINE_ID}" \ + --web-identity-token ${GITLAB_OIDC_TOKEN} \ + --duration-seconds 3600 \ + --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken]' \ + --output text)) + + # Login to ECR + - aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $ECR_REGISTRY + + # Create repository if it doesn't exist + - aws ecr describe-repositories --repository-names $IMAGE_NAME --region $AWS_REGION || aws ecr create-repository --repository-name $IMAGE_NAME --region $AWS_REGION + + # Tag and push + - docker tag $IMAGE_NAME:latest $ECR_REGISTRY/$IMAGE_NAME:$CI_COMMIT_SHORT_SHA + - docker tag $IMAGE_NAME:latest $ECR_REGISTRY/$IMAGE_NAME:latest + - docker push $ECR_REGISTRY/$IMAGE_NAME:$CI_COMMIT_SHORT_SHA + - docker push $ECR_REGISTRY/$IMAGE_NAME:latest + only: + - main + - develop + - tags +``` + +## ๐Ÿ”’ Security Features + +### Branch Restrictions +The IAM role can only be assumed by pipelines running on: +- `main` branch +- `develop` branch +- Tags (e.g., `v1.0.0`) +- Merge request branches + +### ECR Repository Scoping +Permissions are limited to ECR repositories matching the configured prefix: +- With prefix `my-prefix`: Can access `my-prefix/*` repositories +- Without prefix: Can access `{project-name}/*` repositories + +### Temporary Credentials +- No long-lived AWS credentials stored in GitLab +- Credentials expire after 1 hour +- Each pipeline run gets unique credentials + +## ๐Ÿงช Testing + +### Quick Test + +Create a simple test pipeline: + +```yaml +test-aws-connection: + stage: test + id_tokens: + GITLAB_OIDC_TOKEN: + aud: sts.amazonaws.com + script: + - apk add --no-cache aws-cli + - | + export $(printf "AWS_ACCESS_KEY_ID=%s AWS_SECRET_ACCESS_KEY=%s AWS_SESSION_TOKEN=%s" \ + $(aws sts assume-role-with-web-identity \ + --role-arn ${AWS_ROLE_ARN} \ + --role-session-name "gitlab-test" \ + --web-identity-token ${GITLAB_OIDC_TOKEN} \ + --duration-seconds 3600 \ + --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken]' \ + --output text)) + - echo "โœ… Successfully authenticated to AWS!" 
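+    # get-caller-identity should now report the assumed GitLab CI role rather than any
+    # static identity, confirming the web-identity exchange actually took effect.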
+ - aws sts get-caller-identity + - aws ecr describe-repositories --region $AWS_REGION || echo "No repositories yet" +``` + +## ๐Ÿ”ง Troubleshooting + +### Error: "Couldn't retrieve verification key" +- Ensure you're using gitlab.com (not self-hosted GitLab) +- Verify the OIDC provider was created successfully in IAM + +### Error: "Not authorized to perform sts:AssumeRoleWithWebIdentity" +- Check that your GitLab project path matches the parameters +- Verify you're running on an allowed branch (main, develop, tags) +- Ensure the `id_tokens` section is in your `.gitlab-ci.yml` + +### Error: "Access Denied" when pushing to ECR +- Verify the repository name matches the configured prefix +- Check that the IAM policy is attached to the role + +## ๐Ÿ“š Additional Resources + +- [GitLab OIDC Documentation](https://docs.gitlab.com/ee/ci/cloud_services/) +- [AWS IAM OIDC Documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html) +- [Amazon ECR Documentation](https://docs.aws.amazon.com/ecr/) + +## ๐Ÿ—‘๏ธ Cleanup + +To remove all resources: + +```bash +aws cloudformation delete-stack \ + --stack-name gitlab-ci-oidc \ + --region us-east-1 +``` + +## ๐Ÿ“ License + +This template is provided as-is for use with AWS and GitLab integration. diff --git a/bootstrap/gitlab-bootstrap.yaml b/bootstrap/gitlab-bootstrap.yaml new file mode 100644 index 0000000..2155683 --- /dev/null +++ b/bootstrap/gitlab-bootstrap.yaml @@ -0,0 +1,166 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: | + GitLab CI/CD OIDC Integration for AWS ECR + + This template creates an OIDC identity provider and IAM role that allows GitLab CI/CD + pipelines to authenticate with AWS and push container images to Amazon ECR without + storing long-lived AWS credentials. + + Resources Created: + - OIDC Identity Provider for gitlab.com + - IAM Role with web identity federation + - IAM Policy with ECR push permissions + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: GitLab Project Configuration + Parameters: + - GitLabGroup + - GitLabProject + - Label: + default: ECR Configuration + Parameters: + - ECRRepositoryPrefix + ParameterLabels: + GitLabGroup: + default: GitLab Group/Namespace + GitLabProject: + default: GitLab Project Name + ECRRepositoryPrefix: + default: ECR Repository Prefix + +Parameters: + GitLabGroup: + Type: String + Description: 'GitLab group or namespace name (e.g., "my-company" from gitlab.com/my-company/my-project)' + AllowedPattern: '^[a-zA-Z0-9_.-]+$' + ConstraintDescription: 'Must contain only alphanumeric characters, hyphens, underscores, and periods' + + GitLabProject: + Type: String + Description: 'GitLab project name (e.g., "my-project" from gitlab.com/my-company/my-project)' + AllowedPattern: '^[a-zA-Z0-9_.-]+$' + ConstraintDescription: 'Must contain only alphanumeric characters, hyphens, underscores, and periods' + + ECRRepositoryPrefix: + Type: String + Description: 'Prefix for ECR repository names to scope permissions. Leave empty to use project name as prefix.' 
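+    # An empty value is valid; the HasRepositoryPrefix condition below then falls back to
+    # scoping ECR permissions to repositories named after the GitLab project.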
+ Default: '' + AllowedPattern: '^[a-z0-9][a-z0-9_.-]*$|^$' + ConstraintDescription: 'Must start with lowercase letter or number, and contain only lowercase letters, numbers, hyphens, underscores, and periods' +Conditions: + HasRepositoryPrefix: !Not [!Equals [!Ref ECRRepositoryPrefix, '']] + +Resources: + GitLabOIDCProvider: + Type: AWS::IAM::OIDCProvider + Properties: + Url: https://gitlab.com + ClientIdList: + - sts.amazonaws.com + ThumbprintList: + - 7e04de896a3e666be93d4e7d5781e61c4f9f5fb7 + - 1c58a3a8518e8759bf075b76b750d4f2df264fcd + + ECRPushPolicy: + Type: AWS::IAM::ManagedPolicy + Metadata: + cfn_nag: + rules_to_suppress: + - id: W13 + reason: 'ecr:GetAuthorizationToken requires wildcard resource as per AWS ECR API requirements' + Properties: + PolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: ECRGetAuthorizationToken + Effect: Allow + Action: + - ecr:GetAuthorizationToken + Resource: '*' + - Sid: ECRPushPermissions + Effect: Allow + Action: + - ecr:CreateRepository + - ecr:DescribeRepositories + - ecr:BatchCheckLayerAvailability + - ecr:GetDownloadUrlForLayer + - ecr:BatchGetImage + - ecr:InitiateLayerUpload + - ecr:UploadLayerPart + - ecr:CompleteLayerUpload + - ecr:PutImage + Resource: !If + - HasRepositoryPrefix + - !Sub 'arn:aws:ecr:${AWS::Region}:${AWS::AccountId}:repository/${ECRRepositoryPrefix}*' + - !Sub 'arn:aws:ecr:${AWS::Region}:${AWS::AccountId}:repository/${GitLabProject}*' + + GitLabCIRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub '${AWS::StackName}-GitLabCIRole' + Description: !Sub 'IAM role for GitLab CI/CD pipelines from ${GitLabGroup}/${GitLabProject}' + MaxSessionDuration: 3600 + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Federated: !GetAtt GitLabOIDCProvider.Arn + Action: sts:AssumeRoleWithWebIdentity + Condition: + StringEquals: + gitlab.com:aud: sts.amazonaws.com + StringLike: + gitlab.com:sub: + - !Sub 'project_path:${GitLabGroup}/${GitLabProject}:ref_type:branch:ref:main' + - !Sub 'project_path:${GitLabGroup}/${GitLabProject}:ref_type:branch:ref:develop' + - !Sub 'project_path:${GitLabGroup}/${GitLabProject}:ref_type:tag:ref:*' + - !Sub 'project_path:${GitLabGroup}/${GitLabProject}:ref_type:branch:ref:*' + ManagedPolicyArns: + - !Ref ECRPushPolicy + Tags: + - Key: ManagedBy + Value: CloudFormation + - Key: Purpose + Value: GitLabCICD + - Key: GitLabProject + Value: !Sub '${GitLabGroup}/${GitLabProject}' + +Outputs: + GitLabCIRoleArn: + Description: 'ARN of the IAM role for GitLab CI/CD. Add this to your GitLab CI/CD variables as AWS_ROLE_ARN' + Value: !GetAtt GitLabCIRole.Arn + Export: + Name: !Sub '${AWS::StackName}-RoleArn' + + GitLabCIRoleName: + Description: 'Name of the IAM role for GitLab CI/CD' + Value: !Ref GitLabCIRole + Export: + Name: !Sub '${AWS::StackName}-RoleName' + + OIDCProviderArn: + Description: 'ARN of the GitLab OIDC provider' + Value: !GetAtt GitLabOIDCProvider.Arn + Export: + Name: !Sub '${AWS::StackName}-OIDCProviderArn' + + ECRRepositoryPattern: + Description: 'ECR repository pattern that this role can access' + Value: !If + - HasRepositoryPrefix + - !Sub '${ECRRepositoryPrefix}*' + - !Sub '${GitLabProject}*' + + NextSteps: + Description: 'Configuration instructions' + Value: !Sub | + 1. Add these CI/CD variables in GitLab (Settings > CI/CD > Variables): + - AWS_ROLE_ARN: ${GitLabCIRole.Arn} + - AWS_REGION: ${AWS::Region} + - AWS_ACCOUNT_ID: ${AWS::AccountId} + 2. Use the provided .gitlab-ci.yml example to test the integration + 3. 
Ensure your GitLab project is at: https://gitlab.com/${GitLabGroup}/${GitLabProject} diff --git a/deploy/build-container-gitlab.sh b/deploy/build-container-gitlab.sh new file mode 100644 index 0000000..753e560 --- /dev/null +++ b/deploy/build-container-gitlab.sh @@ -0,0 +1,123 @@ +#!/bin/bash +set -e + +# This script is designed for GitLab CI/CD +# It expects one parameter: service name +# Dockerfiles are located in src/agentic_platform/agent/{service}/Dockerfile + +SERVICE_NAME="$1" + +if [ -z "$SERVICE_NAME" ]; then + echo "Usage: $0 " + echo "Example: $0 memory-gateway" + exit 1 +fi + +echo "Building service: $SERVICE_NAME" + +# Convert hyphens to underscores for folder paths +FOLDER_NAME="${SERVICE_NAME//-/_}" + +# Move to project root +cd "$(dirname "$0")/.." + +# Try to find Dockerfile in multiple locations +DOCKERFILE_PATH="" + +# Check src/agentic_platform/agent/ first +if [[ -f "src/agentic_platform/agent/${FOLDER_NAME}/Dockerfile" ]]; then + DOCKERFILE_PATH="src/agentic_platform/agent/${FOLDER_NAME}/Dockerfile" + BUILD_CONTEXT="." # Use repository root as build context + echo "Using Dockerfile from: $DOCKERFILE_PATH" +# Check src/agentic_platform/service/ as fallback +elif [[ -f "src/agentic_platform/service/${FOLDER_NAME}/Dockerfile" ]]; then + DOCKERFILE_PATH="src/agentic_platform/service/${FOLDER_NAME}/Dockerfile" + BUILD_CONTEXT="." # Use repository root as build context + echo "Using Dockerfile from: $DOCKERFILE_PATH" +# Check docker/ directory as last resort +elif [[ -f "docker/${FOLDER_NAME}/Dockerfile" ]]; then + DOCKERFILE_PATH="docker/${FOLDER_NAME}/Dockerfile" + BUILD_CONTEXT="." # Use repository root as build context + echo "Using Dockerfile from: $DOCKERFILE_PATH" +else + echo "Error: Dockerfile not found for service: $SERVICE_NAME" + echo "Searched locations:" + echo " - src/agentic_platform/agent/${FOLDER_NAME}/Dockerfile" + echo " - src/agentic_platform/service/${FOLDER_NAME}/Dockerfile" + echo " - docker/${FOLDER_NAME}/Dockerfile" + exit 1 +fi + +# Configuration +AWS_REGION="${AWS_REGION:-us-east-1}" +ECR_REPO_NAME="agentic-platform-${SERVICE_NAME}" +IMAGE_TAG="${CI_COMMIT_SHORT_SHA:-latest}" + +# Get AWS account ID +AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + +if [[ -z "$AWS_ACCOUNT_ID" || "$AWS_ACCOUNT_ID" == "None" ]]; then + echo "Error: Could not determine AWS Account ID" + exit 1 +fi + +ECR_REPO_URI="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$ECR_REPO_NAME" + +echo "Configuration:" +echo " Service: $SERVICE_NAME" +echo " Dockerfile: $DOCKERFILE_PATH" +echo " Build Context: $BUILD_CONTEXT" +echo " AWS Region: $AWS_REGION" +echo " AWS Account: $AWS_ACCOUNT_ID" +echo " ECR Repository: $ECR_REPO_NAME" +echo " Image Tag: $IMAGE_TAG" + +# Authenticate with ECR +echo "Authenticating with ECR..." +aws ecr get-login-password --region "$AWS_REGION" | \ + docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com" + +if [ $? -ne 0 ]; then + echo "Error: Failed to authenticate with ECR" + exit 1 +fi + +# Create ECR repository if it doesn't exist +echo "Ensuring ECR repository exists..." +if ! aws ecr describe-repositories --repository-names "$ECR_REPO_NAME" --region "$AWS_REGION" >/dev/null 2>&1; then + echo "Creating ECR repository: $ECR_REPO_NAME" + aws ecr create-repository --repository-name "$ECR_REPO_NAME" --region "$AWS_REGION" + if [ $? 
-ne 0 ]; then + echo "Error: Failed to create ECR repository" + exit 1 + fi +else + echo "ECR repository already exists: $ECR_REPO_NAME" +fi + +# Build Docker image +echo "Building Docker image..." +docker build \ + -t "$ECR_REPO_URI:$IMAGE_TAG" \ + -t "$ECR_REPO_URI:latest" \ + -f "$DOCKERFILE_PATH" \ + "$BUILD_CONTEXT" + +if [ $? -ne 0 ]; then + echo "Error: Docker build failed" + exit 1 +fi + +# Push images to ECR +echo "Pushing images to ECR..." +docker push "$ECR_REPO_URI:$IMAGE_TAG" +docker push "$ECR_REPO_URI:latest" + +if [ $? -ne 0 ]; then + echo "Error: Failed to push to ECR" + exit 1 +fi + +echo "โœ… Success! Images pushed to ECR:" +echo " - $ECR_REPO_URI:$IMAGE_TAG" +echo " - $ECR_REPO_URI:latest" diff --git a/script/setup_aws_oidc.sh b/script/setup_aws_oidc.sh new file mode 100755 index 0000000..4e0b56c --- /dev/null +++ b/script/setup_aws_oidc.sh @@ -0,0 +1,271 @@ +#!/bin/bash + +# AWS OIDC Setup Script for GitLab CI/CD +# This script configures AWS to trust GitLab as an OIDC identity provider + +set -e + +# Colors for output +GREEN='\033[0;32m' +BLUE='\033[0;34m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +# Configuration +GITLAB_URL="${GITLAB_URL:-https://gitlab.com}" +GITLAB_PROJECT_PATH="${1}" +ROLE_NAME="${2:-GitLabCIRole}" +AWS_REGION="${AWS_REGION:-us-east-1}" + +# Validate inputs +if [ -z "$GITLAB_PROJECT_PATH" ]; then + echo -e "${RED}Error: GitLab project path required${NC}" + echo "" + echo "Usage: $0 [role-name]" + echo "" + echo "Example: $0 mygroup/myproject GitLabCIRole" + echo "" + echo "Environment variables:" + echo " GITLAB_URL - GitLab instance URL (default: https://gitlab.com)" + echo " AWS_REGION - AWS region (default: us-east-1)" + exit 1 +fi + +echo -e "${BLUE}=== AWS OIDC Setup for GitLab CI/CD ===${NC}" +echo "" +echo -e "${BLUE}Configuration:${NC}" +echo " GitLab URL: ${GITLAB_URL}" +echo " Project Path: ${GITLAB_PROJECT_PATH}" +echo " Role Name: ${ROLE_NAME}" +echo " AWS Region: ${AWS_REGION}" +echo "" + +# Check AWS CLI is installed +if ! command -v aws &> /dev/null; then + echo -e "${RED}Error: AWS CLI not found. Please install it first.${NC}" + exit 1 +fi + +# Check AWS credentials are configured +if ! aws sts get-caller-identity &> /dev/null; then + echo -e "${RED}Error: AWS credentials not configured. 
Please run 'aws configure' first.${NC}" + exit 1 +fi + +AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +echo -e "${GREEN}AWS Account ID: ${AWS_ACCOUNT_ID}${NC}" +echo "" + +# Extract GitLab domain from URL +GITLAB_DOMAIN=$(echo "$GITLAB_URL" | sed 's|https://||' | sed 's|http://||' | sed 's|/.*||') + +# Step 1: Create OIDC Identity Provider +echo -e "${GREEN}Step 1: Creating OIDC Identity Provider...${NC}" + +OIDC_PROVIDER_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${GITLAB_DOMAIN}" + +# Check if provider already exists +if aws iam get-open-id-connect-provider --open-id-connect-provider-arn "$OIDC_PROVIDER_ARN" &> /dev/null; then + echo -e "${YELLOW} OIDC provider already exists: ${OIDC_PROVIDER_ARN}${NC}" +else + # Get GitLab's thumbprint + echo -e "${BLUE} Fetching GitLab OIDC thumbprint...${NC}" + + # For gitlab.com, use the known thumbprint + if [ "$GITLAB_DOMAIN" = "gitlab.com" ]; then + THUMBPRINT="1b511abead59c6ce207077c0bf0e0043b1382612" + else + # For self-hosted GitLab, calculate thumbprint + THUMBPRINT=$(echo | openssl s_client -servername "$GITLAB_DOMAIN" -connect "${GITLAB_DOMAIN}:443" 2>/dev/null | \ + openssl x509 -fingerprint -sha1 -noout | \ + cut -d'=' -f2 | tr -d ':' | tr '[:upper:]' '[:lower:]') + fi + + echo -e "${BLUE} Thumbprint: ${THUMBPRINT}${NC}" + + # Create the OIDC provider + aws iam create-open-id-connect-provider \ + --url "${GITLAB_URL}" \ + --client-id-list "${GITLAB_URL}" \ + --thumbprint-list "${THUMBPRINT}" \ + --region "${AWS_REGION}" + + echo -e "${GREEN} โœ“ OIDC provider created${NC}" +fi + +echo "" + +# Step 2: Create IAM Role with Trust Policy +echo -e "${GREEN}Step 2: Creating IAM Role with Trust Policy...${NC}" + +# Create trust policy document +TRUST_POLICY=$(cat < /dev/null; then + echo -e "${YELLOW} Role already exists: ${ROLE_NAME}${NC}" + echo -e "${BLUE} Updating trust policy...${NC}" + + # Update trust policy + echo "$TRUST_POLICY" > /tmp/trust-policy.json + aws iam update-assume-role-policy \ + --role-name "$ROLE_NAME" \ + --policy-document file:///tmp/trust-policy.json + rm /tmp/trust-policy.json + + echo -e "${GREEN} โœ“ Trust policy updated${NC}" +else + # Create the role + echo "$TRUST_POLICY" > /tmp/trust-policy.json + aws iam create-role \ + --role-name "$ROLE_NAME" \ + --assume-role-policy-document file:///tmp/trust-policy.json \ + --description "Role for GitLab CI/CD to access AWS resources" + rm /tmp/trust-policy.json + + echo -e "${GREEN} โœ“ IAM role created${NC}" +fi + +ROLE_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/${ROLE_NAME}" +echo "" + +# Step 3: Attach ECR Permissions Policy +echo -e "${GREEN}Step 3: Attaching ECR Permissions Policy...${NC}" + +# Create ECR permissions policy +ECR_POLICY_NAME="${ROLE_NAME}-ECR-Policy" +ECR_POLICY=$(cat < /dev/null; then + echo -e "${YELLOW} Policy already exists: ${ECR_POLICY_NAME}${NC}" + + # Create a new policy version + echo "$ECR_POLICY" > /tmp/ecr-policy.json + + # Delete oldest version if we have 5 versions (AWS limit) + VERSIONS=$(aws iam list-policy-versions --policy-arn "$POLICY_ARN" --query 'Versions[?IsDefaultVersion==`false`].VersionId' --output text) + VERSION_COUNT=$(echo "$VERSIONS" | wc -w) + if [ "$VERSION_COUNT" -ge 5 ]; then + OLDEST_VERSION=$(echo "$VERSIONS" | awk '{print $1}') + aws iam delete-policy-version --policy-arn "$POLICY_ARN" --version-id "$OLDEST_VERSION" + fi + + aws iam create-policy-version \ + --policy-arn "$POLICY_ARN" \ + --policy-document file:///tmp/ecr-policy.json \ + --set-as-default + rm /tmp/ecr-policy.json + 
+ echo -e "${GREEN} โœ“ Policy updated${NC}" +else + # Create the policy + echo "$ECR_POLICY" > /tmp/ecr-policy.json + aws iam create-policy \ + --policy-name "$ECR_POLICY_NAME" \ + --policy-document file:///tmp/ecr-policy.json \ + --description "ECR permissions for GitLab CI/CD" + rm /tmp/ecr-policy.json + + echo -e "${GREEN} โœ“ Policy created${NC}" +fi + +# Attach policy to role +if aws iam list-attached-role-policies --role-name "$ROLE_NAME" | grep -q "$ECR_POLICY_NAME"; then + echo -e "${YELLOW} Policy already attached to role${NC}" +else + aws iam attach-role-policy \ + --role-name "$ROLE_NAME" \ + --policy-arn "$POLICY_ARN" + + echo -e "${GREEN} โœ“ Policy attached to role${NC}" +fi + +echo "" + +# Step 4: Output Configuration +echo -e "${GREEN}=== Setup Complete ===${NC}" +echo "" +echo -e "${BLUE}OIDC Provider ARN:${NC}" +echo " ${OIDC_PROVIDER_ARN}" +echo "" +echo -e "${BLUE}IAM Role ARN:${NC}" +echo " ${ROLE_ARN}" +echo "" +echo -e "${YELLOW}Next steps:${NC}" +echo "1. Add the following CI/CD variables to your GitLab project:" +echo "" +echo " Variable Name: AWS_ROLE_ARN" +echo " Value: ${ROLE_ARN}" +echo "" +echo " Variable Name: AWS_REGION" +echo " Value: ${AWS_REGION}" +echo "" +echo "2. In GitLab, go to: Settings > CI/CD > Variables" +echo "3. Add the variables above" +echo "4. Ensure variables are protected if using protected branches" +echo "" +echo -e "${BLUE}Trust Policy Summary:${NC}" +echo " This role trusts GitLab project: ${GITLAB_PROJECT_PATH}" +echo " For all branches (ref_type:branch:ref:*)" +echo "" +echo -e "${YELLOW}To modify trust policy for specific branches:${NC}" +echo " Edit the trust policy condition to match your requirements" +echo " Example for main branch only:" +echo " \"${GITLAB_DOMAIN}:sub\": \"project_path:${GITLAB_PROJECT_PATH}:ref_type:branch:ref:main\"" +echo "" diff --git a/src/agentic_platform/agent/code_transform/Dockerfile b/src/agentic_platform/agent/code_transform/Dockerfile new file mode 100644 index 0000000..9d698f1 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/Dockerfile @@ -0,0 +1,78 @@ +# ATX Container Test Runner +# Base image: Ubuntu 22.04 LTS +FROM ubuntu:22.04 + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Set working directory +WORKDIR /workspace + +# Create non-root user for security +RUN groupadd -r atxuser && \ + useradd -r -g atxuser -m -s /bin/bash atxuser && \ + chown -R atxuser:atxuser /workspace + +# Install system dependencies including Node.js +RUN apt-get update && \ + apt-get install -y \ + curl \ + git \ + python3 \ + python3-pip \ + unzip \ + build-essential \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js 20.x (required by ATX CLI) +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt-get install -y nodejs + +# Install ATX CLI (AWS Transform CLI) +# Download and install the real ATX CLI from AWS +RUN curl -fsSL https://desktop-release.transform.us-east-1.api.aws/install.sh | bash + +# Make ATX accessible to all users and verify installation +RUN echo 'export PATH="$HOME/.local/bin:$PATH"' >> /root/.bashrc && \ + export PATH="$HOME/.local/bin:$PATH" && \ + cp /root/.local/bin/atx /usr/local/bin/atx && \ + chmod +x /usr/local/bin/atx && \ + echo 'export PATH="/usr/local/bin:$PATH"' >> /home/atxuser/.bashrc && \ + chown atxuser:atxuser /home/atxuser/.bashrc + +# Verify ATX installation (skip version check since it requires AWS credentials) +# ATX CLI is installed and will be verified during runtime 
when AWS credentials are available +RUN echo "ATX CLI installed successfully and accessible to all users" + +# Install AWS CLI v2 +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ + unzip awscliv2.zip && \ + ./aws/install && \ + rm -rf aws awscliv2.zip + +# Configure AWS CLI defaults +RUN aws --version + +# Copy scripts into image +COPY scripts/s3-integration.sh /usr/local/bin/s3-integration.sh +COPY scripts/atx-orchestrator.sh /usr/local/bin/atx-orchestrator.sh +COPY scripts/smoke-test.sh /usr/local/bin/smoke-test.sh +COPY scripts/csv-parser.sh /usr/local/bin/csv-parser.sh +COPY scripts/entrypoint.sh /usr/local/bin/entrypoint.sh +COPY scripts/test-orchestrator.sh /usr/local/bin/test-orchestrator.sh + +# Set executable permissions +RUN chmod +x /usr/local/bin/s3-integration.sh && \ + chmod +x /usr/local/bin/atx-orchestrator.sh && \ + chmod +x /usr/local/bin/smoke-test.sh && \ + chmod +x /usr/local/bin/csv-parser.sh && \ + chmod +x /usr/local/bin/entrypoint.sh && \ + chmod +x /usr/local/bin/test-orchestrator.sh + +# Switch to non-root user +USER atxuser + +# Set entrypoint and default command +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["--help"] diff --git a/src/agentic_platform/agent/code_transform/GITLAB-DEPLOYMENT.md b/src/agentic_platform/agent/code_transform/GITLAB-DEPLOYMENT.md new file mode 100644 index 0000000..ddcbbcb --- /dev/null +++ b/src/agentic_platform/agent/code_transform/GITLAB-DEPLOYMENT.md @@ -0,0 +1,288 @@ +# GitLab CI/CD Deployment Guide + +This guide walks you through setting up automated deployment of the ATX Container Test Runner using GitLab CI/CD with OIDC authentication. + +## ๐Ÿ” Prerequisites + +- GitLab account with CI/CD enabled +- AWS account with appropriate permissions +- Git installed locally + +## ๐Ÿš€ Step-by-Step Setup + +### Step 1: AWS Infrastructure Setup + +Run the automated setup script to create AWS resources: + +```bash +# Make the script executable +chmod +x setup-gitlab-ci.sh + +# Run the setup script +./setup-gitlab-ci.sh +``` + +This script creates: +- S3 buckets for source code and results +- OIDC Identity Provider for GitLab +- IAM role with secure temporary credentials +- ECR repository for Docker images + +**Important:** Save the output values - you'll need them for GitLab configuration. + +### Step 2: GitLab Repository Setup + +1. **Create a new GitLab repository** or use an existing one + +2. **Push this code to your GitLab repository:** + ```bash + git init + git add . + git commit -m "Initial ATX Container Test Runner setup" + git remote add origin https://gitlab.com/your-username/atx-container-test-runner.git + git push -u origin main + ``` + +### Step 3: Configure GitLab CI/CD Variables + +1. **Navigate to your GitLab project** +2. **Go to Settings โ†’ CI/CD โ†’ Variables** +3. 
**Add the following variables** (from setup script output): + +| Variable Name | Value | Protected | Masked | +|---------------|-------|-----------|--------| +| `AWS_REGION` | Your AWS region (e.g., us-east-1) | โœ… | โŒ | +| `AWS_ACCOUNT_ID` | Your 12-digit AWS account ID | โœ… | โœ… | +| `AWS_ROLE_ARN` | IAM role ARN from setup script | โœ… | โŒ | +| `SOURCE_BUCKET` | S3 bucket for source code | โœ… | โŒ | +| `RESULTS_BUCKET` | S3 bucket for results | โœ… | โŒ | + +**Security Notes:** +- Mark sensitive variables as "Protected" and "Masked" +- No AWS access keys needed - OIDC provides secure authentication +- Variables are only available to protected branches (main/master) + +### Step 4: Verify GitLab Runner Configuration + +Ensure your GitLab project has access to runners with Docker support: + +1. **Go to Settings โ†’ CI/CD โ†’ Runners** +2. **Verify Docker executor is available** +3. **If using shared runners:** Ensure Docker-in-Docker is enabled +4. **If using custom runners:** Verify Docker is installed and configured + +### Step 5: Trigger First Deployment + +```bash +# Trigger the pipeline +git commit --allow-empty -m "Trigger initial deployment" +git push origin main +``` + +## ๐Ÿ”„ Pipeline Stages + +The GitLab CI/CD pipeline includes these stages: + +### 1. **Validate** ๐Ÿ” +- Validates CloudFormation templates +- Checks script syntax +- Verifies configuration files + +### 2. **Build** ๐Ÿณ +- Builds Docker image +- Runs security scans +- Creates image tags + +### 3. **Test** ๐Ÿงช +- Runs smoke tests +- Validates ATX functionality +- Tests S3 integration + +### 4. **Push** ๐Ÿ“ฆ +- Authenticates with AWS using OIDC +- Pushes Docker image to ECR +- Tags images appropriately + +### 5. **Deploy** ๐Ÿš€ +- Deploys CloudFormation stack +- Updates ECS service +- Configures networking and security + +### 6. **Verify** โœ”๏ธ +- Checks deployment health +- Validates service endpoints +- Runs integration tests + +## ๐Ÿ”ง Pipeline Configuration + +The pipeline is configured in `.gitlab-ci.yml` with: + +- **OIDC Authentication:** Secure, temporary AWS credentials +- **Docker-in-Docker:** For building container images +- **Conditional Deployment:** Only deploys from main branch +- **Manual Approval:** For production deployments +- **Rollback Support:** Automatic rollback on failure + +## ๐Ÿ› Troubleshooting + +### Common Issues + +#### Pipeline Fails at Authentication +``` +Error: Could not assume role with OIDC +``` +**Solution:** +1. Verify OIDC Identity Provider is configured correctly +2. Check IAM role trust policy includes GitLab +3. Ensure variables are set correctly in GitLab + +#### Docker Build Fails +``` +Error: Cannot connect to Docker daemon +``` +**Solution:** +1. Verify GitLab Runner has Docker support +2. Check if Docker-in-Docker service is enabled +3. Ensure sufficient resources for Docker builds + +#### CloudFormation Deployment Fails +``` +Error: Stack creation failed +``` +**Solution:** +1. Check CloudWatch Logs for detailed errors +2. Verify IAM permissions for CloudFormation +3. Check resource limits and quotas +4. Review stack events in AWS Console + +#### ECS Task Fails to Start +``` +Error: Task stopped with exit code 1 +``` +**Solution:** +1. Check CloudWatch Logs: `/ecs/production-atx-test-runner` +2. Verify S3 bucket permissions +3. Check ECR image availability +4. 
Review task definition configuration + +### Debug Commands + +```bash +# Check pipeline logs +gitlab-ci-multi-runner exec docker validate + +# Test Docker build locally +docker build -t atx-test-runner . + +# Validate CloudFormation template +aws cloudformation validate-template --template-body file://deployment/cloudformation-complete-stack.yaml + +# Check AWS authentication +aws sts get-caller-identity +``` + +## ๐Ÿ”’ Security Best Practices + +### OIDC Configuration +- โœ… Use OIDC instead of long-lived access keys +- โœ… Limit token lifetime to 1 hour +- โœ… Restrict to specific branches (main/master) +- โœ… Use least privilege IAM policies + +### GitLab Variables +- โœ… Mark sensitive variables as "Protected" and "Masked" +- โœ… Use environment-specific variable scopes +- โœ… Regularly rotate any manual credentials +- โœ… Audit variable access logs + +### Container Security +- โœ… Enable ECR image scanning +- โœ… Use minimal base images +- โœ… Regularly update dependencies +- โœ… Scan for vulnerabilities in CI/CD + +## ๐Ÿ“Š Monitoring and Logging + +### CloudWatch Integration +- **Application Logs:** `/ecs/production-atx-test-runner` +- **Pipeline Logs:** Available in GitLab CI/CD interface +- **AWS CloudTrail:** API call auditing +- **CloudWatch Metrics:** ECS and ECR metrics + +### Alerts and Notifications +Configure GitLab notifications for: +- Pipeline failures +- Deployment completions +- Security scan results +- Performance issues + +## ๐Ÿ”„ Updates and Maintenance + +### Updating the Application +1. Make code changes +2. Commit and push to main branch +3. Pipeline automatically rebuilds and redeploys +4. Monitor deployment in GitLab and AWS Console + +### Updating Infrastructure +1. Modify CloudFormation templates in `deployment/` +2. Test changes in development environment +3. Deploy via GitLab pipeline +4. Verify changes in AWS Console + +### Rollback Procedure +If deployment fails: +1. Check pipeline logs for errors +2. Use GitLab's rollback feature if available +3. Or manually revert to previous commit: + ```bash + git revert HEAD + git push origin main + ``` + +## ๐Ÿงน Cleanup + +To remove all resources: + +### Via GitLab Pipeline +1. Go to **CI/CD โ†’ Pipelines** +2. Find latest pipeline +3. Click **destroy:stack** manual job +4. Click **Play** to execute + +### Manual Cleanup +```bash +# Delete CloudFormation stack +aws cloudformation delete-stack --stack-name atx-test-runner --region us-east-1 + +# Delete ECR repository +aws ecr delete-repository --repository-name atx-test-runner --force --region us-east-1 + +# Delete S3 buckets (after emptying them) +aws s3 rb s3://your-source-bucket --force +aws s3 rb s3://your-results-bucket --force +``` + +## ๐Ÿ“ž Support + +For issues specific to GitLab CI/CD: +1. Check GitLab pipeline logs +2. Review this guide's troubleshooting section +3. Consult [GitLab CI/CD documentation](https://docs.gitlab.com/ee/ci/) +4. Check AWS CloudWatch Logs for runtime issues + +For ATX-specific issues: +- See `docs/troubleshooting.md` +- Check `QUICK-REFERENCE.md` +- Review CloudWatch Logs + +## ๐ŸŽ‰ Success! + +Once setup is complete, your GitLab repository will automatically: +- Build and test code changes +- Deploy to AWS ECS +- Run ATX transformations +- Store results in S3 +- Provide detailed logging and monitoring + +Happy coding! 
๐Ÿš€ \ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/LICENSE b/src/agentic_platform/agent/code_transform/LICENSE new file mode 100644 index 0000000..bf927c4 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 ATX Container Test Runner + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/MANIFEST.txt b/src/agentic_platform/agent/code_transform/MANIFEST.txt new file mode 100644 index 0000000..aafe2b6 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/MANIFEST.txt @@ -0,0 +1,39 @@ +ATX Container Test Runner - Package Manifest +Version: 0.1.0 +Created: 20251208-160758 + +Essential Files: +- Dockerfile Container definition +- .gitlab-ci.yml GitLab CI/CD pipeline (OIDC) +- setup-gitlab-ci.sh OIDC setup script +- USER-GUIDE.md Customer getting started guide +- QUICK-REFERENCE.md Quick reference card +- OIDC-SETUP-GUIDE.md OIDC authentication guide +- GITLAB-DEPLOYMENT.md GitLab deployment guide +- QUICKSTART-ECS.md ECS quick start guide + +Documentation: +- docs/deployment-guide.md Complete deployment guide +- docs/troubleshooting.md Troubleshooting guide +- docs/build-and-test.md Build and test guide +- docs/smoke-test.md Smoke test documentation + +Scripts: +- scripts/atx-orchestrator.sh Main orchestration script +- scripts/s3-integration.sh S3 operations +- scripts/csv-parser.sh CSV parsing +- scripts/smoke-test.sh Smoke test +- scripts/push-to-ecr.sh ECR push automation +- scripts/build-and-test.sh Build and test automation + +Deployment Templates: +- deployment/cloudformation-complete-stack.yaml Complete CloudFormation stack +- deployment/ecs-task-definition.json ECS task definition +- deployment/kubernetes-deployment.yaml Kubernetes manifests +- deployment/terraform/ Terraform IaC files + +Examples: +- examples/*.csv Sample CSV configurations +- examples/ci-cd-integration.sh CI/CD integration example + + diff --git a/src/agentic_platform/agent/code_transform/OIDC-SETUP-GUIDE.md b/src/agentic_platform/agent/code_transform/OIDC-SETUP-GUIDE.md new file mode 100644 index 0000000..2e1f61e --- /dev/null +++ b/src/agentic_platform/agent/code_transform/OIDC-SETUP-GUIDE.md @@ -0,0 +1,277 @@ +# OIDC Setup Guide for GitLab CI/CD + +## What is OIDC? + +OpenID Connect (OIDC) allows GitLab CI/CD to authenticate with AWS using temporary, short-lived tokens instead of long-lived access keys. 
This provides enhanced security and eliminates the need to manage static credentials. + +## Benefits of OIDC vs Access Keys + +| Feature | OIDC | Access Keys | +|---------|------|-------------| +| **Security** | โœ… Temporary tokens (1 hour) | โŒ Long-lived credentials | +| **Rotation** | โœ… Automatic | โŒ Manual rotation required | +| **Compromise Risk** | โœ… Low (tokens expire quickly) | โŒ High (permanent until rotated) | +| **Compliance** | โœ… Better audit trail | โŒ Static credentials | +| **Management** | โœ… No key management | โŒ Key storage and rotation | + +## How It Works + +1. **GitLab generates OIDC token** when pipeline runs +2. **AWS STS validates token** against configured trust policy +3. **AWS returns temporary credentials** (AccessKey, SecretKey, SessionToken) +4. **Pipeline uses temporary credentials** for AWS operations +5. **Credentials expire automatically** after 1 hour + +## Setup Process + +### Prerequisites + +- AWS CLI installed and configured with admin permissions +- GitLab project with CI/CD enabled +- GitLab project ID (found in Project Settings โ†’ General) + +### Step 1: Run Setup Script + +```bash +chmod +x setup-gitlab-ci.sh +./setup-gitlab-ci.sh +``` + +The script will: +1. Create S3 buckets with encryption and versioning +2. Create OIDC Identity Provider for GitLab +3. Create IAM role with trust policy for your GitLab project +4. Attach necessary permissions to the role + +### Step 2: Configure GitLab Variables + +Add these variables to **Settings โ†’ CI/CD โ†’ Variables**: + +| Variable | Value | Description | +|----------|-------|-------------| +| `AWS_REGION` | `us-east-1` | AWS region | +| `AWS_ACCOUNT_ID` | `123456789012` | Your AWS account ID | +| `AWS_ROLE_ARN` | `arn:aws:iam::123456789012:role/GitLabCIRole` | IAM role ARN | +| `SOURCE_BUCKET` | `atx-test-source-123456789012` | S3 source bucket | +| `RESULTS_BUCKET` | `atx-test-results-123456789012` | S3 results bucket | + +**Note:** No `AWS_ACCESS_KEY_ID` or `AWS_SECRET_ACCESS_KEY` needed! + +### Step 3: Test the Pipeline + +Push code to trigger the pipeline: + +```bash +git add . +git commit -m "Test OIDC authentication" +git push origin main +``` + +## Trust Policy Explained + +The IAM role trust policy restricts access to: + +- **Specific GitLab project** (your project ID) +- **Main branch only** (ref:main) +- **GitLab.com domain** (or your GitLab instance) + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::ACCOUNT:oidc-provider/gitlab.com" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "gitlab.com:aud": "https://gitlab.com", + "gitlab.com:sub": "project_path:*:ref_type:branch:ref:main" + }, + "StringLike": { + "gitlab.com:project_id": "YOUR_PROJECT_ID" + } + } + } + ] +} +``` + +## Pipeline Configuration + +The `.gitlab-ci.yml` uses OIDC authentication: + +```yaml +.aws_oidc_auth: &aws_oidc_auth + id_tokens: + GITLAB_OIDC_TOKEN: + aud: https://gitlab.com + before_script: + - # Install AWS CLI + - # Assume role with OIDC token + - # Export temporary credentials +``` + +## Troubleshooting + +### Common Issues + +#### 1. "AssumeRoleWithWebIdentity failed" + +**Cause:** Trust policy mismatch or incorrect project ID + +**Solution:** +- Verify GitLab project ID is correct +- Check trust policy conditions +- Ensure pipeline runs on main branch + +#### 2. 
"Invalid identity token" + +**Cause:** OIDC provider not configured correctly + +**Solution:** +- Verify OIDC provider exists in AWS IAM +- Check thumbprint is correct +- Ensure GitLab URL matches + +#### 3. "Access denied" during AWS operations + +**Cause:** IAM role permissions insufficient + +**Solution:** +- Check IAM role has necessary permissions +- Verify resource ARNs in policy +- Test with broader permissions first + +### Debugging Steps + +1. **Check OIDC token:** + ```yaml + script: + - echo "Token audience: $(echo $GITLAB_OIDC_TOKEN | base64 -d | jq -r .aud)" + - echo "Token subject: $(echo $GITLAB_OIDC_TOKEN | base64 -d | jq -r .sub)" + ``` + +2. **Verify role assumption:** + ```yaml + script: + - aws sts get-caller-identity + - aws sts assume-role-with-web-identity --role-arn $AWS_ROLE_ARN --role-session-name test --web-identity-token $GITLAB_OIDC_TOKEN + ``` + +3. **Test permissions:** + ```yaml + script: + - aws s3 ls + - aws ecr describe-repositories + ``` + +## Security Best Practices + +### 1. Least Privilege Permissions + +Only grant permissions needed for the pipeline: + +```json +{ + "Effect": "Allow", + "Action": [ + "ecr:GetAuthorizationToken", + "ecr:BatchCheckLayerAvailability", + "s3:GetObject", + "s3:PutObject" + ], + "Resource": "arn:aws:s3:::atx-test-*/*" +} +``` + +### 2. Restrict to Specific Branches + +Limit OIDC access to main branch only: + +```json +{ + "StringEquals": { + "gitlab.com:sub": "project_path:*:ref_type:branch:ref:main" + } +} +``` + +### 3. Monitor and Audit + +- Enable CloudTrail for API calls +- Monitor AssumeRoleWithWebIdentity events +- Set up alerts for unusual activity + +### 4. Regular Review + +- Review IAM role permissions quarterly +- Update trust policy if project structure changes +- Monitor for unused permissions + +## Migration from Access Keys + +If migrating from access keys: + +1. **Run new setup script** to create OIDC role +2. **Update GitLab variables** (remove access keys, add role ARN) +3. **Test pipeline** with OIDC +4. **Delete old IAM user** and access keys +5. **Update documentation** for team + +## Advanced Configuration + +### Multiple Branches + +To allow multiple branches: + +```json +{ + "StringLike": { + "gitlab.com:sub": "project_path:*:ref_type:branch:ref:*" + } +} +``` + +### Multiple Projects + +To share role across projects: + +```json +{ + "StringLike": { + "gitlab.com:project_id": ["12345", "67890"] + } +} +``` + +### Custom Session Duration + +Extend session duration (max 12 hours): + +```yaml +script: + - aws sts assume-role-with-web-identity \ + --duration-seconds 43200 \ + --role-arn $AWS_ROLE_ARN \ + --web-identity-token $GITLAB_OIDC_TOKEN +``` + +## Resources + +- [AWS IAM OIDC Documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html) +- [GitLab OIDC Documentation](https://docs.gitlab.com/ee/ci/cloud_services/aws/) +- [AWS STS AssumeRoleWithWebIdentity](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html) + +## Support + +For issues with OIDC setup: + +1. Check pipeline logs for specific error messages +2. Verify trust policy conditions match your setup +3. Test role assumption manually with AWS CLI +4. Review CloudTrail logs for authentication attempts + +The OIDC approach provides significantly better security than access keys while simplifying credential management! 
\ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/QUICK-REFERENCE.md b/src/agentic_platform/agent/code_transform/QUICK-REFERENCE.md new file mode 100644 index 0000000..97ea5d7 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/QUICK-REFERENCE.md @@ -0,0 +1,173 @@ +# ATX Container Test Runner - Quick Reference + +## Setup Method + +**Shell Script Setup:** + +```bash +# 1. Run OIDC setup script +chmod +x setup-gitlab-ci.sh +./setup-gitlab-ci.sh + +# 2. Provide GitLab project ID when prompted + +# 3. Push to GitLab +git push origin main + +# 4. Add OIDC variables to GitLab Variables +# Settings โ†’ CI/CD โ†’ Variables +# (No access keys needed!) + +# 5. Pipeline runs automatically with secure OIDC +``` + +## Required GitLab Variables (OIDC) + +| Variable | Example | Protected | Masked | +|----------|---------|-----------|--------| +| `AWS_REGION` | `us-east-1` | | | +| `AWS_ACCOUNT_ID` | `123456789012` | | | +| `AWS_ROLE_ARN` | `arn:aws:iam::123456789012:role/GitLabCIRole` | | | +| `SOURCE_BUCKET` | `atx-test-source-...` | | | +| `RESULTS_BUCKET` | `atx-test-results-...` | | | + +**โœ… No access keys needed! OIDC provides secure, temporary credentials.** + +## Pipeline Stages + +### Main Pipeline (`.gitlab-ci.yml`) + +1. **validate** - Validate CloudFormation +2. **build** - Build Docker image +3. **test** - Run smoke tests +4. **push** - Push to ECR (main branch only) +5. **deploy** - Deploy to ECS (main branch only) +6. **verify** - Verify deployment + +## Common Commands + +### Check Pipeline Status +```bash +# View in GitLab +# CI/CD โ†’ Pipelines +``` + +### Run Manual Jobs +```bash +# In GitLab pipeline view: +# Click โ–ถ (Play) button next to job name +``` + +### View Logs +```bash +# CloudWatch Logs +aws logs tail /ecs/production-atx-test-runner --follow + +# Or in AWS Console: +# CloudWatch โ†’ Log groups โ†’ /ecs/production-atx-test-runner +``` + +### Run Test Task +```bash +# Use manual job in pipeline: run:test-task +# Or via AWS CLI: +aws ecs run-task \ + --cluster production-atx-cluster \ + --task-definition production-atx-test-runner \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={subnets=[subnet-xxx],securityGroups=[sg-xxx],assignPublicIp=ENABLED}" +``` + +### Destroy Stack +```bash +# Use manual job in pipeline: destroy:stack +# Or via AWS CLI: +aws cloudformation delete-stack --stack-name atx-test-runner +``` + +## Troubleshooting + +### Pipeline Fails at Build +- Check Dockerfile syntax +- Ensure Docker executor available + +### Pipeline Fails at Push +- Verify AWS credentials in GitLab +- Check IAM permissions for ECR + +### Pipeline Fails at Deploy +- Verify S3 buckets exist +- Check CloudFormation events in AWS Console + +### Task Fails to Run +- Check CloudWatch Logs +- Verify IAM task role has S3 permissions + +## File Structure + +``` +atx-container-test-runner/ +โ”œโ”€โ”€ .gitlab-ci.yml โ† Main deployment pipeline +โ”œโ”€โ”€ .gitlab-ci-setup.yml โ† Setup pipeline +โ”œโ”€โ”€ setup-gitlab-ci.sh โ† Local setup script +โ”œโ”€โ”€ USER-GUIDE.md โ† Start here +โ”œโ”€โ”€ SETUP-PIPELINE-GUIDE.md โ† Pipeline setup guide +โ”œโ”€โ”€ QUICK-REFERENCE.md โ† This file +โ”œโ”€โ”€ Dockerfile โ† Container definition +โ”œโ”€โ”€ deployment/ โ† CloudFormation, Terraform, K8s +โ”œโ”€โ”€ scripts/ โ† Orchestration scripts +โ”œโ”€โ”€ docs/ โ† Documentation +โ””โ”€โ”€ examples/ โ† Sample configurations +``` + +## Cost Estimate + +**Monthly costs (us-east-1):** +- ECS Fargate: ~$30-50 +- ECR Storage: ~$1-5 +- S3 
Storage: ~$1-10 +- CloudWatch Logs: ~$1-5 +- **Total: ~$35-70/month** + +## Support Resources + +- **Setup Issues:** `SETUP-PIPELINE-GUIDE.md` +- **Deployment Issues:** `docs/deployment-guide.md` +- **Troubleshooting:** `docs/troubleshooting.md` +- **GitLab CI/CD:** `GITLAB-DEPLOYMENT.md` +- **ECS Quick Start:** `QUICKSTART-ECS.md` + +## Quick Links + +- [GitLab CI/CD Docs](https://docs.gitlab.com/ee/ci/) +- [AWS ECS Docs](https://docs.aws.amazon.com/ecs/) +- [CloudFormation Docs](https://docs.aws.amazon.com/cloudformation/) +- [AWS CLI Reference](https://docs.aws.amazon.com/cli/) + +## Security Checklist + +- [ ] AWS credentials marked as Protected and Masked +- [ ] Using dedicated CI/CD IAM user (not personal credentials) +- [ ] S3 buckets have encryption enabled +- [ ] ECR image scanning enabled +- [ ] CloudWatch Logs configured +- [ ] IAM roles follow least privilege principle +- [ ] Access keys rotated regularly + +## Next Steps After Setup + +1. โœ… Verify all GitLab variables set +2. โœ… Push code to trigger pipeline +3. โœ… Monitor deployment in GitLab +4. โœ… Check ECS cluster in AWS Console +5. โœ… Run test task +6. โœ… Review CloudWatch Logs +7. โœ… Test with sample CSV file + +## Emergency Contacts + +For production issues: +1. Check CloudWatch Logs first +2. Review GitLab pipeline logs +3. Check AWS CloudFormation events +4. Contact your AWS support team if needed diff --git a/src/agentic_platform/agent/code_transform/QUICKSTART-ECS.md b/src/agentic_platform/agent/code_transform/QUICKSTART-ECS.md new file mode 100644 index 0000000..4013031 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/QUICKSTART-ECS.md @@ -0,0 +1,540 @@ +# Quick Start: Deploy to ECS and Test + +This guide will help you deploy the ATX Container Test Runner to AWS ECS Fargate and test it with sample Progress code. + +## Overview + +We'll use CloudFormation to deploy a complete ECS infrastructure including: +- VPC with public subnets +- ECS Fargate cluster +- IAM roles +- ECR repository +- CloudWatch Logs + +**Estimated time**: 20-25 minutes + +## Step 1: Prerequisites Check (2 minutes) + +```bash +# Verify AWS CLI +aws --version +# Expected: aws-cli/2.x.x or higher + +# Verify Docker +docker --version +# Expected: Docker version 20.10.x or higher + +# Check AWS credentials and get account info +aws sts get-caller-identity + +# Set environment variables +export AWS_REGION="us-east-1" # Change to your preferred region +export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + +echo "โœ“ AWS Account ID: ${AWS_ACCOUNT_ID}" +echo "โœ“ AWS Region: ${AWS_REGION}" +``` + +## Step 2: Create S3 Buckets (2 minutes) + +```bash +# Create unique bucket names +export SOURCE_BUCKET="atx-test-source-${AWS_ACCOUNT_ID}" +export RESULTS_BUCKET="atx-test-results-${AWS_ACCOUNT_ID}" + +# Create source bucket +aws s3 mb s3://${SOURCE_BUCKET} --region ${AWS_REGION} +echo "โœ“ Created source bucket: ${SOURCE_BUCKET}" + +# Create results bucket +aws s3 mb s3://${RESULTS_BUCKET} --region ${AWS_REGION} +echo "โœ“ Created results bucket: ${RESULTS_BUCKET}" + +# Verify buckets +aws s3 ls | grep atx-test +``` + +## Step 3: Create Sample Progress Code (1 minute) + +```bash +# Create test directory +mkdir -p test-progress-code + +# Create a sample Progress file +cat > test-progress-code/sample.p << 'EOF' +/* Sample Progress Code for ATX Testing */ +DEFINE VARIABLE cCustomerName AS CHARACTER NO-UNDO. +DEFINE VARIABLE iOrderCount AS INTEGER NO-UNDO. +DEFINE VARIABLE dTotalAmount AS DECIMAL NO-UNDO. 
+ +/* Sample procedure */ +PROCEDURE ProcessOrder: + DEFINE INPUT PARAMETER pcOrderId AS CHARACTER NO-UNDO. + DEFINE INPUT PARAMETER pdAmount AS DECIMAL NO-UNDO. + + ASSIGN + iOrderCount = iOrderCount + 1 + dTotalAmount = dTotalAmount + pdAmount. + + MESSAGE "Processed order: " + pcOrderId + " Amount: " + STRING(pdAmount) + VIEW-AS ALERT-BOX. +END PROCEDURE. + +/* Main block */ +DO: + ASSIGN + cCustomerName = "Test Customer" + iOrderCount = 0 + dTotalAmount = 0.00. + + RUN ProcessOrder(INPUT "ORD-001", INPUT 100.50). + RUN ProcessOrder(INPUT "ORD-002", INPUT 250.75). + + MESSAGE "Customer: " + cCustomerName SKIP + "Total Orders: " + STRING(iOrderCount) SKIP + "Total Amount: $" + STRING(dTotalAmount, ">>>,>>9.99") + VIEW-AS ALERT-BOX. +END. +EOF + +echo "โœ“ Created sample Progress code" +``` + +## Step 4: Upload Sample Code to S3 (1 minute) + +```bash +# Upload to S3 +aws s3 sync test-progress-code/ s3://${SOURCE_BUCKET}/customer1/folder1/ + +# Verify upload +echo "โœ“ Uploaded files:" +aws s3 ls s3://${SOURCE_BUCKET}/customer1/folder1/ +``` + +## Step 5: Build Docker Image (5 minutes) + +```bash +# Build the image +echo "Building Docker image..." +docker build -t atx-test-runner:latest . + +# Verify build +docker images | grep atx-test-runner + +# Run local smoke test +echo "Running smoke test..." +docker run --rm atx-test-runner:latest --smoke-test + +# Expected output: +# ========================================== +# ATX Container Smoke Test +# ========================================== +# โœ“ ATX CLI found +# โœ“ AWS CLI found +# โœ“ Test code created +# โœ“ ATX transformation successful +# SMOKE TEST PASSED +``` + +## Step 6: Push Image to ECR (3 minutes) + +```bash +# Use the automated script +chmod +x scripts/push-to-ecr.sh +./scripts/push-to-ecr.sh ${AWS_ACCOUNT_ID} ${AWS_REGION} + +# The script will: +# โœ“ Authenticate to ECR +# โœ“ Create repository (with image scanning and encryption) +# โœ“ Tag image with version and latest +# โœ“ Push to ECR +# โœ“ Verify push + +# Save ECR URI for later +export ECR_URI="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/atx-test-runner" +echo "โœ“ Image pushed to: ${ECR_URI}:latest" +``` + +## Step 7: Deploy ECS Infrastructure with CloudFormation (8-10 minutes) + +```bash +# Deploy the complete stack +echo "Deploying CloudFormation stack..." +aws cloudformation create-stack \ + --stack-name atx-test-runner \ + --template-body file://deployment/cloudformation-complete-stack.yaml \ + --parameters \ + ParameterKey=SourceBucketName,ParameterValue=${SOURCE_BUCKET} \ + ParameterKey=ResultsBucketName,ParameterValue=${RESULTS_BUCKET} \ + ParameterKey=ImageVersion,ParameterValue=latest \ + ParameterKey=EnvironmentName,ParameterValue=test \ + ParameterKey=MaxParallelJobs,ParameterValue=4 \ + ParameterKey=TaskCpu,ParameterValue=2048 \ + ParameterKey=TaskMemory,ParameterValue=4096 \ + --capabilities CAPABILITY_NAMED_IAM \ + --region ${AWS_REGION} + +echo "โœ“ Stack creation initiated" +echo "โณ Waiting for stack to complete (this takes 5-10 minutes)..." +echo " You can monitor progress in the AWS Console:" +echo " https://console.aws.amazon.com/cloudformation/home?region=${AWS_REGION}#/stacks" + +# Wait for completion +aws cloudformation wait stack-create-complete \ + --stack-name atx-test-runner \ + --region ${AWS_REGION} + +echo "โœ“ Stack created successfully!" 
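+
+# Optional troubleshooting aid (not needed when the wait above succeeds):
+# uncomment the example below to list any resources that failed and the reason CloudFormation reports.
+# aws cloudformation describe-stack-events \
+#   --stack-name atx-test-runner \
+#   --region ${AWS_REGION} \
+#   --query 'StackEvents[?contains(ResourceStatus, `FAILED`)].[LogicalResourceId,ResourceStatusReason]' \
+#   --output table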
+``` + +## Step 8: Get Stack Outputs (1 minute) + +```bash +# Get all outputs +echo "Stack Outputs:" +aws cloudformation describe-stacks \ + --stack-name atx-test-runner \ + --region ${AWS_REGION} \ + --query 'Stacks[0].Outputs[*].[OutputKey,OutputValue]' \ + --output table + +# Save important values +export CLUSTER_NAME=$(aws cloudformation describe-stacks \ + --stack-name atx-test-runner \ + --query 'Stacks[0].Outputs[?OutputKey==`ClusterName`].OutputValue' \ + --output text \ + --region ${AWS_REGION}) + +export TASK_DEFINITION=$(aws cloudformation describe-stacks \ + --stack-name atx-test-runner \ + --query 'Stacks[0].Outputs[?OutputKey==`TaskDefinitionArn`].OutputValue' \ + --output text \ + --region ${AWS_REGION}) + +export SUBNET_1=$(aws cloudformation describe-stacks \ + --stack-name atx-test-runner \ + --query 'Stacks[0].Outputs[?OutputKey==`Subnet1Id`].OutputValue' \ + --output text \ + --region ${AWS_REGION}) + +export SUBNET_2=$(aws cloudformation describe-stacks \ + --stack-name atx-test-runner \ + --query 'Stacks[0].Outputs[?OutputKey==`Subnet2Id`].OutputValue' \ + --output text \ + --region ${AWS_REGION}) + +export SECURITY_GROUP=$(aws cloudformation describe-stacks \ + --stack-name atx-test-runner \ + --query 'Stacks[0].Outputs[?OutputKey==`SecurityGroupId`].OutputValue' \ + --output text \ + --region ${AWS_REGION}) + +echo "โœ“ Cluster: ${CLUSTER_NAME}" +echo "โœ“ Task Definition: ${TASK_DEFINITION}" +echo "โœ“ Subnets: ${SUBNET_1}, ${SUBNET_2}" +echo "โœ“ Security Group: ${SECURITY_GROUP}" +``` + +## Step 9: Create CSV Configuration (1 minute) + +```bash +# Create CSV file +cat > test-repos.csv << EOF +s3_path,build_command,transformation_name,output_s3_path +s3://${SOURCE_BUCKET}/customer1/folder1/,noop,Comprehensive-Codebase-Analysis,s3://${RESULTS_BUCKET}/customer1/folder1/ +EOF + +echo "โœ“ Created CSV configuration:" +cat test-repos.csv + +# Upload to S3 (optional - we'll pass it directly to the task) +aws s3 cp test-repos.csv s3://${SOURCE_BUCKET}/config/test-repos.csv +echo "โœ“ Uploaded CSV to S3" +``` + +## Step 10: Run ECS Task (2 minutes) + +```bash +# Run the task +echo "Running ECS task..." +TASK_ARN=$(aws ecs run-task \ + --cluster ${CLUSTER_NAME} \ + --task-definition ${TASK_DEFINITION} \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={ + subnets=[${SUBNET_1},${SUBNET_2}], + securityGroups=[${SECURITY_GROUP}], + assignPublicIp=ENABLED + }" \ + --region ${AWS_REGION} \ + --query 'tasks[0].taskArn' \ + --output text) + +echo "โœ“ Task started: ${TASK_ARN}" +echo "โณ Task is running..." + +# Get short task ID for logs +TASK_ID=$(echo ${TASK_ARN} | awk -F/ '{print $NF}') +echo " Task ID: ${TASK_ID}" +``` + +## Step 11: Monitor Task Execution (3-5 minutes) + +```bash +# Check task status +echo "Checking task status..." +aws ecs describe-tasks \ + --cluster ${CLUSTER_NAME} \ + --tasks ${TASK_ARN} \ + --region ${AWS_REGION} \ + --query 'tasks[0].[lastStatus,desiredStatus,stopCode,stoppedReason]' \ + --output table + +# Stream logs (in a new terminal or wait a moment for logs to appear) +echo "Streaming CloudWatch logs..." 
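+# Note: the log group below follows the /ecs/<EnvironmentName>-atx-test-runner naming from the
+# CloudFormation template; "test" matches the EnvironmentName parameter passed in Step 7.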
+echo "Press Ctrl+C to stop streaming" +aws logs tail /ecs/test-atx-test-runner --follow --region ${AWS_REGION} + +# Alternative: Get logs without streaming +# aws logs tail /ecs/test-atx-test-runner --since 5m --region ${AWS_REGION} +``` + +### Monitor in AWS Console + +You can also monitor in the AWS Console: +- **ECS Tasks**: https://console.aws.amazon.com/ecs/home?region=${AWS_REGION}#/clusters/${CLUSTER_NAME}/tasks +- **CloudWatch Logs**: https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group/$252Fecs$252Ftest-atx-test-runner + +## Step 12: Check Results (2 minutes) + +```bash +# Wait for task to complete +echo "Waiting for task to complete..." +aws ecs wait tasks-stopped \ + --cluster ${CLUSTER_NAME} \ + --tasks ${TASK_ARN} \ + --region ${AWS_REGION} + +echo "โœ“ Task completed" + +# Check final task status +aws ecs describe-tasks \ + --cluster ${CLUSTER_NAME} \ + --tasks ${TASK_ARN} \ + --region ${AWS_REGION} \ + --query 'tasks[0].[lastStatus,stopCode,stoppedReason,containers[0].exitCode]' \ + --output table + +# List results in S3 +echo "Results in S3:" +aws s3 ls s3://${RESULTS_BUCKET}/ --recursive + +# Download results +mkdir -p results +aws s3 sync s3://${RESULTS_BUCKET}/ ./results/ + +echo "โœ“ Results downloaded to ./results/" + +# View the analysis (if it exists) +if [ -f results/customer1/folder1/analysis.md ]; then + echo "ATX Analysis Output:" + cat results/customer1/folder1/analysis.md +else + echo "Checking for other result files..." + find results/ -type f +fi +``` + +## Step 13: Run Another Test (Optional) + +```bash +# Run the task again with different parameters +aws ecs run-task \ + --cluster ${CLUSTER_NAME} \ + --task-definition ${TASK_DEFINITION} \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={ + subnets=[${SUBNET_1},${SUBNET_2}], + securityGroups=[${SECURITY_GROUP}], + assignPublicIp=ENABLED + }" \ + --overrides '{ + "containerOverrides": [{ + "name": "atx-test-runner", + "command": [ + "/usr/local/bin/atx-orchestrator.sh", + "--csv-file", "/config/repos.csv", + "--mode", "parallel", + "--max-jobs", "8", + "--verbose" + ] + }] + }' \ + --region ${AWS_REGION} +``` + +## Troubleshooting + +### Task fails to start + +```bash +# Check task events +aws ecs describe-tasks \ + --cluster ${CLUSTER_NAME} \ + --tasks ${TASK_ARN} \ + --region ${AWS_REGION} \ + --query 'tasks[0].containers[0].[reason,lastStatus]' + +# Check CloudWatch logs +aws logs tail /ecs/test-atx-test-runner --since 10m --region ${AWS_REGION} +``` + +### No results in S3 + +```bash +# Check if task completed successfully +aws ecs describe-tasks \ + --cluster ${CLUSTER_NAME} \ + --tasks ${TASK_ARN} \ + --region ${AWS_REGION} \ + --query 'tasks[0].containers[0].exitCode' + +# Check logs for errors +aws logs filter-log-events \ + --log-group-name /ecs/test-atx-test-runner \ + --filter-pattern "ERROR" \ + --region ${AWS_REGION} +``` + +### IAM permission issues + +```bash +# Verify task role has S3 permissions +aws iam get-role-policy \ + --role-name test-atx-task-role \ + --policy-name S3Access \ + --region ${AWS_REGION} +``` + +### Network issues + +```bash +# Verify security group allows outbound traffic +aws ec2 describe-security-groups \ + --group-ids ${SECURITY_GROUP} \ + --region ${AWS_REGION} \ + --query 'SecurityGroups[0].IpPermissionsEgress' + +# Verify subnets have internet access +aws ec2 describe-route-tables \ + --filters "Name=association.subnet-id,Values=${SUBNET_1}" \ + --region ${AWS_REGION} \ + --query 
'RouteTables[0].Routes' +``` + +## Cleanup + +When you're done testing, clean up all resources: + +```bash +# Delete CloudFormation stack (this removes ECS, VPC, IAM roles, etc.) +echo "Deleting CloudFormation stack..." +aws cloudformation delete-stack \ + --stack-name atx-test-runner \ + --region ${AWS_REGION} + +echo "โณ Waiting for stack deletion..." +aws cloudformation wait stack-delete-complete \ + --stack-name atx-test-runner \ + --region ${AWS_REGION} + +echo "โœ“ Stack deleted" + +# Empty and delete S3 buckets +echo "Cleaning up S3 buckets..." +aws s3 rm s3://${SOURCE_BUCKET} --recursive +aws s3 rb s3://${SOURCE_BUCKET} + +aws s3 rm s3://${RESULTS_BUCKET} --recursive +aws s3 rb s3://${RESULTS_BUCKET} + +echo "โœ“ S3 buckets deleted" + +# Delete ECR images and repository +echo "Cleaning up ECR..." +aws ecr batch-delete-image \ + --repository-name atx-test-runner \ + --image-ids imageTag=latest imageTag=0.1.0 \ + --region ${AWS_REGION} 2>/dev/null || true + +aws ecr delete-repository \ + --repository-name atx-test-runner \ + --force \ + --region ${AWS_REGION} + +echo "โœ“ ECR repository deleted" + +# Clean up local files +rm -rf test-progress-code results test-repos.csv + +echo "โœ“ Cleanup complete!" +``` + +## Quick Reference Commands + +```bash +# View running tasks +aws ecs list-tasks --cluster ${CLUSTER_NAME} --region ${AWS_REGION} + +# Stream logs +aws logs tail /ecs/test-atx-test-runner --follow --region ${AWS_REGION} + +# Check results +aws s3 ls s3://${RESULTS_BUCKET}/ --recursive + +# Stop a running task +aws ecs stop-task --cluster ${CLUSTER_NAME} --task ${TASK_ARN} --region ${AWS_REGION} + +# Update task definition (after code changes) +# 1. Build and push new image +docker build -t atx-test-runner:latest . +./scripts/push-to-ecr.sh ${AWS_ACCOUNT_ID} ${AWS_REGION} + +# 2. Update CloudFormation stack +aws cloudformation update-stack \ + --stack-name atx-test-runner \ + --use-previous-template \ + --parameters \ + ParameterKey=SourceBucketName,UsePreviousValue=true \ + ParameterKey=ResultsBucketName,UsePreviousValue=true \ + ParameterKey=ImageVersion,ParameterValue=latest \ + ParameterKey=EnvironmentName,UsePreviousValue=true \ + --capabilities CAPABILITY_NAMED_IAM \ + --region ${AWS_REGION} +``` + +## Next Steps + +1. **Test with your own Progress code**: Upload your code to S3 and update the CSV +2. **Test parallel execution**: Add multiple folders to the CSV +3. **Set up scheduled execution**: Use EventBridge to run tasks on a schedule +4. **Configure monitoring**: Set up CloudWatch alarms for task failures +5. **Optimize costs**: Use Fargate Spot for non-critical workloads + +## Additional Resources + +- [Complete Deployment Guide](docs/deployment-guide.md) +- [CloudFormation Template Details](deployment/cloudformation-complete-stack.yaml) +- [Troubleshooting Guide](docs/troubleshooting.md) +- [ECS Documentation](https://docs.aws.amazon.com/ecs/) + +## Summary + +You've successfully: +- โœ… Built and pushed a Docker image to ECR +- โœ… Deployed a complete ECS Fargate infrastructure +- โœ… Run an ATX transformation on sample Progress code +- โœ… Retrieved results from S3 + +The infrastructure is now ready for production use with your own Progress code! 
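+
+When you move on to parallel execution (see "Next Steps" above), the only change is a CSV with more rows. The folder names below are illustrative; reuse the `SOURCE_BUCKET` and `RESULTS_BUCKET` variables from Step 2:
+
+```bash
+# Illustrative multi-folder CSV; adjust the folders to match your own layout
+cat > parallel-repos.csv << EOF
+s3_path,build_command,transformation_name,output_s3_path
+s3://${SOURCE_BUCKET}/customer1/folder1/,noop,Comprehensive-Codebase-Analysis,s3://${RESULTS_BUCKET}/customer1/folder1/
+s3://${SOURCE_BUCKET}/customer1/folder2/,noop,Comprehensive-Codebase-Analysis,s3://${RESULTS_BUCKET}/customer1/folder2/
+EOF
+
+aws s3 cp parallel-repos.csv s3://${SOURCE_BUCKET}/config/parallel-repos.csv
+```
+
+Then run the task with the container command override shown in Step 13, pointing `--csv-file` at the uploaded CSV (the task definition in `deployment/cloudformation-complete-stack.yaml` passes an S3 path to `--csv-file` the same way).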
diff --git a/src/agentic_platform/agent/code_transform/README.md b/src/agentic_platform/agent/code_transform/README.md new file mode 100644 index 0000000..e449c14 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/README.md @@ -0,0 +1,224 @@ +# ATX Container Test Runner + +A containerized solution for running Amazon Transform eXtension (ATX) transformations at scale using AWS ECS, with automated CI/CD deployment via GitLab. + +## ๐ŸŽฏ Overview + +This project provides a complete infrastructure-as-code solution for deploying ATX transformations in a scalable, secure, and cost-effective manner. It orchestrates batch processing of code transformations stored in S3, executes them using ATX, and stores results back to S3. + +## โœจ Key Features + +- **๐Ÿณ Containerized ATX Runtime** - Consistent execution environment +- **๐Ÿ“Š Batch Processing** - Process multiple repositories simultaneously +- **๐Ÿ”„ GitLab CI/CD Integration** - Automated deployment and updates +- **๐Ÿ” OIDC Authentication** - Secure, keyless AWS access +- **๐Ÿ“ˆ Auto-scaling** - ECS Fargate with configurable scaling +- **๐Ÿ“ Comprehensive Logging** - CloudWatch integration with detailed logs +- **๐Ÿ’ฐ Cost Optimized** - Spot instances and pay-per-use pricing +- **๐Ÿ›ก๏ธ Security First** - Least privilege IAM, VPC isolation, encryption + +## ๐Ÿ—๏ธ Architecture + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ GitLab CI/CD โ”‚โ”€โ”€โ”€โ–ถโ”‚ Amazon ECR โ”‚โ”€โ”€โ”€โ–ถโ”‚ Amazon ECS โ”‚ +โ”‚ โ”‚ โ”‚ (Docker Images) โ”‚ โ”‚ (Fargate) โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ Amazon S3 โ”‚โ—€โ”€โ”€โ”€โ”‚ ATX Container โ”‚โ—€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +โ”‚ (Source & Results) โ”‚ (Transformations)โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ CloudWatch Logs โ”‚ + โ”‚ (Monitoring) โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +## ๐Ÿš€ Quick Start + +### For Customers (Recommended) +See **[USER-GUIDE.md](USER-GUIDE.md)** for the complete customer deployment guide. + +### For Developers +1. **Clone the repository** + ```bash + git clone + cd atx-container-test-runner + ``` + +2. **Set up AWS infrastructure** + ```bash + chmod +x setup-gitlab-ci.sh + ./setup-gitlab-ci.sh + ``` + +3. **Configure GitLab CI/CD** + - See [GITLAB-DEPLOYMENT.md](GITLAB-DEPLOYMENT.md) for detailed setup + +4. 
**Deploy** + ```bash + git push origin main + ``` + +## ๐Ÿ“š Documentation + +| Document | Purpose | +|----------|---------| +| **[USER-GUIDE.md](USER-GUIDE.md)** | ๐Ÿ‘ฅ Complete user deployment guide | +| **[GITLAB-DEPLOYMENT.md](GITLAB-DEPLOYMENT.md)** | ๐Ÿ”„ GitLab CI/CD setup and troubleshooting | +| **[QUICKSTART-ECS.md](QUICKSTART-ECS.md)** | โšก Quick ECS deployment guide | +| **[OIDC-SETUP-GUIDE.md](OIDC-SETUP-GUIDE.md)** | ๐Ÿ” OIDC authentication setup | +| **[QUICK-REFERENCE.md](QUICK-REFERENCE.md)** | ๐Ÿ“– Command reference and examples | +| **[docs/deployment-guide.md](docs/deployment-guide.md)** | ๐Ÿ—๏ธ Detailed deployment options | +| **[docs/troubleshooting.md](docs/troubleshooting.md)** | ๐Ÿ› Troubleshooting guide | +| **[docs/build-and-test.md](docs/build-and-test.md)** | ๐Ÿงช Build and test instructions | + +## ๐Ÿ”ง Deployment Options + +### 1. GitLab CI/CD (Recommended) +- **Automated deployment** on every push +- **OIDC authentication** for security +- **Built-in testing** and validation +- See [GITLAB-DEPLOYMENT.md](GITLAB-DEPLOYMENT.md) + +### 2. CloudFormation +- **Infrastructure as Code** approach +- **Complete stack** deployment +- Uses `deployment/cloudformation-complete-stack.yaml` + +### 3. Terraform +- **Multi-cloud** infrastructure management +- **State management** and planning +- Uses `deployment/terraform/` + +### 4. Manual ECS Deployment +- **Step-by-step** manual process +- **Learning-focused** approach +- See [QUICKSTART-ECS.md](QUICKSTART-ECS.md) + +## ๐Ÿ“ Repository Structure + +``` +atx-container-test-runner/ +โ”œโ”€โ”€ ๐Ÿ“„ USER-GUIDE.md # Customer deployment guide +โ”œโ”€โ”€ ๐Ÿ“„ GITLAB-DEPLOYMENT.md # GitLab CI/CD setup guide +โ”œโ”€โ”€ ๐Ÿ“„ QUICKSTART-ECS.md # Quick ECS deployment +โ”œโ”€โ”€ ๐Ÿ“„ OIDC-SETUP-GUIDE.md # OIDC authentication setup +โ”œโ”€โ”€ ๐Ÿ“„ QUICK-REFERENCE.md # Command reference +โ”œโ”€โ”€ ๐Ÿ”ง setup-gitlab-ci.sh # Automated AWS setup +โ”œโ”€โ”€ ๐Ÿณ Dockerfile # Container definition +โ”œโ”€โ”€ โš™๏ธ .gitlab-ci.yml # CI/CD pipeline +โ”œโ”€โ”€ ๐Ÿ“‹ VERSION # Current version +โ”‚ +โ”œโ”€โ”€ ๐Ÿ“‚ docs/ # Documentation +โ”‚ โ”œโ”€โ”€ deployment-guide.md # Detailed deployment guide +โ”‚ โ”œโ”€โ”€ troubleshooting.md # Troubleshooting help +โ”‚ โ”œโ”€โ”€ build-and-test.md # Build and test guide +โ”‚ โ””โ”€โ”€ exit-codes-and-output-modes.md # Exit codes reference +โ”‚ +โ”œโ”€โ”€ ๐Ÿ“‚ scripts/ # Orchestration scripts +โ”‚ โ”œโ”€โ”€ atx-orchestrator.sh # Main orchestrator +โ”‚ โ”œโ”€โ”€ s3-integration.sh # S3 operations +โ”‚ โ”œโ”€โ”€ csv-parser.sh # CSV parsing utilities +โ”‚ โ”œโ”€โ”€ smoke-test.sh # Container validation +โ”‚ โ””โ”€โ”€ push-to-ecr.sh # ECR deployment +โ”‚ +โ”œโ”€โ”€ ๐Ÿ“‚ deployment/ # Infrastructure templates +โ”‚ โ”œโ”€โ”€ cloudformation-complete-stack.yaml # Complete CF stack +โ”‚ โ”œโ”€โ”€ ecs-task-definition.json # ECS task definition +โ”‚ โ”œโ”€โ”€ kubernetes-deployment.yaml # Kubernetes manifests +โ”‚ โ””โ”€โ”€ terraform/ # Terraform IaC +โ”‚ โ”œโ”€โ”€ main.tf # Main Terraform config +โ”‚ โ”œโ”€โ”€ variables.tf # Variable definitions +โ”‚ โ””โ”€โ”€ outputs.tf # Output definitions +โ”‚ +โ”œโ”€โ”€ ๐Ÿ“‚ examples/ # Example configurations +โ”‚ โ”œโ”€โ”€ single-customer.csv # Single customer example +โ”‚ โ”œโ”€โ”€ multi-customer.csv # Multi-customer example +โ”‚ โ”œโ”€โ”€ sample-repos.csv # Sample repositories +โ”‚ โ””โ”€โ”€ menu-folders.csv # Menu structure example +โ”‚ +โ””โ”€โ”€ ๐Ÿ“‚ spuragu-progress-to-ir/ # ATX transformation + โ””โ”€โ”€ transformation_definition.md # Transformation specification +``` + +## 
๐Ÿ” Security Features + +- **๐Ÿ”‘ OIDC Authentication** - No long-lived AWS access keys +- **๐Ÿ›ก๏ธ IAM Least Privilege** - Minimal required permissions +- **๐Ÿ”’ VPC Isolation** - Network security and isolation +- **๐Ÿ” Encryption at Rest** - S3 and ECR encryption +- **๐Ÿ“Š Audit Logging** - CloudTrail and CloudWatch integration +- **๐Ÿ” Container Scanning** - ECR vulnerability scanning +- **๐Ÿšซ Branch Protection** - OIDC limited to main branch + +## ๐Ÿ’ฐ Cost Optimization + +- **๐Ÿ’ก Spot Instances** - Up to 70% cost savings +- **๐Ÿ“Š Auto-scaling** - Pay only for what you use +- **โฐ Scheduled Scaling** - Scale down during off-hours +- **๐Ÿ—‚๏ธ Lifecycle Policies** - Automatic log and image cleanup +- **๐Ÿ“ˆ Cost Monitoring** - Built-in cost tracking and alerts + +**Estimated Monthly Cost:** $35-70 (varies by usage) + +## ๐Ÿงช Testing + +```bash +# Run smoke tests +./scripts/smoke-test.sh + +# Test container locally +docker build -t atx-test-runner . +docker run --rm atx-test-runner --smoke-test + +# Run integration tests +./scripts/test-orchestrator.sh --csv-file examples/sample-repos.csv --dry-run +``` + +## ๐Ÿ”„ CI/CD Pipeline + +The GitLab pipeline automatically: +1. โœ… **Validates** CloudFormation templates and scripts +2. ๐Ÿณ **Builds** Docker image with security scanning +3. ๐Ÿงช **Tests** functionality with smoke tests +4. ๐Ÿ“ฆ **Pushes** to Amazon ECR with proper tagging +5. ๐Ÿš€ **Deploys** to AWS ECS with health checks +6. โœ”๏ธ **Verifies** deployment success and functionality + +## ๐Ÿ“Š Monitoring and Logging + +- **๐Ÿ“ˆ CloudWatch Metrics** - ECS, ECR, and custom metrics +- **๐Ÿ“ Centralized Logging** - All logs in CloudWatch Logs +- **๐Ÿšจ Alerting** - Automated alerts for failures and issues +- **๐Ÿ“Š Dashboards** - Pre-built CloudWatch dashboards +- **๐Ÿ” Tracing** - Request tracing and performance monitoring + +## ๐Ÿค Contributing + +1. Fork the repository +2. Create a feature branch (`git checkout -b feature/amazing-feature`) +3. Commit your changes (`git commit -m 'Add amazing feature'`) +4. Push to the branch (`git push origin feature/amazing-feature`) +5. Open a Pull Request + +## ๐Ÿ“„ License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## ๐Ÿ†˜ Support + +- **๐Ÿ“š Documentation** - Check the `docs/` directory +- **๐Ÿ› Issues** - Report bugs via GitLab Issues +- **๐Ÿ’ฌ Discussions** - Use GitLab Discussions for questions +- **๐Ÿ“ง Contact** - Reach out to the development team + +## ๐ŸŽ‰ Getting Started + +Ready to deploy? Start with **[USER-GUIDE.md](USER-GUIDE.md)** for the complete setup guide! + +--- + +**Built with โค๏ธ for scalable ATX transformations** \ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/USER-GUIDE.md b/src/agentic_platform/agent/code_transform/USER-GUIDE.md new file mode 100644 index 0000000..5f80040 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/USER-GUIDE.md @@ -0,0 +1,260 @@ +# ATX Container Test Runner - Customer Package + +Welcome! This package contains everything you need to deploy the ATX Container Test Runner to your AWS account using GitLab CI/CD. + +## ๐Ÿ“ฆ What's Included + +``` +atx-container-test-runner/ +โ”œโ”€โ”€ USER-GUIDE.md โ† Start here! 
+โ”œโ”€โ”€ README.md โ† Project overview +โ”œโ”€โ”€ GITLAB-DEPLOYMENT.md โ† Complete GitLab setup guide +โ”œโ”€โ”€ QUICKSTART-ECS.md โ† Quick start for ECS deployment +โ”œโ”€โ”€ OIDC-SETUP-GUIDE.md โ† OIDC authentication setup +โ”œโ”€โ”€ QUICK-REFERENCE.md โ† Quick reference guide +โ”œโ”€โ”€ setup-gitlab-ci.sh โ† Automated setup script +โ”œโ”€โ”€ .gitlab-ci.yml โ† GitLab CI/CD pipeline +โ”œโ”€โ”€ Dockerfile โ† Container definition +โ”œโ”€โ”€ VERSION โ† Current version +โ”œโ”€โ”€ LICENSE โ† License information +โ”œโ”€โ”€ MANIFEST.txt โ† Package manifest +โ”œโ”€โ”€ gitlab-ci-policy.json โ† GitLab CI policy configuration +โ”‚ +โ”œโ”€โ”€ docs/ โ† Documentation +โ”‚ โ”œโ”€โ”€ deployment-guide.md โ† Detailed deployment guide +โ”‚ โ”œโ”€โ”€ troubleshooting.md โ† Troubleshooting help +โ”‚ โ”œโ”€โ”€ build-and-test.md โ† Build and test instructions +โ”‚ โ””โ”€โ”€ exit-codes-and-output-modes.md โ† Exit codes reference +โ”‚ +โ”œโ”€โ”€ scripts/ โ† Orchestration scripts +โ”‚ โ”œโ”€โ”€ atx-orchestrator.sh โ† Main orchestrator +โ”‚ โ”œโ”€โ”€ s3-integration.sh โ† S3 operations +โ”‚ โ”œโ”€โ”€ csv-parser.sh โ† CSV parsing utilities +โ”‚ โ”œโ”€โ”€ smoke-test.sh โ† Container validation +โ”‚ โ”œโ”€โ”€ push-to-ecr.sh โ† ECR push automation +โ”‚ โ”œโ”€โ”€ entrypoint.sh โ† Container entrypoint +โ”‚ โ””โ”€โ”€ test-orchestrator.sh โ† Test orchestration +โ”‚ +โ”œโ”€โ”€ deployment/ โ† Deployment templates +โ”‚ โ”œโ”€โ”€ cloudformation-complete-stack.yaml โ† Complete infrastructure +โ”‚ โ”œโ”€โ”€ ecs-task-definition.json โ† ECS task definition +โ”‚ โ”œโ”€โ”€ kubernetes-deployment.yaml โ† Kubernetes manifests +โ”‚ โ””โ”€โ”€ terraform/ โ† Terraform IaC +โ”‚ โ”œโ”€โ”€ main.tf โ† Main Terraform config +โ”‚ โ”œโ”€โ”€ variables.tf โ† Variable definitions +โ”‚ โ”œโ”€โ”€ outputs.tf โ† Output definitions +โ”‚ โ””โ”€โ”€ terraform.tfvars.example โ† Example variables +โ”‚ +โ”œโ”€โ”€ examples/ โ† Example configurations +โ”‚ โ”œโ”€โ”€ single-customer.csv โ† Single customer example +โ”‚ โ”œโ”€โ”€ multi-customer.csv โ† Multi-customer example +โ”‚ โ”œโ”€โ”€ sample-repos.csv โ† Sample repositories +โ”‚ โ”œโ”€โ”€ menu-folders.csv โ† Menu structure example +โ”‚ โ”œโ”€โ”€ nested-structure.csv โ† Nested folder example +โ”‚ โ”œโ”€โ”€ different-transformations.csv โ† Multiple transformations +โ”‚ โ”œโ”€โ”€ ci-cd-integration.sh โ† CI/CD integration example +โ”‚ โ””โ”€โ”€ kubernetes-job.yaml โ† Kubernetes job example +โ”‚ +โ””โ”€โ”€ spuragu-progress-to-ir/ โ† ATX transformation definition + โ”œโ”€โ”€ transformation_definition.md โ† Transformation specification + โ””โ”€โ”€ document_references/ โ† Supporting documentation +``` + +## ๐Ÿš€ Quick Start (3 Steps) + +### Step 1: Run Setup Script + +```bash +# Make the script executable +chmod +x setup-gitlab-ci.sh + +# Run the setup (creates AWS resources and IAM credentials) +./setup-gitlab-ci.sh +``` + +This script will: +- Create S3 buckets for source code and results +- Set up OIDC Identity Provider for GitLab +- Create IAM role with secure temporary credentials +- Display configuration values + +**Benefits of OIDC:** +- โœ… No long-lived access keys to manage +- โœ… Automatic credential rotation +- โœ… Enhanced security with temporary tokens +- โœ… Better compliance and auditing + +### Step 2: Configure GitLab + +1. **Push this repository to your GitLab account:** + ```bash + git init + git add . + git commit -m "Initial commit" + git remote add origin https://gitlab.com/your-username/atx-container-test-runner.git + git push -u origin main + ``` + +2. 
**In GitLab, go to Settings โ†’ CI/CD โ†’ Variables** + +3. **Add the variables displayed by the setup script:** + - `AWS_REGION` + - `AWS_ACCOUNT_ID` + - `AWS_ROLE_ARN` + - `SOURCE_BUCKET` + - `RESULTS_BUCKET` + + **Note:** No access keys needed! OIDC provides secure, temporary credentials automatically. + +### Step 3: Deploy + +```bash +# Push to trigger the pipeline +git commit --allow-empty -m "Trigger deployment" +git push origin main +``` + +The GitLab pipeline will automatically: +1. โœ… Validate CloudFormation templates +2. ๐Ÿณ Build Docker image +3. ๐Ÿงช Run smoke tests +4. ๐Ÿ“ฆ Push to Amazon ECR +5. ๐Ÿš€ Deploy to AWS ECS +6. โœ”๏ธ Verify deployment + +## ๐Ÿ“– Documentation + +- **[README.md](README.md)** - Project overview and developer guide +- **[GITLAB-DEPLOYMENT.md](GITLAB-DEPLOYMENT.md)** - Complete GitLab CI/CD setup guide +- **[QUICKSTART-ECS.md](QUICKSTART-ECS.md)** - Quick start for ECS deployment +- **[OIDC-SETUP-GUIDE.md](OIDC-SETUP-GUIDE.md)** - OIDC authentication setup +- **[QUICK-REFERENCE.md](QUICK-REFERENCE.md)** - Quick reference and commands +- **[docs/deployment-guide.md](docs/deployment-guide.md)** - Detailed deployment guide +- **[docs/troubleshooting.md](docs/troubleshooting.md)** - Troubleshooting help +- **[docs/build-and-test.md](docs/build-and-test.md)** - Build and test instructions +- **[docs/exit-codes-and-output-modes.md](docs/exit-codes-and-output-modes.md)** - Exit codes reference + +## ๐Ÿ”ง Deployment Options + +This package supports multiple deployment methods: + +### 1. GitLab CI/CD (Recommended) +- Automated deployment on every push +- Uses `.gitlab-ci.yml` pipeline +- See `GITLAB-DEPLOYMENT.md` + +### 2. CloudFormation +- Complete infrastructure as code +- Uses `deployment/cloudformation-complete-stack.yaml` +- See `docs/deployment-guide.md` + +### 3. Terraform +- Infrastructure as code with Terraform +- Uses `deployment/terraform/` +- See `deployment/terraform/terraform.tfvars.example` + +### 4. Manual Deployment +- Step-by-step manual deployment +- See `QUICKSTART-ECS.md` + +## ๐Ÿ—๏ธ Architecture + +The solution deploys: +- **ECS Fargate Cluster** - Serverless container orchestration +- **ECR Repository** - Docker image storage +- **VPC & Networking** - Isolated network environment +- **IAM Roles** - Secure access to AWS services +- **CloudWatch Logs** - Centralized logging +- **S3 Buckets** - Source code and results storage + +## ๐Ÿ’ฐ Cost Estimate + +Typical monthly costs (us-east-1): +- ECS Fargate: ~$30-50/month (with Spot instances) +- ECR Storage: ~$1-5/month +- S3 Storage: ~$1-10/month +- CloudWatch Logs: ~$1-5/month +- **Total: ~$35-70/month** + +Costs vary based on: +- Number of transformations +- Code repository sizes +- Log retention +- Task execution time + +## ๐Ÿ”’ Security + +This package follows AWS security best practices: +- โœ… **OIDC authentication** - No long-lived access keys +- โœ… **Temporary credentials** - Auto-rotating tokens (1 hour) +- โœ… **IAM roles with least privilege** - Minimal required permissions +- โœ… **ECR image scanning** - Vulnerability detection +- โœ… **Encryption at rest** - S3 and ECR encrypted +- โœ… **VPC isolation** - Network security +- โœ… **CloudWatch Logs** - Complete audit trail +- โœ… **Branch restrictions** - OIDC limited to main branch + +## ๐Ÿ†˜ Support + +### Common Issues + +1. **Pipeline fails at build stage** + - Ensure Docker executor is available in GitLab Runner + - Check Dockerfile syntax + +2. 
**Pipeline fails at push stage** + - Verify AWS credentials in GitLab variables + - Check IAM permissions for ECR + +3. **Pipeline fails at deploy stage** + - Verify S3 buckets exist + - Check CloudFormation events in AWS Console + +4. **Task fails to run** + - Check CloudWatch Logs: `/ecs/production-atx-test-runner` + - Verify IAM task role has S3 permissions + +See **[docs/troubleshooting.md](docs/troubleshooting.md)** for detailed solutions. + +## ๐Ÿ“ž Getting Help + +1. Check the documentation in `docs/` +2. Review `GITLAB-DEPLOYMENT.md` for GitLab-specific issues +3. Check AWS CloudWatch Logs for runtime errors +4. Review GitLab pipeline logs for CI/CD issues + +## ๐Ÿ”„ Updates + +To update to a new version: + +```bash +# Pull latest changes +git pull origin main + +# Push to trigger redeployment +git push gitlab main +``` + +The pipeline will automatically rebuild and redeploy. + +## ๐Ÿงน Cleanup + +To remove all AWS resources: + +1. In GitLab, go to **CI/CD โ†’ Pipelines** +2. Find the latest pipeline +3. Click the **destroy:stack** manual job +4. Click **Play** to execute + +Or manually: +```bash +aws cloudformation delete-stack --stack-name atx-test-runner --region us-east-1 +``` + +## ๐Ÿ“ License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## ๐ŸŽ‰ You're Ready! + +Run `./setup-gitlab-ci.sh` to get started! diff --git a/src/agentic_platform/agent/code_transform/VERSION b/src/agentic_platform/agent/code_transform/VERSION new file mode 100644 index 0000000..6e8bf73 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/src/agentic_platform/agent/code_transform/deployment/README.md b/src/agentic_platform/agent/code_transform/deployment/README.md new file mode 100644 index 0000000..e71d052 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/deployment/README.md @@ -0,0 +1,322 @@ +# ATX Test Runner Deployment Templates + +This directory contains deployment templates for running the ATX Test Runner on AWS infrastructure. + +## Available Templates + +### 1. ECS Task Definition (JSON) +- **File**: `ecs-task-definition.json` +- **Purpose**: Standalone ECS task definition for Fargate +- **Use Case**: Quick deployment to existing ECS cluster + +### 2. Kubernetes Deployment (YAML) +- **File**: `kubernetes-deployment.yaml` +- **Purpose**: Complete Kubernetes deployment including Job and CronJob +- **Use Case**: EKS or self-managed Kubernetes clusters + +### 3. CloudFormation Complete Stack (YAML) +- **File**: `cloudformation-complete-stack.yaml` +- **Purpose**: Complete infrastructure including VPC, ECS cluster, IAM roles +- **Use Case**: New AWS environment setup + +### 4. Terraform Configuration +- **Directory**: `terraform/` +- **Files**: `main.tf`, `variables.tf`, `outputs.tf` +- **Purpose**: Infrastructure as Code with Terraform +- **Use Case**: Terraform-managed infrastructure + +## Quick Start + +### Option 1: ECS Task Definition + +1. Update the task definition with your values: +```bash +sed -i 's/ACCOUNT_ID/123456789012/g' ecs-task-definition.json +sed -i 's/REGION/us-east-1/g' ecs-task-definition.json +sed -i 's/VERSION/0.1.0/g' ecs-task-definition.json +``` + +2. Register the task definition: +```bash +aws ecs register-task-definition \ + --cli-input-json file://ecs-task-definition.json +``` + +3. 
Run the task: +```bash +aws ecs run-task \ + --cluster my-cluster \ + --task-definition atx-test-runner \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={subnets=[subnet-xxx],securityGroups=[sg-xxx],assignPublicIp=ENABLED}" +``` + +### Option 2: Kubernetes + +1. Update the deployment YAML: +```bash +sed -i 's/ACCOUNT_ID/123456789012/g' kubernetes-deployment.yaml +sed -i 's/REGION/us-east-1/g' kubernetes-deployment.yaml +sed -i 's/VERSION/0.1.0/g' kubernetes-deployment.yaml +``` + +2. Apply the configuration: +```bash +kubectl apply -f kubernetes-deployment.yaml +``` + +3. Check job status: +```bash +kubectl get jobs -n atx-test-runner +kubectl logs -n atx-test-runner job/atx-test-runner-job +``` + +### Option 3: CloudFormation + +1. Create the stack: +```bash +aws cloudformation create-stack \ + --stack-name atx-test-runner \ + --template-body file://cloudformation-complete-stack.yaml \ + --parameters \ + ParameterKey=SourceBucketName,ParameterValue=my-source-bucket \ + ParameterKey=ResultsBucketName,ParameterValue=my-results-bucket \ + --capabilities CAPABILITY_NAMED_IAM +``` + +2. Wait for stack creation: +```bash +aws cloudformation wait stack-create-complete \ + --stack-name atx-test-runner +``` + +3. Get outputs: +```bash +aws cloudformation describe-stacks \ + --stack-name atx-test-runner \ + --query 'Stacks[0].Outputs' +``` + +### Option 4: Terraform + +1. Navigate to terraform directory: +```bash +cd terraform +``` + +2. Copy and edit variables: +```bash +cp terraform.tfvars.example terraform.tfvars +# Edit terraform.tfvars with your values +``` + +3. Initialize Terraform: +```bash +terraform init +``` + +4. Plan the deployment: +```bash +terraform plan +``` + +5. Apply the configuration: +```bash +terraform apply +``` + +6. 
Get outputs: +```bash +terraform output +``` + +## Configuration Parameters + +### Common Parameters + +| Parameter | Description | Default | Required | +|-----------|-------------|---------|----------| +| AWS Account ID | Your AWS account ID | - | Yes | +| AWS Region | AWS region for deployment | us-east-1 | Yes | +| Image Version | Docker image tag | latest | No | +| Source Bucket | S3 bucket with source code | - | Yes | +| Results Bucket | S3 bucket for results | - | Yes | +| Task CPU | CPU units (1024 = 1 vCPU) | 2048 | No | +| Task Memory | Memory in MB | 4096 | No | +| Max Parallel Jobs | Concurrent transformations | 4 | No | + +### ECS-Specific Parameters + +- **Network Mode**: awsvpc (required for Fargate) +- **Launch Type**: FARGATE or FARGATE_SPOT +- **Requires Compatibilities**: FARGATE + +### Kubernetes-Specific Parameters + +- **Namespace**: atx-test-runner +- **Service Account**: atx-test-runner (with IAM role annotation) +- **Restart Policy**: OnFailure +- **TTL After Finished**: 86400 seconds (24 hours) + +## IAM Permissions Required + +### Task Role Permissions +The ECS task needs the following S3 permissions: + +**Source Bucket (Read)**: +- `s3:GetObject` +- `s3:ListBucket` + +**Results Bucket (Write)**: +- `s3:PutObject` +- `s3:PutObjectAcl` +- `s3:ListBucket` + +### Execution Role Permissions +The ECS execution role needs: +- `AmazonECSTaskExecutionRolePolicy` (managed policy) +- ECR pull permissions (included in managed policy) +- CloudWatch Logs write permissions (included in managed policy) + +## Networking Requirements + +### ECS/Fargate +- VPC with public or private subnets +- Internet access (via Internet Gateway or NAT Gateway) +- Security group allowing outbound HTTPS (443) for S3 and ECR + +### Kubernetes/EKS +- EKS cluster with worker nodes or Fargate profile +- Service account with IAM role (IRSA) +- Network policies allowing S3 access + +## Cost Optimization + +### Use Fargate Spot +CloudFormation and Terraform templates include Fargate Spot capacity providers with 80% weight: +- 80% of tasks run on Spot (cheaper) +- 20% of tasks run on regular Fargate (reliability) + +### Right-Size Resources +Adjust CPU and memory based on your workload: +- Small repos: 1024 CPU / 2048 MB +- Medium repos: 2048 CPU / 4096 MB +- Large repos: 4096 CPU / 8192 MB + +### Use Lifecycle Policies +ECR lifecycle policies automatically clean up old images: +- Keep last 10 images +- Reduces storage costs + +## Monitoring and Logging + +### CloudWatch Logs +All templates configure CloudWatch Logs: +- Log Group: `/ecs/atx-test-runner` or `/ecs/{environment}-atx-test-runner` +- Retention: 30 days +- Stream Prefix: `atx` + +### Container Insights +ECS clusters have Container Insights enabled for: +- CPU and memory utilization +- Network metrics +- Task-level metrics + +### Viewing Logs + +**ECS**: +```bash +aws logs tail /ecs/atx-test-runner --follow +``` + +**Kubernetes**: +```bash +kubectl logs -n atx-test-runner -f job/atx-test-runner-job +``` + +## Troubleshooting + +### Task Fails to Start + +1. Check IAM permissions: +```bash +aws iam get-role --role-name atx-task-role +aws iam get-role-policy --role-name atx-task-role --policy-name s3-access +``` + +2. Verify ECR image exists: +```bash +aws ecr describe-images --repository-name atx-test-runner +``` + +3. Check CloudWatch logs: +```bash +aws logs tail /ecs/atx-test-runner --follow +``` + +### S3 Access Denied + +1. Verify bucket permissions: +```bash +aws s3 ls s3://my-source-bucket/ +aws s3 ls s3://my-results-bucket/ +``` + +2. 
Check task role policy: +```bash +aws iam get-role-policy --role-name atx-task-role --policy-name s3-access +``` + +### Network Issues + +1. Verify security group allows outbound HTTPS: +```bash +aws ec2 describe-security-groups --group-ids sg-xxx +``` + +2. Check subnet has internet access: +```bash +aws ec2 describe-route-tables --filters "Name=association.subnet-id,Values=subnet-xxx" +``` + +## Cleanup + +### CloudFormation +```bash +aws cloudformation delete-stack --stack-name atx-test-runner +``` + +### Terraform +```bash +cd terraform +terraform destroy +``` + +### Kubernetes +```bash +kubectl delete namespace atx-test-runner +``` + +### Manual ECS +```bash +# Deregister task definition +aws ecs deregister-task-definition --task-definition atx-test-runner:1 + +# Delete cluster (if empty) +aws ecs delete-cluster --cluster my-cluster +``` + +## Next Steps + +1. Review the [Deployment Guide](../docs/deployment.md) for detailed instructions +2. Check [Troubleshooting Guide](../docs/troubleshooting.md) for common issues +3. See [Examples](../examples/) for sample configurations +4. Read [Build and Test Guide](../docs/build-and-test.md) for local testing + +## Support + +For issues or questions: +1. Check the troubleshooting guide +2. Review CloudWatch logs +3. Verify IAM permissions +4. Check network connectivity diff --git a/src/agentic_platform/agent/code_transform/deployment/cloudformation-complete-stack.yaml b/src/agentic_platform/agent/code_transform/deployment/cloudformation-complete-stack.yaml new file mode 100644 index 0000000..483edd4 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/deployment/cloudformation-complete-stack.yaml @@ -0,0 +1,384 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Complete ATX Test Runner infrastructure including VPC, ECS Cluster, Task Definition, and IAM roles' + +Parameters: + EnvironmentName: + Type: String + Default: production + Description: Environment name for resource tagging + AllowedValues: + - development + - staging + - production + + ImageVersion: + Type: String + Default: latest + Description: Docker image version tag + + SourceBucketName: + Type: String + Description: S3 bucket containing source code folders + + ResultsBucketName: + Type: String + Description: S3 bucket for storing transformation results + + VpcCIDR: + Type: String + Default: 10.0.0.0/16 + Description: CIDR block for VPC + + PublicSubnet1CIDR: + Type: String + Default: 10.0.1.0/24 + Description: CIDR block for public subnet 1 + + PublicSubnet2CIDR: + Type: String + Default: 10.0.2.0/24 + Description: CIDR block for public subnet 2 + + MaxParallelJobs: + Type: Number + Default: 4 + MinValue: 1 + MaxValue: 10 + Description: Maximum number of parallel transformation jobs + + TaskCpu: + Type: String + Default: '2048' + AllowedValues: ['256', '512', '1024', '2048', '4096'] + Description: CPU units for ECS task (1024 = 1 vCPU) + + TaskMemory: + Type: String + Default: '4096' + AllowedValues: ['512', '1024', '2048', '4096', '8192', '16384'] + Description: Memory for ECS task in MB + +Resources: + # VPC and Networking + VPC: + Type: AWS::EC2::VPC + Properties: + CidrBlock: !Ref VpcCIDR + EnableDnsHostnames: true + EnableDnsSupport: true + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-vpc + + InternetGateway: + Type: AWS::EC2::InternetGateway + Properties: + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-igw + + InternetGatewayAttachment: + Type: AWS::EC2::VPCGatewayAttachment + Properties: + InternetGatewayId: !Ref InternetGateway + 
VpcId: !Ref VPC + + PublicSubnet1: + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + AvailabilityZone: !Select [0, !GetAZs ''] + CidrBlock: !Ref PublicSubnet1CIDR + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-public-subnet-1 + + PublicSubnet2: + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + AvailabilityZone: !Select [1, !GetAZs ''] + CidrBlock: !Ref PublicSubnet2CIDR + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-public-subnet-2 + + PublicRouteTable: + Type: AWS::EC2::RouteTable + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-public-routes + + DefaultPublicRoute: + Type: AWS::EC2::Route + DependsOn: InternetGatewayAttachment + Properties: + RouteTableId: !Ref PublicRouteTable + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: !Ref InternetGateway + + PublicSubnet1RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: !Ref PublicRouteTable + SubnetId: !Ref PublicSubnet1 + + PublicSubnet2RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: !Ref PublicRouteTable + SubnetId: !Ref PublicSubnet2 + + # Security Group + ECSSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupName: !Sub ${EnvironmentName}-atx-ecs-sg + GroupDescription: Security group for ATX Test Runner ECS tasks + VpcId: !Ref VPC + SecurityGroupEgress: + - IpProtocol: -1 + CidrIp: 0.0.0.0/0 + Description: Allow all outbound traffic + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-ecs-sg + + # IAM Roles + ECSTaskExecutionRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub ${EnvironmentName}-atx-execution-role + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: ecs-tasks.amazonaws.com + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-execution-role + + ECSTaskRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub ${EnvironmentName}-atx-task-role + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: ecs-tasks.amazonaws.com + Action: sts:AssumeRole + Policies: + - PolicyName: S3Access + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - s3:GetObject + - s3:ListBucket + Resource: + - !Sub arn:aws:s3:::${SourceBucketName} + - !Sub arn:aws:s3:::${SourceBucketName}/* + - Effect: Allow + Action: + - s3:PutObject + - s3:PutObjectAcl + - s3:ListBucket + Resource: + - !Sub arn:aws:s3:::${ResultsBucketName} + - !Sub arn:aws:s3:::${ResultsBucketName}/* + - PolicyName: ATXTransformAccess + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - transform-custom:* + - bedrock:InvokeModel + - bedrock:InvokeModelWithResponseStream + - bedrock:ListFoundationModels + - bedrock:GetFoundationModel + Resource: "*" + - Effect: Allow + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/atx/*" + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-task-role + + # CloudWatch Log Group + LogGroup: + Type: AWS::Logs::LogGroup + Properties: + LogGroupName: !Sub /ecs/${EnvironmentName}-atx-test-runner + RetentionInDays: 30 + + # ECS Cluster + ECSCluster: + Type: AWS::ECS::Cluster + Properties: + 
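+      # Cost note: the DefaultCapacityProviderStrategy below weights FARGATE_SPOT 4:1 over
+      # on-demand FARGATE, so roughly 80% of tasks are placed on Spot capacity.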
ClusterName: !Sub ${EnvironmentName}-atx-cluster + CapacityProviders: + - FARGATE + - FARGATE_SPOT + DefaultCapacityProviderStrategy: + - CapacityProvider: FARGATE + Weight: 1 + - CapacityProvider: FARGATE_SPOT + Weight: 4 + ClusterSettings: + - Name: containerInsights + Value: enabled + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-cluster + + # ECS Task Definition + TaskDefinition: + Type: AWS::ECS::TaskDefinition + Properties: + Family: !Sub ${EnvironmentName}-atx-test-runner + NetworkMode: awsvpc + RequiresCompatibilities: + - FARGATE + Cpu: !Ref TaskCpu + Memory: !Ref TaskMemory + ExecutionRoleArn: !GetAtt ECSTaskExecutionRole.Arn + TaskRoleArn: !GetAtt ECSTaskRole.Arn + ContainerDefinitions: + - Name: atx-test-runner + Image: !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/atx-test-runner:${ImageVersion} + Essential: true + Command: + - --csv-file + - s3://atx-test-source-438227048955/sample-repos.csv + - --mode + - serial + - --verbose + Environment: + - Name: AWS_DEFAULT_REGION + Value: !Ref AWS::Region + - Name: ATX_LOG_LEVEL + Value: INFO + - Name: SOURCE_BUCKET + Value: !Ref SourceBucketName + - Name: RESULTS_BUCKET + Value: !Ref ResultsBucketName + LogConfiguration: + LogDriver: awslogs + Options: + awslogs-group: !Ref LogGroup + awslogs-region: !Ref AWS::Region + awslogs-stream-prefix: atx + Tags: + - Key: Name + Value: !Sub ${EnvironmentName}-atx-task-definition + + # ECR Repository + ECRRepository: + Type: AWS::ECR::Repository + Properties: + RepositoryName: atx-test-runner + ImageScanningConfiguration: + ScanOnPush: true + EncryptionConfiguration: + EncryptionType: AES256 + LifecyclePolicy: + LifecyclePolicyText: | + { + "rules": [ + { + "rulePriority": 1, + "description": "Keep last 10 images", + "selection": { + "tagStatus": "any", + "countType": "imageCountMoreThan", + "countNumber": 10 + }, + "action": { + "type": "expire" + } + } + ] + } + Tags: + - Key: Name + Value: atx-test-runner-ecr + +Outputs: + ClusterName: + Description: ECS Cluster Name + Value: !Ref ECSCluster + Export: + Name: !Sub ${EnvironmentName}-atx-cluster-name + + TaskDefinitionArn: + Description: Task Definition ARN + Value: !Ref TaskDefinition + Export: + Name: !Sub ${EnvironmentName}-atx-task-definition-arn + + TaskRoleArn: + Description: Task Role ARN + Value: !GetAtt ECSTaskRole.Arn + Export: + Name: !Sub ${EnvironmentName}-atx-task-role-arn + + ExecutionRoleArn: + Description: Execution Role ARN + Value: !GetAtt ECSTaskExecutionRole.Arn + Export: + Name: !Sub ${EnvironmentName}-atx-execution-role-arn + + SecurityGroupId: + Description: Security Group ID + Value: !Ref ECSSecurityGroup + Export: + Name: !Sub ${EnvironmentName}-atx-security-group-id + + Subnet1Id: + Description: Public Subnet 1 ID + Value: !Ref PublicSubnet1 + Export: + Name: !Sub ${EnvironmentName}-atx-subnet-1-id + + Subnet2Id: + Description: Public Subnet 2 ID + Value: !Ref PublicSubnet2 + Export: + Name: !Sub ${EnvironmentName}-atx-subnet-2-id + + ECRRepositoryUri: + Description: ECR Repository URI + Value: !GetAtt ECRRepository.RepositoryUri + Export: + Name: atx-test-runner-ecr-uri + + LogGroupName: + Description: CloudWatch Log Group Name + Value: !Ref LogGroup + Export: + Name: !Sub ${EnvironmentName}-atx-log-group-name + + RunTaskCommand: + Description: AWS CLI command to run the task + Value: !Sub | + aws ecs run-task \ + --cluster ${ECSCluster} \ + --task-definition ${TaskDefinition} \ + --launch-type FARGATE \ + --network-configuration 
"awsvpcConfiguration={subnets=[${PublicSubnet1},${PublicSubnet2}],securityGroups=[${ECSSecurityGroup}],assignPublicIp=ENABLED}" \ + --region ${AWS::Region} diff --git a/src/agentic_platform/agent/code_transform/deployment/ecs-task-definition.json b/src/agentic_platform/agent/code_transform/deployment/ecs-task-definition.json new file mode 100644 index 0000000..bb41eae --- /dev/null +++ b/src/agentic_platform/agent/code_transform/deployment/ecs-task-definition.json @@ -0,0 +1,67 @@ +{ + "family": "atx-test-runner", + "taskRoleArn": "arn:aws:iam::ACCOUNT_ID:role/atx-test-runner-task-role", + "executionRoleArn": "arn:aws:iam::ACCOUNT_ID:role/atx-test-runner-execution-role", + "networkMode": "awsvpc", + "requiresCompatibilities": ["FARGATE"], + "cpu": "2048", + "memory": "4096", + "containerDefinitions": [ + { + "name": "atx-test-runner", + "image": "ACCOUNT_ID.dkr.ecr.REGION.amazonaws.com/atx-test-runner:VERSION", + "essential": true, + "command": [ + "/usr/local/bin/atx-orchestrator.sh", + "--csv-file", + "/config/repos.csv", + "--mode", + "parallel", + "--max-jobs", + "4" + ], + "environment": [ + { + "name": "AWS_DEFAULT_REGION", + "value": "us-east-1" + }, + { + "name": "ATX_LOG_LEVEL", + "value": "INFO" + } + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/atx-test-runner", + "awslogs-region": "us-east-1", + "awslogs-stream-prefix": "atx" + } + }, + "mountPoints": [], + "volumesFrom": [], + "portMappings": [], + "healthCheck": { + "command": [ + "CMD-SHELL", + "test -f /tmp/health || exit 1" + ], + "interval": 30, + "timeout": 5, + "retries": 3, + "startPeriod": 60 + } + } + ], + "volumes": [], + "tags": [ + { + "key": "Application", + "value": "ATX-Test-Runner" + }, + { + "key": "Environment", + "value": "Production" + } + ] +} diff --git a/src/agentic_platform/agent/code_transform/deployment/kubernetes-deployment.yaml b/src/agentic_platform/agent/code_transform/deployment/kubernetes-deployment.yaml new file mode 100644 index 0000000..2d3c928 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/deployment/kubernetes-deployment.yaml @@ -0,0 +1,151 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: atx-test-runner + labels: + app: atx-test-runner + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: atx-test-runner + namespace: atx-test-runner + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/atx-test-runner-role + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: atx-test-runner-config + namespace: atx-test-runner +data: + repos.csv: | + s3_path,build_command,transformation_name,output_s3_path + s3://source-bucket/customer1/folder1/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer1/folder1/ + +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: atx-test-runner-job + namespace: atx-test-runner + labels: + app: atx-test-runner + version: "0.1.0" +spec: + backoffLimit: 3 + completions: 1 + parallelism: 1 + ttlSecondsAfterFinished: 86400 # Clean up after 24 hours + template: + metadata: + labels: + app: atx-test-runner + spec: + serviceAccountName: atx-test-runner + restartPolicy: OnFailure + containers: + - name: atx-test-runner + image: ACCOUNT_ID.dkr.ecr.REGION.amazonaws.com/atx-test-runner:VERSION + imagePullPolicy: Always + command: + - /usr/local/bin/atx-orchestrator.sh + args: + - --csv-file + - /config/repos.csv + - --mode + - parallel + - --max-jobs + - "4" + env: + - name: AWS_DEFAULT_REGION + value: "us-east-1" + - name: ATX_LOG_LEVEL + value: "INFO" + - 
name: AWS_REGION + value: "us-east-1" + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" + volumeMounts: + - name: config + mountPath: /config + readOnly: true + - name: tmp + mountPath: /tmp + volumes: + - name: config + configMap: + name: atx-test-runner-config + - name: tmp + emptyDir: + sizeLimit: 10Gi + +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: atx-test-runner-scheduled + namespace: atx-test-runner + labels: + app: atx-test-runner +spec: + schedule: "0 2 * * *" # Run daily at 2 AM + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 2 + template: + metadata: + labels: + app: atx-test-runner + spec: + serviceAccountName: atx-test-runner + restartPolicy: OnFailure + containers: + - name: atx-test-runner + image: ACCOUNT_ID.dkr.ecr.REGION.amazonaws.com/atx-test-runner:VERSION + imagePullPolicy: Always + command: + - /usr/local/bin/atx-orchestrator.sh + args: + - --csv-file + - /config/repos.csv + - --mode + - parallel + - --max-jobs + - "4" + env: + - name: AWS_DEFAULT_REGION + value: "us-east-1" + - name: ATX_LOG_LEVEL + value: "INFO" + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" + volumeMounts: + - name: config + mountPath: /config + readOnly: true + - name: tmp + mountPath: /tmp + volumes: + - name: config + configMap: + name: atx-test-runner-config + - name: tmp + emptyDir: + sizeLimit: 10Gi diff --git a/src/agentic_platform/agent/code_transform/deployment/terraform/main.tf b/src/agentic_platform/agent/code_transform/deployment/terraform/main.tf new file mode 100644 index 0000000..f66647e --- /dev/null +++ b/src/agentic_platform/agent/code_transform/deployment/terraform/main.tf @@ -0,0 +1,330 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.aws_region +} + +# Data sources +data "aws_caller_identity" "current" {} +data "aws_availability_zones" "available" { + state = "available" +} + +# ECR Repository +resource "aws_ecr_repository" "atx_test_runner" { + name = "atx-test-runner" + image_tag_mutability = "MUTABLE" + + image_scanning_configuration { + scan_on_push = true + } + + encryption_configuration { + encryption_type = "AES256" + } + + tags = { + Name = "atx-test-runner" + Environment = var.environment + Application = "ATX-Test-Runner" + } +} + +# ECR Lifecycle Policy +resource "aws_ecr_lifecycle_policy" "atx_test_runner" { + repository = aws_ecr_repository.atx_test_runner.name + + policy = jsonencode({ + rules = [ + { + rulePriority = 1 + description = "Keep last 10 images" + selection = { + tagStatus = "any" + countType = "imageCountMoreThan" + countNumber = 10 + } + action = { + type = "expire" + } + } + ] + }) +} + +# VPC +resource "aws_vpc" "main" { + cidr_block = var.vpc_cidr + enable_dns_hostnames = true + enable_dns_support = true + + tags = { + Name = "${var.environment}-atx-vpc" + Environment = var.environment + } +} + +# Internet Gateway +resource "aws_internet_gateway" "main" { + vpc_id = aws_vpc.main.id + + tags = { + Name = "${var.environment}-atx-igw" + Environment = var.environment + } +} + +# Public Subnets +resource "aws_subnet" "public" { + count = 2 + vpc_id = aws_vpc.main.id + cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index) + availability_zone = data.aws_availability_zones.available.names[count.index] + map_public_ip_on_launch = true + + tags = 
{ + Name = "${var.environment}-atx-public-subnet-${count.index + 1}" + Environment = var.environment + } +} + +# Route Table +resource "aws_route_table" "public" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main.id + } + + tags = { + Name = "${var.environment}-atx-public-routes" + Environment = var.environment + } +} + +# Route Table Associations +resource "aws_route_table_association" "public" { + count = 2 + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.public.id +} + +# Security Group +resource "aws_security_group" "ecs_tasks" { + name = "${var.environment}-atx-ecs-sg" + description = "Security group for ATX Test Runner ECS tasks" + vpc_id = aws_vpc.main.id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow all outbound traffic" + } + + tags = { + Name = "${var.environment}-atx-ecs-sg" + Environment = var.environment + } +} + +# CloudWatch Log Group +resource "aws_cloudwatch_log_group" "atx_test_runner" { + name = "/ecs/${var.environment}-atx-test-runner" + retention_in_days = 30 + + tags = { + Name = "${var.environment}-atx-log-group" + Environment = var.environment + } +} + +# IAM Role for ECS Task Execution +resource "aws_iam_role" "ecs_task_execution" { + name = "${var.environment}-atx-execution-role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Principal = { + Service = "ecs-tasks.amazonaws.com" + } + Action = "sts:AssumeRole" + } + ] + }) + + tags = { + Name = "${var.environment}-atx-execution-role" + Environment = var.environment + } +} + +resource "aws_iam_role_policy_attachment" "ecs_task_execution" { + role = aws_iam_role.ecs_task_execution.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" +} + +# IAM Role for ECS Task +resource "aws_iam_role" "ecs_task" { + name = "${var.environment}-atx-task-role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Principal = { + Service = "ecs-tasks.amazonaws.com" + } + Action = "sts:AssumeRole" + } + ] + }) + + tags = { + Name = "${var.environment}-atx-task-role" + Environment = var.environment + } +} + +# IAM Policy for S3 Access +resource "aws_iam_role_policy" "s3_access" { + name = "s3-access" + role = aws_iam_role.ecs_task.id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "s3:GetObject", + "s3:ListBucket" + ] + Resource = [ + "arn:aws:s3:::${var.source_bucket_name}", + "arn:aws:s3:::${var.source_bucket_name}/*" + ] + }, + { + Effect = "Allow" + Action = [ + "s3:PutObject", + "s3:PutObjectAcl", + "s3:ListBucket" + ] + Resource = [ + "arn:aws:s3:::${var.results_bucket_name}", + "arn:aws:s3:::${var.results_bucket_name}/*" + ] + } + ] + }) +} + +# ECS Cluster +resource "aws_ecs_cluster" "main" { + name = "${var.environment}-atx-cluster" + + setting { + name = "containerInsights" + value = "enabled" + } + + tags = { + Name = "${var.environment}-atx-cluster" + Environment = var.environment + } +} + +# ECS Cluster Capacity Providers +resource "aws_ecs_cluster_capacity_providers" "main" { + cluster_name = aws_ecs_cluster.main.name + + capacity_providers = ["FARGATE", "FARGATE_SPOT"] + + default_capacity_provider_strategy { + capacity_provider = "FARGATE" + weight = 1 + base = 0 + } + + default_capacity_provider_strategy { + capacity_provider = "FARGATE_SPOT" + weight = 4 + base = 0 
+ } +} + +# ECS Task Definition +resource "aws_ecs_task_definition" "atx_test_runner" { + family = "${var.environment}-atx-test-runner" + network_mode = "awsvpc" + requires_compatibilities = ["FARGATE"] + cpu = var.task_cpu + memory = var.task_memory + execution_role_arn = aws_iam_role.ecs_task_execution.arn + task_role_arn = aws_iam_role.ecs_task.arn + + container_definitions = jsonencode([ + { + name = "atx-test-runner" + image = "${aws_ecr_repository.atx_test_runner.repository_url}:${var.image_version}" + essential = true + command = [ + "/usr/local/bin/atx-orchestrator.sh", + "--csv-file", + "/config/repos.csv", + "--mode", + "parallel", + "--max-jobs", + tostring(var.max_parallel_jobs) + ] + environment = [ + { + name = "AWS_DEFAULT_REGION" + value = var.aws_region + }, + { + name = "ATX_LOG_LEVEL" + value = "INFO" + }, + { + name = "SOURCE_BUCKET" + value = var.source_bucket_name + }, + { + name = "RESULTS_BUCKET" + value = var.results_bucket_name + } + ] + logConfiguration = { + logDriver = "awslogs" + options = { + "awslogs-group" = aws_cloudwatch_log_group.atx_test_runner.name + "awslogs-region" = var.aws_region + "awslogs-stream-prefix" = "atx" + } + } + } + ]) + + tags = { + Name = "${var.environment}-atx-task-definition" + Environment = var.environment + } +} diff --git a/src/agentic_platform/agent/code_transform/deployment/terraform/outputs.tf b/src/agentic_platform/agent/code_transform/deployment/terraform/outputs.tf new file mode 100644 index 0000000..8fff4f5 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/deployment/terraform/outputs.tf @@ -0,0 +1,61 @@ +output "cluster_name" { + description = "ECS Cluster Name" + value = aws_ecs_cluster.main.name +} + +output "cluster_arn" { + description = "ECS Cluster ARN" + value = aws_ecs_cluster.main.arn +} + +output "task_definition_arn" { + description = "Task Definition ARN" + value = aws_ecs_task_definition.atx_test_runner.arn +} + +output "task_definition_family" { + description = "Task Definition Family" + value = aws_ecs_task_definition.atx_test_runner.family +} + +output "task_role_arn" { + description = "Task Role ARN" + value = aws_iam_role.ecs_task.arn +} + +output "execution_role_arn" { + description = "Execution Role ARN" + value = aws_iam_role.ecs_task_execution.arn +} + +output "security_group_id" { + description = "Security Group ID" + value = aws_security_group.ecs_tasks.id +} + +output "subnet_ids" { + description = "Public Subnet IDs" + value = aws_subnet.public[*].id +} + +output "ecr_repository_url" { + description = "ECR Repository URL" + value = aws_ecr_repository.atx_test_runner.repository_url +} + +output "log_group_name" { + description = "CloudWatch Log Group Name" + value = aws_cloudwatch_log_group.atx_test_runner.name +} + +output "run_task_command" { + description = "AWS CLI command to run the task" + value = <<-EOT + aws ecs run-task \ + --cluster ${aws_ecs_cluster.main.name} \ + --task-definition ${aws_ecs_task_definition.atx_test_runner.family} \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={subnets=[${join(",", aws_subnet.public[*].id)}],securityGroups=[${aws_security_group.ecs_tasks.id}],assignPublicIp=ENABLED}" \ + --region ${var.aws_region} + EOT +} diff --git a/src/agentic_platform/agent/code_transform/deployment/terraform/terraform.tfvars.example b/src/agentic_platform/agent/code_transform/deployment/terraform/terraform.tfvars.example new file mode 100644 index 0000000..d9e44ec --- /dev/null +++ 
b/src/agentic_platform/agent/code_transform/deployment/terraform/terraform.tfvars.example @@ -0,0 +1,12 @@ +# Example Terraform variables file +# Copy this file to terraform.tfvars and update with your values + +aws_region = "us-east-1" +environment = "production" +image_version = "0.1.0" +source_bucket_name = "my-source-bucket" +results_bucket_name = "my-results-bucket" +vpc_cidr = "10.0.0.0/16" +max_parallel_jobs = 4 +task_cpu = "2048" +task_memory = "4096" diff --git a/src/agentic_platform/agent/code_transform/deployment/terraform/variables.tf b/src/agentic_platform/agent/code_transform/deployment/terraform/variables.tf new file mode 100644 index 0000000..c84655d --- /dev/null +++ b/src/agentic_platform/agent/code_transform/deployment/terraform/variables.tf @@ -0,0 +1,71 @@ +variable "aws_region" { + description = "AWS region for resources" + type = string + default = "us-east-1" +} + +variable "environment" { + description = "Environment name (development, staging, production)" + type = string + default = "production" + + validation { + condition = contains(["development", "staging", "production"], var.environment) + error_message = "Environment must be development, staging, or production." + } +} + +variable "image_version" { + description = "Docker image version tag" + type = string + default = "latest" +} + +variable "source_bucket_name" { + description = "S3 bucket containing source code folders" + type = string +} + +variable "results_bucket_name" { + description = "S3 bucket for storing transformation results" + type = string +} + +variable "vpc_cidr" { + description = "CIDR block for VPC" + type = string + default = "10.0.0.0/16" +} + +variable "max_parallel_jobs" { + description = "Maximum number of parallel transformation jobs" + type = number + default = 4 + + validation { + condition = var.max_parallel_jobs >= 1 && var.max_parallel_jobs <= 10 + error_message = "Max parallel jobs must be between 1 and 10." + } +} + +variable "task_cpu" { + description = "CPU units for ECS task (1024 = 1 vCPU)" + type = string + default = "2048" + + validation { + condition = contains(["256", "512", "1024", "2048", "4096"], var.task_cpu) + error_message = "Task CPU must be one of: 256, 512, 1024, 2048, 4096." + } +} + +variable "task_memory" { + description = "Memory for ECS task in MB" + type = string + default = "4096" + + validation { + condition = contains(["512", "1024", "2048", "4096", "8192", "16384"], var.task_memory) + error_message = "Task memory must be one of: 512, 1024, 2048, 4096, 8192, 16384." + } +} diff --git a/src/agentic_platform/agent/code_transform/docs/build-and-test.md b/src/agentic_platform/agent/code_transform/docs/build-and-test.md new file mode 100644 index 0000000..d35b327 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/docs/build-and-test.md @@ -0,0 +1,671 @@ +# Build and Test Documentation + +## Overview + +This document describes how to build and test the ATX Container Test Runner Docker image. The build and test process validates that the container is properly configured with all required components and can execute ATX transformations successfully. 
+ +## Quick Start + +```bash +# Ensure Docker is running +docker info + +# Build and test with defaults +./scripts/build-and-test.sh + +# Build without cache (clean build) +./scripts/build-and-test.sh --no-cache + +# Build with verbose output +./scripts/build-and-test.sh --verbose +``` + +## Prerequisites + +### Required + +- **Docker**: Version 20.10 or later + - Install: https://docs.docker.com/get-docker/ + - Verify: `docker --version` + - Ensure Docker daemon is running: `docker info` + +### Optional (for S3 integration test) + +- **AWS CLI**: Configured with credentials + - Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html + - Configure: `aws configure` + - Verify: `aws sts get-caller-identity` + +- **S3 Bucket**: With sample Progress code + - Create bucket: `aws s3 mb s3://my-test-bucket` + - Upload sample code: `aws s3 sync ./examples/sample-code s3://my-test-bucket/test/` + +## Build and Test Script + +### Usage + +```bash +./scripts/build-and-test.sh [OPTIONS] +``` + +### Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--image-name ` | Docker image name | `atx-test-runner` | +| `--image-tag ` | Docker image tag | `latest` | +| `--output-dir ` | Output directory for test results | `./build_test_results` | +| `--max-size-mb ` | Maximum acceptable image size in MB | `2000` (2GB) | +| `--no-cache` | Build without using Docker cache | `false` | +| `--skip-build` | Skip Docker build step | `false` | +| `--skip-smoke-test` | Skip smoke test | `false` | +| `--skip-s3-test` | Skip S3 integration test | `false` | +| `--verbose` | Enable verbose output | `false` | +| `--help` | Show help message | - | + +### Examples + +```bash +# Standard build and test +./scripts/build-and-test.sh + +# Clean build (no cache) +./scripts/build-and-test.sh --no-cache + +# Build with custom image name and tag +./scripts/build-and-test.sh --image-name my-atx-runner --image-tag v1.0.0 + +# Test existing image without rebuilding +./scripts/build-and-test.sh --skip-build --image-tag v1.0.0 + +# Build and skip S3 test (no AWS setup) +./scripts/build-and-test.sh --skip-s3-test + +# Verbose output for debugging +./scripts/build-and-test.sh --verbose + +# Custom output directory +./scripts/build-and-test.sh --output-dir ./my-test-results +``` + +## Build Process + +### Step 1: Docker Image Build + +The script builds the Docker image using the Dockerfile in the project root. + +**What it does:** +- Validates Dockerfile exists +- Executes `docker build` command +- Captures build logs +- Reports build duration + +**Build command:** +```bash +docker build -t atx-test-runner:latest -f ./Dockerfile . +``` + +**Expected output:** +``` +========================================== +STEP 1: Building Docker Image +========================================== +[INFO] Building image: atx-test-runner:latest +[INFO] Build context: /path/to/project +[INFO] Starting Docker build (this may take several minutes)... +[SUCCESS] Docker image built successfully +[INFO] Image: atx-test-runner:latest +[INFO] Build time: 180s +``` + +**Build time:** Typically 2-5 minutes depending on: +- Network speed (downloading packages) +- Docker cache availability +- System resources + +### Step 2: Image Size Verification + +The script verifies the built image size is within acceptable limits. 
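+ +If you want to reproduce this check by hand, a rough equivalent is shown below (an illustrative sketch, not the script's exact logic; the 2000 MB limit mirrors the default `--max-size-mb`): + +```bash +# Rough manual equivalent of the size check (illustrative sketch). +IMAGE="atx-test-runner:latest" +MAX_MB=2000 + +# Docker reports the image size in bytes; convert to MB. +SIZE_BYTES=$(docker image inspect "$IMAGE" --format '{{.Size}}') +SIZE_MB=$(( SIZE_BYTES / 1024 / 1024 )) + +echo "Image size: ${SIZE_MB} MB (limit: ${MAX_MB} MB)" +if [ "$SIZE_MB" -gt "$MAX_MB" ]; then +  echo "Image exceeds the maximum allowed size" >&2 +  exit 2  # same exit code the script uses for a failed size check +fi +```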
+ +**What it checks:** +- Image size in MB +- Compares against maximum allowed size (default: 2000 MB) +- Reports size as percentage of maximum + +**Expected output:** +``` +========================================== +STEP 2: Verifying Image Size +========================================== +[INFO] Image size: 1250 MB +[INFO] Maximum allowed: 2000 MB +[SUCCESS] Image size is acceptable (62% of maximum) +``` + +**Typical image sizes:** +- Base Ubuntu 22.04: ~80 MB +- With system dependencies: ~200 MB +- With ATX CLI: ~500 MB +- With AWS CLI: ~600 MB +- Final image: ~1200-1500 MB + +**If size exceeds limit:** +- Review Dockerfile for optimization opportunities +- Remove unnecessary packages +- Use multi-stage builds +- Clean up apt cache and temporary files + +### Step 3: Component Verification + +The script verifies all required components are installed and accessible. + +**Components checked:** +1. **ATX CLI** + - Command exists: `command -v atx` + - Version check: `atx --version` + - Expected location: `/opt/atx/atx` or in PATH + +2. **AWS CLI** + - Command exists: `command -v aws` + - Version check: `aws --version` + - Expected version: AWS CLI v2 + +3. **Scripts** + - `/usr/local/bin/atx-orchestrator.sh` + - `/usr/local/bin/s3-integration.sh` + - `/usr/local/bin/smoke-test.sh` + - `/usr/local/bin/csv-parser.sh` + - All must be present and executable + +**Expected output:** +``` +========================================== +STEP 3: Verifying Component Installation +========================================== +[INFO] Checking ATX CLI installation... +[SUCCESS] ATX CLI is installed +[INFO] Version: 1.0.1318.0 +[INFO] Checking AWS CLI installation... +[SUCCESS] AWS CLI is installed +[INFO] aws-cli/2.15.0 Python/3.11.6 +[INFO] Checking scripts are installed... +[SUCCESS] /usr/local/bin/atx-orchestrator.sh is present and executable +[SUCCESS] /usr/local/bin/s3-integration.sh is present and executable +[SUCCESS] /usr/local/bin/smoke-test.sh is present and executable +[SUCCESS] /usr/local/bin/csv-parser.sh is present and executable +[SUCCESS] All components are properly installed +``` + +### Step 4: Smoke Test + +The script runs the smoke test to verify ATX can execute transformations. + +**What it does:** +- Runs container with smoke test flag +- Creates minimal Progress code sample +- Executes ATX transformation +- Verifies transformation completes successfully + +**Smoke test command:** +```bash +docker run --rm \ + -v ./build_test_results/smoke_test:/workspace/results \ + atx-test-runner:latest \ + --smoke-test \ + --output-dir /workspace/results +``` + +**Expected output:** +``` +========================================== +STEP 4: Running Smoke Test +========================================== +[INFO] Executing smoke test in container... +[INFO] This verifies ATX can execute transformations +[SUCCESS] Smoke test passed +[INFO] Duration: 15s +[INFO] Results: ./build_test_results/smoke_test/ +[INFO] Log file: ./build_test_results/smoke_test/smoke_test.log +``` + +**Smoke test validates:** +- ATX CLI is accessible in container +- AWS CLI is accessible in container +- Test Progress code can be created +- ATX can execute a transformation +- Transformation completes without errors + +See [Smoke Test Documentation](smoke-test.md) for detailed information. + +### Step 5: S3 Integration Test (Optional) + +The script can optionally run an S3 integration test to verify end-to-end functionality. + +**Status:** Currently skipped by default (not yet implemented) + +**When implemented, it will:** +1. 
Check AWS credentials are configured +2. Create or use existing S3 test bucket +3. Upload sample Progress code to S3 +4. Create CSV with S3 paths +5. Run orchestrator with CSV +6. Verify results are uploaded to S3 +7. Clean up test resources + +**To skip:** Use `--skip-s3-test` flag (default behavior currently) + +## Output Files + +The build and test process generates several output files in the output directory (default: `./build_test_results/`). + +### Directory Structure + +``` +build_test_results/ +├── build.log              # Docker build output +├── atx_version.txt        # ATX CLI version info +├── aws_version.txt        # AWS CLI version info +├── summary.txt            # Summary report +└── smoke_test/            # Smoke test results +    ├── smoke_test.log     # Smoke test execution log +    └── smoke_test_failure/  # Preserved on failure +        ├── workspace/ +        │   └── test.p     # Test Progress code +        └── README.txt     # Debugging guide +``` + +### Summary Report + +The `summary.txt` file contains a comprehensive summary of the build and test process: + +``` +ATX Container Build and Test Summary +==================================== +Generated: 2025-12-08 14:30:00 + +IMAGE INFORMATION +----------------- +Image Name: atx-test-runner:latest +Image ID: sha256:abc123... +Image Size: 1250 MB +Created: 2025-12-08T14:25:00Z + +COMPONENTS VERIFIED +------------------- +✓ ATX CLI installed and accessible +✓ AWS CLI installed and accessible +✓ Orchestrator script present +✓ S3 integration script present +✓ Smoke test script present +✓ CSV parser script present + +TEST RESULTS +------------ +✓ Docker build: PASSED +✓ Image size check: PASSED +✓ Component verification: PASSED +✓ Smoke test: PASSED + +OUTPUT FILES +------------ +- Build log: ./build_test_results/build.log +- ATX version: ./build_test_results/atx_version.txt +- AWS version: ./build_test_results/aws_version.txt +- Smoke test results: ./build_test_results/smoke_test/ +- Summary: ./build_test_results/summary.txt + +NEXT STEPS +---------- +1. Push image to ECR: +   docker tag atx-test-runner:latest <account-id>.dkr.ecr.<region>.amazonaws.com/atx-test-runner:latest +   docker push <account-id>.dkr.ecr.<region>.amazonaws.com/atx-test-runner:latest + +2. Deploy to ECS/EKS: +   See docs/deployment.md for deployment instructions + +3.
Run with sample data: + docker run --rm -v $(pwd)/examples:/data atx-test-runner:latest --csv-file /data/single-customer.csv +``` + +## Exit Codes + +The build and test script uses specific exit codes to indicate different failure types: + +| Exit Code | Meaning | Description | +|-----------|---------|-------------| +| 0 | Success | All tests passed, image is ready | +| 1 | Build failed | Docker build failed | +| 2 | Size check failed | Image size exceeds maximum allowed | +| 3 | Component verification failed | Required component missing or not accessible | +| 4 | Smoke test failed | Smoke test execution failed | +| 5 | S3 test failed | S3 integration test failed | +| 10 | Docker not available | Docker not installed or daemon not running | +| 11 | Invalid arguments | Invalid command-line arguments | + +## Troubleshooting + +### Docker Daemon Not Running + +**Symptoms:** +``` +[ERROR] Docker daemon is not running +[ERROR] Please start Docker Desktop or the Docker daemon +``` + +**Solutions:** +- **macOS/Windows**: Start Docker Desktop application +- **Linux**: Start Docker daemon: `sudo systemctl start docker` +- Verify: `docker info` + +### Build Failed + +**Symptoms:** +``` +[ERROR] Docker build failed +[ERROR] See build log: ./build_test_results/build.log +``` + +**Common causes:** +1. **Network issues**: Cannot download packages + - Check internet connectivity + - Try again with `--no-cache` + +2. **Disk space**: Insufficient disk space + - Check: `df -h` + - Clean up: `docker system prune -a` + +3. **Dockerfile errors**: Syntax or command errors + - Review build log + - Check Dockerfile syntax + - Verify package names and URLs + +**Solutions:** +1. Review build log: `cat ./build_test_results/build.log` +2. Try clean build: `./scripts/build-and-test.sh --no-cache` +3. Check Docker disk space: `docker system df` +4. Clean up old images: `docker system prune -a` + +### Image Size Too Large + +**Symptoms:** +``` +[ERROR] Image size exceeds maximum allowed size +[ERROR] Actual: 2500 MB +[ERROR] Maximum: 2000 MB +``` + +**Solutions:** +1. **Increase limit** (if acceptable): + ```bash + ./scripts/build-and-test.sh --max-size-mb 3000 + ``` + +2. **Optimize Dockerfile**: + - Combine RUN commands to reduce layers + - Clean up apt cache: `rm -rf /var/lib/apt/lists/*` + - Remove unnecessary packages + - Use multi-stage builds + +3. **Review layers**: + ```bash + docker history atx-test-runner:latest --human + ``` + +### Component Verification Failed + +**Symptoms:** +``` +[ERROR] ATX CLI is not installed or not accessible +[ERROR] Some components are missing or not properly installed +``` + +**Solutions:** +1. **Check Dockerfile**: Verify ATX installation commands +2. **Check PATH**: Ensure ATX is in PATH or at expected location +3. **Manual verification**: + ```bash + docker run --rm atx-test-runner:latest sh -c "command -v atx && atx --version" + ``` +4. **Review installation logs**: Check build.log for errors + +### Smoke Test Failed + +**Symptoms:** +``` +[ERROR] Smoke test failed (exit code: 4) +``` + +**Common causes:** +1. **ATX not accessible**: ATX CLI not in PATH +2. **Git requirement**: ATX requires code in git repository +3. **Authentication**: Midway cookie expired or AWS credentials not configured +4. **Network issues**: Cannot reach ATX service endpoints + +**Solutions:** +1. **Review smoke test log**: + ```bash + cat ./build_test_results/smoke_test/smoke_test.log + ``` + +2. 
**Check preserved artifacts**: + ```bash + ls -la ./build_test_results/smoke_test/smoke_test_failure/ + ``` + +3. **Run smoke test manually**: + ```bash + docker run --rm -it atx-test-runner:latest --smoke-test --verbose + ``` + +4. **Check ATX authentication**: + ```bash + docker run --rm atx-test-runner:latest sh -c "atx --version" + ``` + +See [Smoke Test Documentation](smoke-test.md) for detailed troubleshooting. + +## Manual Testing + +### Test Individual Components + +```bash +# Test ATX CLI +docker run --rm atx-test-runner:latest sh -c "atx --version" + +# Test AWS CLI +docker run --rm atx-test-runner:latest sh -c "aws --version" + +# Test scripts are present +docker run --rm atx-test-runner:latest sh -c "ls -la /usr/local/bin/*.sh" + +# Test orchestrator help +docker run --rm atx-test-runner:latest --help +``` + +### Test with Sample Data + +```bash +# Run with single customer CSV +docker run --rm \ + -v $(pwd)/examples:/data \ + -v $(pwd)/test_results:/workspace/results \ + atx-test-runner:latest \ + --csv-file /data/single-customer.csv \ + --output-dir /workspace/results + +# Run with verbose output +docker run --rm \ + -v $(pwd)/examples:/data \ + -v $(pwd)/test_results:/workspace/results \ + atx-test-runner:latest \ + --csv-file /data/single-customer.csv \ + --output-dir /workspace/results \ + --verbose +``` + +### Interactive Testing + +```bash +# Start interactive shell in container +docker run --rm -it atx-test-runner:latest sh + +# Inside container, test commands: +atx --version +aws --version +ls -la /usr/local/bin/ +cat /usr/local/bin/smoke-test.sh +``` + +## CI/CD Integration + +### GitHub Actions + +```yaml +name: Build and Test ATX Container + +on: [push, pull_request] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Build and test + run: ./scripts/build-and-test.sh --verbose + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v3 + with: + name: build-test-results + path: build_test_results/ + + - name: Upload summary + if: always() + uses: actions/upload-artifact@v3 + with: + name: summary + path: build_test_results/summary.txt +``` + +### GitLab CI + +```yaml +build-and-test: + stage: test + image: docker:latest + services: + - docker:dind + script: + - chmod +x scripts/build-and-test.sh + - ./scripts/build-and-test.sh --verbose + artifacts: + when: always + paths: + - build_test_results/ + expire_in: 1 week +``` + +### Jenkins Pipeline + +```groovy +pipeline { + agent any + + stages { + stage('Build and Test') { + steps { + sh 'chmod +x scripts/build-and-test.sh' + sh './scripts/build-and-test.sh --verbose' + } + } + } + + post { + always { + archiveArtifacts artifacts: 'build_test_results/**/*', allowEmptyArchive: true + publishHTML([ + reportDir: 'build_test_results', + reportFiles: 'summary.txt', + reportName: 'Build and Test Summary' + ]) + } + } +} +``` + +## Best Practices + +1. **Always run tests after building** + - Don't skip smoke test + - Verify all components before deployment + +2. **Use clean builds for releases** + - Use `--no-cache` for production builds + - Ensures reproducible builds + +3. **Monitor image size** + - Keep image size reasonable + - Optimize Dockerfile regularly + +4. **Preserve test artifacts** + - Keep build_test_results for debugging + - Archive in CI/CD pipelines + +5. **Test locally before CI/CD** + - Run build-and-test.sh locally + - Catch issues early + +6. 
**Version your images** +   - Use semantic versioning +   - Tag with git commit SHA + +7. **Document custom configurations** +   - Document any Dockerfile changes +   - Update this documentation + +## Next Steps + +After successful build and test: + +1. **Tag image for registry**: +   ```bash +   docker tag atx-test-runner:latest <account-id>.dkr.ecr.<region>.amazonaws.com/atx-test-runner:v1.0.0 +   ``` + +2. **Push to ECR**: +   ```bash +   aws ecr get-login-password --region <region> | docker login --username AWS --password-stdin <account-id>.dkr.ecr.<region>.amazonaws.com +   docker push <account-id>.dkr.ecr.<region>.amazonaws.com/atx-test-runner:v1.0.0 +   ``` + +3. **Deploy to ECS/EKS**: +   - See [Deployment Guide](deployment.md) + +4. **Run with production data**: +   - Create production CSV +   - Configure AWS credentials +   - Run orchestrator + +## Requirements Validation + +This build and test process validates the following requirements: + +- **Requirement 7.5**: Dockerfile builds custom image with ATX and dependencies +- **Requirement 10.1**: Smoke test command executes sample transformation +- **Requirement 10.3**: Verifies ATX is properly installed and accessible +- **Requirement 10.4**: Exits with code 0 on success, displays success message +- **Requirement 1.3**: Container can execute ATX transformations +- **Requirement 1.4**: Results are captured and accessible + +## Related Documentation + +- [Smoke Test Documentation](smoke-test.md) - Detailed smoke test information +- [Deployment Guide](deployment.md) - Container deployment instructions +- [Troubleshooting Guide](troubleshooting.md) - General troubleshooting +- [Scripts README](../scripts/README.md) - Script documentation + diff --git a/src/agentic_platform/agent/code_transform/docs/deployment-guide.md b/src/agentic_platform/agent/code_transform/docs/deployment-guide.md new file mode 100644 index 0000000..605a9f9 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/docs/deployment-guide.md @@ -0,0 +1,649 @@ +# ATX Test Runner Deployment Guide + +This guide provides step-by-step instructions for deploying the ATX Test Runner to AWS infrastructure. + +## Table of Contents + +1. [Prerequisites](#prerequisites) +2. [IAM Setup](#iam-setup) +3. [ECR Setup and Image Push](#ecr-setup-and-image-push) +4. [Deployment Options](#deployment-options) +   - [Option A: ECS on Fargate](#option-a-ecs-on-fargate) +   - [Option B: EKS (Kubernetes)](#option-b-eks-kubernetes) +   - [Option C: EC2 with Docker](#option-c-ec2-with-docker) +5. [Configuration](#configuration) +6. [Running Tasks](#running-tasks) +7. [Monitoring and Troubleshooting](#monitoring-and-troubleshooting) +8.
[Scaling and Optimization](#scaling-and-optimization) + +## Prerequisites + +Before deploying, ensure you have: + +### Required Tools +- AWS CLI v2 installed and configured +- Docker installed and running +- Git (for cloning the repository) +- jq (for JSON processing, optional but recommended) + +### AWS Account Requirements +- AWS account with appropriate permissions +- S3 buckets created: + - Source bucket for Progress code + - Results bucket for transformation outputs +- ECR repository access or permissions to create one + +### Verify Prerequisites + +```bash +# Check AWS CLI +aws --version +# Expected: aws-cli/2.x.x or higher + +# Check Docker +docker --version +# Expected: Docker version 20.10.x or higher + +# Check AWS credentials +aws sts get-caller-identity +# Should return your account ID and user/role + +# Verify S3 buckets exist +aws s3 ls s3://your-source-bucket/ +aws s3 ls s3://your-results-bucket/ +``` + +## IAM Setup + +### Step 1: Create IAM Roles + +The ATX Test Runner requires two IAM roles: + +1. **Task Execution Role**: For ECS to pull images and write logs +2. **Task Role**: For the container to access S3 buckets + +#### Create Task Execution Role + +```bash +# Create trust policy +cat > trust-policy.json << 'EOF' +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": "ecs-tasks.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] +} +EOF + +# Create the role +aws iam create-role \ + --role-name atx-test-runner-execution-role \ + --assume-role-policy-document file://trust-policy.json + +# Attach managed policy +aws iam attach-role-policy \ + --role-name atx-test-runner-execution-role \ + --policy-arn arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy +``` + +#### Create Task Role with S3 Access + +```bash +# Create S3 access policy +cat > s3-policy.json << 'EOF' +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::YOUR-SOURCE-BUCKET", + "arn:aws:s3:::YOUR-SOURCE-BUCKET/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:PutObjectAcl", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::YOUR-RESULTS-BUCKET", + "arn:aws:s3:::YOUR-RESULTS-BUCKET/*" + ] + } + ] +} +EOF + +# Replace bucket names +sed -i 's/YOUR-SOURCE-BUCKET/your-actual-source-bucket/g' s3-policy.json +sed -i 's/YOUR-RESULTS-BUCKET/your-actual-results-bucket/g' s3-policy.json + +# Create the role +aws iam create-role \ + --role-name atx-test-runner-task-role \ + --assume-role-policy-document file://trust-policy.json + +# Attach S3 policy +aws iam put-role-policy \ + --role-name atx-test-runner-task-role \ + --policy-name s3-access \ + --policy-document file://s3-policy.json +``` + +### Step 2: Verify IAM Roles + +```bash +# Get execution role ARN +aws iam get-role --role-name atx-test-runner-execution-role \ + --query 'Role.Arn' --output text + +# Get task role ARN +aws iam get-role --role-name atx-test-runner-task-role \ + --query 'Role.Arn' --output text + +# Save these ARNs - you'll need them later +``` + +## ECR Setup and Image Push + +### Step 1: Build the Docker Image + +```bash +# Clone the repository (if not already done) +git clone +cd atx-containers + +# Build the image +docker build -t atx-test-runner:latest . 
+ +# Verify the build +docker images atx-test-runner +``` + +### Step 2: Push to ECR + +Use the provided script for easy ECR push: + +```bash +# Make the script executable +chmod +x scripts/push-to-ecr.sh + +# Push to ECR (replace with your account ID and region) +./scripts/push-to-ecr.sh 123456789012 us-east-1 + +# The script will: +# - Authenticate to ECR +# - Create repository if needed +# - Tag the image +# - Push version and latest tags +# - Verify the push +``` + +#### Manual ECR Push (Alternative) + +```bash +# Set variables +AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +AWS_REGION="us-east-1" +REPO_NAME="atx-test-runner" +VERSION="0.1.0" + +# Authenticate Docker to ECR +aws ecr get-login-password --region ${AWS_REGION} | \ + docker login --username AWS --password-stdin \ + ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com + +# Create ECR repository (if it doesn't exist) +aws ecr create-repository \ + --repository-name ${REPO_NAME} \ + --region ${AWS_REGION} \ + --image-scanning-configuration scanOnPush=true \ + --encryption-configuration encryptionType=AES256 \ + || echo "Repository already exists" + +# Tag the image +docker tag atx-test-runner:latest \ + ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REPO_NAME}:${VERSION} + +docker tag atx-test-runner:latest \ + ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REPO_NAME}:latest + +# Push the images +docker push ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REPO_NAME}:${VERSION} +docker push ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REPO_NAME}:latest + +# Verify +aws ecr describe-images \ + --repository-name ${REPO_NAME} \ + --region ${AWS_REGION} +``` + +## Deployment Options + +### Option A: ECS on Fargate + +This is the recommended option for most use cases. 
+ +#### Step 1: Create ECS Cluster + +```bash +# Create cluster +aws ecs create-cluster \ + --cluster-name atx-test-runner-cluster \ + --capacity-providers FARGATE FARGATE_SPOT \ + --default-capacity-provider-strategy \ + capacityProvider=FARGATE,weight=1 \ + capacityProvider=FARGATE_SPOT,weight=4 + +# Enable Container Insights (optional) +aws ecs update-cluster-settings \ + --cluster atx-test-runner-cluster \ + --settings name=containerInsights,value=enabled +``` + +#### Step 2: Create CloudWatch Log Group + +```bash +aws logs create-log-group \ + --log-group-name /ecs/atx-test-runner + +# Set retention (optional) +aws logs put-retention-policy \ + --log-group-name /ecs/atx-test-runner \ + --retention-in-days 30 +``` + +#### Step 3: Register Task Definition + +```bash +# Navigate to deployment directory +cd deployment + +# Update task definition with your values +export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +export AWS_REGION="us-east-1" +export IMAGE_VERSION="0.1.0" + +# Replace placeholders +sed "s/ACCOUNT_ID/${AWS_ACCOUNT_ID}/g" ecs-task-definition.json | \ +sed "s/REGION/${AWS_REGION}/g" | \ +sed "s/VERSION/${IMAGE_VERSION}/g" > ecs-task-definition-updated.json + +# Register the task definition +aws ecs register-task-definition \ + --cli-input-json file://ecs-task-definition-updated.json + +# Verify registration +aws ecs describe-task-definition \ + --task-definition atx-test-runner \ + --query 'taskDefinition.taskDefinitionArn' +``` + +#### Step 4: Create VPC and Networking (if needed) + +If you don't have a VPC with public subnets: + +```bash +# Use CloudFormation template +aws cloudformation create-stack \ + --stack-name atx-test-runner-stack \ + --template-body file://cloudformation-complete-stack.yaml \ + --parameters \ + ParameterKey=SourceBucketName,ParameterValue=your-source-bucket \ + ParameterKey=ResultsBucketName,ParameterValue=your-results-bucket \ + ParameterKey=EnvironmentName,ParameterValue=production \ + --capabilities CAPABILITY_NAMED_IAM + +# Wait for completion +aws cloudformation wait stack-create-complete \ + --stack-name atx-test-runner-stack + +# Get outputs +aws cloudformation describe-stacks \ + --stack-name atx-test-runner-stack \ + --query 'Stacks[0].Outputs' +``` + +#### Step 5: Run the Task + +```bash +# Set variables from CloudFormation outputs or your existing VPC +CLUSTER_NAME="atx-test-runner-cluster" +TASK_DEFINITION="atx-test-runner" +SUBNET_1="subnet-xxxxx" +SUBNET_2="subnet-yyyyy" +SECURITY_GROUP="sg-zzzzz" + +# Run the task +aws ecs run-task \ + --cluster ${CLUSTER_NAME} \ + --task-definition ${TASK_DEFINITION} \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={ + subnets=[${SUBNET_1},${SUBNET_2}], + securityGroups=[${SECURITY_GROUP}], + assignPublicIp=ENABLED + }" \ + --region ${AWS_REGION} + +# Get task ARN from output +TASK_ARN=$(aws ecs list-tasks \ + --cluster ${CLUSTER_NAME} \ + --query 'taskArns[0]' \ + --output text) + +# Monitor task +aws ecs describe-tasks \ + --cluster ${CLUSTER_NAME} \ + --tasks ${TASK_ARN} +``` + +### Option B: EKS (Kubernetes) + +#### Step 1: Configure kubectl + +```bash +# Update kubeconfig +aws eks update-kubeconfig \ + --name your-eks-cluster \ + --region us-east-1 + +# Verify connection +kubectl get nodes +``` + +#### Step 2: Create IAM Role for Service Account (IRSA) + +```bash +# Create OIDC provider (if not already done) +eksctl utils associate-iam-oidc-provider \ + --cluster your-eks-cluster \ + --approve + +# Create service account with IAM role 
+eksctl create iamserviceaccount \ + --name atx-test-runner \ + --namespace atx-test-runner \ + --cluster your-eks-cluster \ + --attach-policy-arn arn:aws:iam::${AWS_ACCOUNT_ID}:policy/atx-s3-access \ + --approve +``` + +#### Step 3: Deploy to Kubernetes + +```bash +# Update deployment YAML +cd deployment +export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +export AWS_REGION="us-east-1" +export IMAGE_VERSION="0.1.0" + +sed "s/ACCOUNT_ID/${AWS_ACCOUNT_ID}/g" kubernetes-deployment.yaml | \ +sed "s/REGION/${AWS_REGION}/g" | \ +sed "s/VERSION/${IMAGE_VERSION}/g" > kubernetes-deployment-updated.yaml + +# Apply configuration +kubectl apply -f kubernetes-deployment-updated.yaml + +# Verify deployment +kubectl get all -n atx-test-runner + +# Check job status +kubectl get jobs -n atx-test-runner +kubectl describe job atx-test-runner-job -n atx-test-runner + +# View logs +kubectl logs -n atx-test-runner job/atx-test-runner-job -f +``` + +### Option C: EC2 with Docker + +For running on EC2 instances: + +#### Step 1: Launch EC2 Instance + +```bash +# Launch instance with appropriate IAM role +aws ec2 run-instances \ + --image-id ami-xxxxx \ + --instance-type t3.large \ + --iam-instance-profile Name=atx-test-runner-instance-profile \ + --user-data file://examples/ec2-user-data.sh \ + --tag-specifications 'ResourceType=instance,Tags=[{Key=Name,Value=atx-test-runner}]' +``` + +#### Step 2: SSH and Run Container + +```bash +# SSH to instance +ssh ec2-user@ + +# Authenticate to ECR +aws ecr get-login-password --region us-east-1 | \ + docker login --username AWS --password-stdin \ + ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com + +# Pull image +docker pull ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/atx-test-runner:latest + +# Run container +docker run \ + -e AWS_DEFAULT_REGION=us-east-1 \ + ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/atx-test-runner:latest \ + --csv-file /config/repos.csv \ + --mode parallel \ + --max-jobs 4 +``` + +## Configuration + +### CSV Configuration File + +Create a CSV file with your repository configurations: + +```csv +s3_path,build_command,transformation_name,output_s3_path +s3://source-bucket/customer1/folder1/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer1/folder1/ +s3://source-bucket/customer1/folder2/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer1/folder2/ +s3://source-bucket/customer2/folder1/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer2/folder1/ +``` + +Upload to S3 or mount as ConfigMap (Kubernetes): + +```bash +# Upload to S3 +aws s3 cp repos.csv s3://your-config-bucket/repos.csv + +# Or create Kubernetes ConfigMap +kubectl create configmap atx-test-runner-config \ + --from-file=repos.csv \ + -n atx-test-runner +``` + +### Environment Variables + +Configure the following environment variables: + +| Variable | Description | Default | Required | +|----------|-------------|---------|----------| +| AWS_DEFAULT_REGION | AWS region | - | Yes | +| AWS_REGION | AWS region (alternative) | - | Yes | +| ATX_LOG_LEVEL | Logging level | INFO | No | +| SOURCE_BUCKET | Source S3 bucket | - | No | +| RESULTS_BUCKET | Results S3 bucket | - | No | + +## Running Tasks + +### One-Time Execution + +```bash +# ECS +aws ecs run-task \ + --cluster atx-test-runner-cluster \ + --task-definition atx-test-runner \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={subnets=[subnet-xxx],securityGroups=[sg-xxx],assignPublicIp=ENABLED}" + +# Kubernetes +kubectl create 
job atx-manual-run \ +  --from=cronjob/atx-test-runner-scheduled \ +  -n atx-test-runner +``` + +### Scheduled Execution + +#### ECS with EventBridge + +```bash +# Create EventBridge rule +aws events put-rule \ +  --name atx-daily-run \ +  --schedule-expression "cron(0 2 * * ? *)" \ +  --state ENABLED + +# Add ECS task as target +aws events put-targets \ +  --rule atx-daily-run \ +  --targets "Id"="1","Arn"="arn:aws:ecs:us-east-1:${AWS_ACCOUNT_ID}:cluster/atx-test-runner-cluster","RoleArn"="arn:aws:iam::${AWS_ACCOUNT_ID}:role/ecsEventsRole","EcsParameters"="{TaskDefinitionArn=arn:aws:ecs:us-east-1:${AWS_ACCOUNT_ID}:task-definition/atx-test-runner,TaskCount=1,LaunchType=FARGATE,NetworkConfiguration={awsvpcConfiguration={Subnets=[subnet-xxx],SecurityGroups=[sg-xxx],AssignPublicIp=ENABLED}}}" +``` + +#### Kubernetes CronJob + +The Kubernetes deployment includes a CronJob that runs daily at 2 AM: + +```bash +# View CronJob +kubectl get cronjobs -n atx-test-runner + +# Manually trigger +kubectl create job atx-manual-$(date +%s) \ +  --from=cronjob/atx-test-runner-scheduled \ +  -n atx-test-runner +``` + +## Monitoring and Troubleshooting + +### View Logs + +#### ECS CloudWatch Logs + +```bash +# Tail logs +aws logs tail /ecs/atx-test-runner --follow + +# Get specific task logs +aws logs get-log-events \ +  --log-group-name /ecs/atx-test-runner \ +  --log-stream-name atx/atx-test-runner/<task-id> +``` + +#### Kubernetes Logs + +```bash +# View job logs +kubectl logs -n atx-test-runner job/atx-test-runner-job -f + +# View pod logs +kubectl logs -n atx-test-runner <pod-name> -f + +# View previous pod logs (if crashed) +kubectl logs -n atx-test-runner <pod-name> --previous +``` + +### Check Task Status + +#### ECS + +```bash +# List running tasks +aws ecs list-tasks --cluster atx-test-runner-cluster + +# Describe task +aws ecs describe-tasks \ +  --cluster atx-test-runner-cluster \ +  --tasks <task-arn> +``` + +#### Kubernetes + +```bash +# Check job status +kubectl get jobs -n atx-test-runner + +# Describe job +kubectl describe job atx-test-runner-job -n atx-test-runner + +# Check pod status +kubectl get pods -n atx-test-runner +``` + +### Common Issues + +See the [Troubleshooting Guide](troubleshooting.md) for detailed solutions. + +## Scaling and Optimization + +### Vertical Scaling + +Adjust CPU and memory based on workload: + +```bash +# Update task definition with more resources +# Edit ecs-task-definition.json: +#   "cpu": "4096", +#   "memory": "8192" + +# Re-register +aws ecs register-task-definition \ +  --cli-input-json file://ecs-task-definition.json +``` + +### Horizontal Scaling + +Run multiple tasks in parallel: + +```bash +# Run multiple tasks +for i in {1..5}; do +  aws ecs run-task \ +    --cluster atx-test-runner-cluster \ +    --task-definition atx-test-runner \ +    --launch-type FARGATE \ +    --network-configuration "awsvpcConfiguration={subnets=[subnet-xxx],securityGroups=[sg-xxx],assignPublicIp=ENABLED}" +done +``` + +### Cost Optimization + +1. **Use Fargate Spot**: Up to 70% cheaper than regular Fargate +2. **Right-size resources**: Monitor and adjust CPU/memory +3. **Use lifecycle policies**: Clean up old ECR images +4. **Schedule during off-peak**: Run during cheaper hours + +## Next Steps + +1. Review [Monitoring Guide](troubleshooting.md#monitoring) +2. Set up [Alerting](troubleshooting.md#alerting) +3. Configure [Auto-scaling](troubleshooting.md#auto-scaling) +4.
Implement [CI/CD Pipeline](../examples/ci-cd-integration.sh) + +## Support + +For issues or questions: +- Check [Troubleshooting Guide](troubleshooting.md) +- Review [Examples](../examples/) +- Check CloudWatch Logs +- Verify IAM permissions diff --git a/src/agentic_platform/agent/code_transform/docs/exit-codes-and-output-modes.md b/src/agentic_platform/agent/code_transform/docs/exit-codes-and-output-modes.md new file mode 100644 index 0000000..a9ed452 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/docs/exit-codes-and-output-modes.md @@ -0,0 +1,231 @@ +# Exit Codes and Output Modes + +This document describes the exit code behavior and output modes implemented in the ATX Container Test Runner. + +## Exit Code Propagation + +The orchestrator script (`scripts/atx-orchestrator.sh`) implements proper exit code propagation to support CI/CD integration. + +### Exit Code Behavior + +| Exit Code | Meaning | Description | +|-----------|---------|-------------| +| 0 | Success | All transformations completed successfully | +| 1 | Failure | One or more transformations failed | +| 2+ | System Error | Missing dependencies, invalid configuration, etc. | + +### Implementation Details + +**Requirements Satisfied:** +- Requirement 5.1: Return 0 for all successful transformations +- Requirement 5.2: Return non-zero for any failures + +**Key Implementation Points:** + +1. **Main Function Exit**: The main function checks `TOTAL_FAILED` counter and exits with code 1 if any failures occurred, otherwise exits with 0. + +2. **Cleanup Function**: The cleanup function preserves the original exit code if it was already non-zero, otherwise checks for failures and exits appropriately. + +3. **Smoke Test**: The smoke test returns 0 on success, 1 on failure, allowing early detection of configuration issues. + +### Usage in CI/CD + +```bash +#!/bin/bash +# Example CI/CD integration + +./scripts/atx-orchestrator.sh --csv-file repos.csv --quiet + +if [ $? -eq 0 ]; then + echo "All transformations passed" + # Continue with deployment +else + echo "Transformations failed" + exit 1 +fi +``` + +## Output Modes + +The orchestrator supports three output modes to accommodate different use cases. 
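+ +Conceptually, the modes come down to two flags (`VERBOSE` and `QUIET`) that gate the logging helpers; a simplified sketch of that gating is shown below (illustrative only, not the shipped scripts -- the helper names match those documented later on this page), and the sections that follow describe the actual behavior of each mode: + +```bash +# Simplified sketch of output-mode gating (illustrative, not the real implementation). +VERBOSE=false +QUIET=false + +log_info()     { if [ "$QUIET" = "false" ]; then echo "[INFO] $*"; fi; } +log_success()  { if [ "$QUIET" = "false" ]; then echo "[SUCCESS] $*"; fi; } +log_error()    { echo "[ERROR] $*" >&2; }  # errors are always shown, even in quiet mode +log_progress() { if [ "$VERBOSE" = "true" ] && [ "$QUIET" = "false" ]; then echo "[PROGRESS] $*"; fi; } +```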
+ +### Normal Mode (Default) + +Balanced output suitable for interactive use: +- Info-level messages +- Success/failure notifications +- Summary statistics +- Progress indicators + +**Usage:** +```bash +./scripts/atx-orchestrator.sh --csv-file repos.csv +``` + +### Verbose Mode + +Detailed output for debugging and troubleshooting: +- All log messages (DEBUG, INFO, WARN, ERROR) +- Progress messages for each step +- Detailed timing information +- Full command output + +**Requirements Satisfied:** +- Requirement 5.3: Output real-time progress to stdout when invoked with verbose flag + +**Usage:** +```bash +./scripts/atx-orchestrator.sh --csv-file repos.csv --verbose +``` + +**When to Use:** +- Debugging transformation failures +- Understanding execution flow +- Troubleshooting performance issues +- Development and testing + +### Quiet Mode + +Minimal output for automated environments: +- Only ERROR-level messages displayed +- Suppresses info and success messages +- Full logs still written to files +- Suitable for CI/CD pipelines + +**Requirements Satisfied:** +- Requirement 5.4: Suppress non-essential output when invoked with quiet flag + +**Usage:** +```bash +./scripts/atx-orchestrator.sh --csv-file repos.csv --quiet +``` + +**When to Use:** +- CI/CD pipelines +- Automated batch processing +- Cron jobs +- When you only care about failures + +## Implementation Architecture + +### Logging Functions + +The logging system is implemented across two scripts: + +**s3-integration.sh:** +- `log()`: Base logging function with level filtering +- `log_info()`: Info-level messages +- `log_warn()`: Warning messages +- `log_error()`: Error messages +- `log_debug()`: Debug messages + +**atx-orchestrator.sh:** +- `log_info()`: Respects QUIET flag +- `log_success()`: Respects QUIET flag +- `log_progress()`: Only outputs when VERBOSE=true and QUIET=false + +### Flag Interaction + +| Flag | VERBOSE | QUIET | LOG_LEVEL | Output Behavior | +|------|---------|-------|-----------|-----------------| +| (none) | false | false | INFO | Normal output | +| --verbose | true | false | DEBUG | All messages + progress | +| --quiet | false | true | ERROR | Only errors | +| --verbose --quiet | true | true | ERROR | Quiet takes precedence | + +### Environment Variables + +The output mode can also be controlled via environment variables: + +```bash +# Set log level directly +LOG_LEVEL=DEBUG ./scripts/atx-orchestrator.sh --csv-file repos.csv + +# Combine with flags +LOG_LEVEL=WARN ./scripts/atx-orchestrator.sh --csv-file repos.csv --verbose +``` + +## Testing + +Exit code behavior is tested in `tests/test_exit_codes.sh`: + +```bash +# Run exit code tests +./tests/test_exit_codes.sh +``` + +**Test Coverage:** +- Help flag returns 0 +- Missing CSV file returns non-zero +- Invalid CSV file returns non-zero +- Verbose flag is accepted +- Quiet flag is accepted +- Smoke test flag is accepted + +## Examples + +### Example 1: CI/CD Pipeline + +```bash +#!/bin/bash +# Jenkins/GitLab CI pipeline + +set -e + +# Run transformations in quiet mode +./scripts/atx-orchestrator.sh \ + --csv-file repos.csv \ + --mode parallel \ + --max-jobs 8 \ + --quiet + +# Exit code automatically propagates +# Pipeline fails if any transformation fails +``` + +### Example 2: Interactive Debugging + +```bash +# Run with verbose output to debug issues +./scripts/atx-orchestrator.sh \ + --csv-file repos.csv \ + --verbose \ + --output-dir ./debug_results + +# Review detailed logs +cat ./debug_results/summary.log +``` + +### Example 3: Automated Monitoring + 
+```bash +#!/bin/bash +# Cron job for automated processing + +# Run in quiet mode, only log errors +./scripts/atx-orchestrator.sh \ + --csv-file /data/repos.csv \ + --quiet \ + --output-dir /var/log/atx/$(date +%Y%m%d) + +# Send alert if failed +if [ $? -ne 0 ]; then + echo "ATX transformations failed" | mail -s "Alert" admin@example.com +fi +``` + +## Best Practices + +1. **Use Quiet Mode in CI/CD**: Reduces log noise while preserving full logs in files +2. **Use Verbose Mode for Debugging**: Provides detailed information for troubleshooting +3. **Check Exit Codes**: Always check exit codes in scripts and pipelines +4. **Review Log Files**: Even in quiet mode, full logs are written to files +5. **Combine with Dry Run**: Use `--dry-run --verbose` to preview execution without running + +## Related Documentation + +- [README.md](../README.md) - Main project documentation +- [scripts/README.md](../scripts/README.md) - Script documentation +- [examples/ci-cd-integration.sh](../examples/ci-cd-integration.sh) - CI/CD integration example +- [docs/troubleshooting.md](troubleshooting.md) - Troubleshooting guide + diff --git a/src/agentic_platform/agent/code_transform/docs/troubleshooting.md b/src/agentic_platform/agent/code_transform/docs/troubleshooting.md new file mode 100644 index 0000000..e35f3b1 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/docs/troubleshooting.md @@ -0,0 +1,1120 @@ +# Troubleshooting Guide + +This guide provides solutions to common issues encountered when using the ATX Container Test Runner. + +## Quick Diagnostics + +Before diving into specific issues, run these quick checks: + +```bash +# 1. Verify Docker is running +docker ps + +# 2. Run smoke test +docker run --rm atx-test-runner:latest --smoke-test + +# 3. Check AWS credentials +aws sts get-caller-identity + +# 4. Verify S3 bucket access +aws s3 ls s3://your-source-bucket/ + +# 5. Check ECR authentication +aws ecr describe-repositories --region us-east-1 +``` + +## Build Issues + +### Docker Build Fails + +**Symptom:** Docker build command fails with errors + +**Common Error Messages:** +``` +ERROR [internal] load metadata for docker.io/library/ubuntu:22.04 +failed to solve with frontend dockerfile.v0 +``` + +**Possible Causes:** +1. Docker daemon not running +2. Network connectivity issues +3. Insufficient disk space +4. Invalid Dockerfile syntax + +**Solutions:** + +```bash +# Check Docker is running +docker ps +# If error: "Cannot connect to the Docker daemon" +sudo systemctl start docker # Linux +# or restart Docker Desktop on Mac/Windows + +# Check disk space (need at least 2GB free) +df -h +docker system df # Check Docker disk usage +docker system prune -a # Clean up if needed + +# Test network connectivity +curl -I https://hub.docker.com + +# Verify Dockerfile syntax +docker build --no-cache -t atx-test-runner:latest . + +# Build with verbose output +docker build --progress=plain -t atx-test-runner:latest . +``` + +### ATX Installation Fails + +**Symptom:** ATX CLI installation fails during image build + +**Common Error Messages:** +``` +curl: (6) Could not resolve host: atx-install-url.com +curl: (7) Failed to connect to atx-install-url.com +``` + +**Possible Causes:** +1. Network connectivity issues +2. Proxy configuration needed +3. ATX installation URL changed +4. 
DNS resolution problems + +**Solutions:** + +```bash +# Test ATX installation URL +curl -I https://atx-install-url.com/install.sh + +# Build with proxy settings +docker build \ + --build-arg HTTP_PROXY=http://proxy.example.com:8080 \ + --build-arg HTTPS_PROXY=http://proxy.example.com:8080 \ + -t atx-test-runner:latest . + +# Check DNS resolution +nslookup atx-install-url.com + +# Try building with host network (Linux only) +docker build --network=host -t atx-test-runner:latest . +``` + +### AWS CLI Installation Fails + +**Symptom:** AWS CLI installation fails during image build + +**Common Error Messages:** +``` +curl: (28) Operation timed out after 300000 milliseconds +unzip: cannot find or open awscliv2.zip +``` + +**Solutions:** + +```bash +# Increase Docker build timeout +export DOCKER_BUILDKIT=1 +docker build --network=host -t atx-test-runner:latest . + +# Manually download and verify AWS CLI installer +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip -t awscliv2.zip # Test archive integrity + +# Use alternative AWS CLI installation method in Dockerfile +# Add to Dockerfile: +# RUN pip3 install awscli --upgrade +``` + +## Runtime Issues + +### S3 Access Denied + +**Symptom:** Container fails with S3 permission errors + +**Common Error Messages:** +``` +An error occurred (AccessDenied) when calling the GetObject operation +fatal error: Unable to locate credentials +``` + +**Possible Causes:** +1. Missing or incorrect IAM permissions +2. AWS credentials not configured +3. Bucket policy denies access +4. Wrong bucket name or region + +**Solutions:** + +```bash +# Verify AWS credentials are configured +aws sts get-caller-identity + +# Test S3 access manually +aws s3 ls s3://your-source-bucket/ +aws s3 cp s3://your-source-bucket/test.txt ./ + +# Check IAM role permissions (if using IAM role) +aws iam get-role --role-name ATXRunnerTaskRole +aws iam list-attached-role-policies --role-name ATXRunnerTaskRole + +# Verify bucket policy +aws s3api get-bucket-policy --bucket your-source-bucket + +# Test with explicit credentials +docker run --rm \ + -e AWS_ACCESS_KEY_ID=your-key \ + -e AWS_SECRET_ACCESS_KEY=your-secret \ + -e AWS_REGION=us-east-1 \ + atx-test-runner:latest --smoke-test + +# Check bucket region matches +aws s3api get-bucket-location --bucket your-source-bucket +``` + +**Required IAM Permissions:** + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::source-bucket/*", + "arn:aws:s3:::source-bucket" + ] + }, + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:PutObjectAcl" + ], + "Resource": [ + "arn:aws:s3:::results-bucket/*" + ] + } + ] +} +``` + +### Container Fails to Start + +**Symptom:** Docker run command fails immediately + +**Common Error Messages:** +``` +docker: Error response from daemon: OCI runtime create failed +docker: invalid reference format +``` + +**Possible Causes:** +1. Invalid command syntax +2. Missing environment variables +3. Volume mount path doesn't exist +4. 
Image not found + +**Solutions:** + +```bash +# Check if image exists +docker images | grep atx-test-runner + +# Verify container logs +docker ps -a # List all containers including stopped +docker logs + +# Test with minimal command +docker run --rm atx-test-runner:latest --help + +# Check volume mount paths exist +ls -la /path/to/mount + +# Run with interactive shell for debugging +docker run -it --entrypoint /bin/bash atx-test-runner:latest + +# Inside container, verify: +which atx +which aws +ls -la /usr/local/bin/ +``` + +### ATX Transformation Fails + +**Symptom:** ATX execution returns non-zero exit code + +**Common Error Messages:** +``` +ATX transformation failed with exit code: 1 +Error: Transformation 'XYZ' not found +Error: Invalid code repository structure +``` + +**Possible Causes:** +1. Invalid transformation name +2. Code repository structure issues +3. Missing Progress code files +4. ATX authentication expired +5. Network connectivity issues + +**Solutions:** + +```bash +# Check transformation name is correct +# Common transformations: +# - Comprehensive-Codebase-Analysis +# - Code-Modernization +# - Security-Analysis + +# Verify code structure in S3 +aws s3 ls s3://source-bucket/customer1/folder1/ --recursive + +# Check for .p files (Progress code) +aws s3 ls s3://source-bucket/customer1/folder1/ --recursive | grep "\.p$" + +# Test ATX manually +docker run -it --entrypoint /bin/bash atx-test-runner:latest +# Inside container: +atx --version +atx custom def exec --help + +# Check ATX authentication +atx auth status + +# Review execution logs +cat orchestrator_results/customer1_folder1_execution.log + +# Enable verbose mode for detailed output +docker run --rm \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -v $(pwd)/repos.csv:/workspace/repos.csv \ + atx-test-runner:latest \ + --csv-file /workspace/repos.csv \ + --verbose +``` + +### CSV Parsing Errors + +**Symptom:** Container fails with CSV parsing errors + +**Common Error Messages:** +``` +ERROR: Invalid CSV format at line 3 +ERROR: Missing required column: s3_path +ERROR: Invalid S3 URI format +``` + +**Possible Causes:** +1. Incorrect CSV format +2. Missing header row +3. Special characters not quoted +4. 
Invalid S3 URI format + +**Solutions:** + +```bash +# Verify CSV format +cat repos.csv + +# Check for required columns +head -1 repos.csv +# Expected: s3_path,build_command,transformation_name,output_s3_path + +# Validate S3 URIs +grep -E "^s3://[a-z0-9.-]+/.*" repos.csv + +# Check for special characters +cat -A repos.csv # Shows hidden characters + +# Test with sample CSV +cat > test.csv < + +# Filter logs by pattern +aws logs filter-log-events \ + --log-group-name /ecs/atx-container-test-runner \ + --filter-pattern "ERROR" + +# Export logs to S3 +aws logs create-export-task \ + --log-group-name /ecs/atx-container-test-runner \ + --from $(date -d '1 hour ago' +%s)000 \ + --to $(date +%s)000 \ + --destination s3://my-logs-bucket/ecs-logs/ +``` + +### Kubernetes Logs + +When running on EKS, logs are available via kubectl: + +```bash +# View pod logs +kubectl logs job/atx-container-test-runner + +# Follow logs in real-time +kubectl logs -f job/atx-container-test-runner + +# View logs from specific container +kubectl logs job/atx-container-test-runner -c atx-runner + +# View previous pod logs (if pod restarted) +kubectl logs job/atx-container-test-runner --previous + +# Get logs from all pods in job +kubectl logs -l job-name=atx-container-test-runner + +# Save logs to file +kubectl logs job/atx-container-test-runner > atx-logs.txt +``` + +### EC2 Logs + +When running on EC2, logs are stored locally on the instance: + +```bash +# Connect to instance +aws ssm start-session --target + +# View setup logs +sudo tail -f /var/log/atx-runner-setup.log + +# View Docker logs +sudo docker ps -a +sudo docker logs + +# View system logs +sudo journalctl -u docker -f + +# Copy logs to S3 for analysis +aws s3 cp /var/log/atx-runner-setup.log s3://my-logs-bucket/ec2-logs/ +``` + +## Debugging Tips + +### Enable Verbose Mode + +Verbose mode provides detailed output for troubleshooting: + +```bash +# Local execution +docker run --rm \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -v $(pwd)/repos.csv:/workspace/repos.csv \ + atx-test-runner:latest \ + --csv-file /workspace/repos.csv \ + --verbose + +# ECS task override +aws ecs run-task \ + --cluster atx-runner-cluster \ + --task-definition atx-runner-task \ + --overrides '{ + "containerOverrides": [{ + "name": "atx-container", + "command": ["--csv-file", "/workspace/repos.csv", "--verbose"] + }] + }' +``` + +### Use Dry Run Mode + +Preview what will be executed without actually running: + +```bash +docker run --rm \ + -v $(pwd)/repos.csv:/workspace/repos.csv \ + atx-test-runner:latest \ + --csv-file /workspace/repos.csv \ + --dry-run \ + --verbose +``` + +### Interactive Debugging + +Access container shell for manual testing: + +```bash +# Start container with shell +docker run -it --entrypoint /bin/bash atx-test-runner:latest + +# Inside container, test components: +atx --version +aws --version +which atx +which aws +echo $PATH + +# Test S3 access +aws s3 ls s3://source-bucket/ + +# Test ATX manually +cd /tmp +mkdir test-workspace +cd test-workspace +echo "/* test */" > test.p +atx custom def exec \ + --code-repository-path . 
\ + --transformation-name "Comprehensive-Codebase-Analysis" \ + --build-command "noop" +``` + +### Preserve Failed State + +When failures occur, preserve the container state for inspection: + +```bash +# List all containers (including stopped) +docker ps -a + +# Find the failed container +docker ps -a | grep atx-test-runner + +# Start the stopped container +docker start + +# Access shell in the container +docker exec -it /bin/bash + +# Inside container, inspect: +ls -la /tmp/ +cat /workspace/results/summary.log +cat /workspace/results/*_execution.log +``` + +### Check Resource Usage + +Monitor resource consumption to identify bottlenecks: + +```bash +# Monitor Docker resource usage +docker stats + +# Check specific container +docker stats + +# View container resource limits +docker inspect | grep -A 10 "Memory" + +# ECS task resource usage +aws ecs describe-tasks \ + --cluster atx-runner-cluster \ + --tasks \ + --query 'tasks[0].containers[0].{CPU:cpu,Memory:memory}' + +# Kubernetes pod resource usage +kubectl top pod +``` + +### Network Debugging + +Test network connectivity from within the container: + +```bash +# Access container shell +docker run -it --entrypoint /bin/bash atx-test-runner:latest + +# Test DNS resolution +nslookup s3.amazonaws.com +nslookup atx-service.amazon.com + +# Test connectivity +curl -I https://s3.amazonaws.com +curl -I https://atx-service.amazon.com + +# Test S3 endpoint +aws s3 ls --debug 2>&1 | grep -i endpoint + +# Check network routes +ip route +netstat -rn +``` + +### Trace Script Execution + +Enable bash tracing for detailed script execution: + +```bash +# Run with bash tracing +docker run --rm \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -v $(pwd)/repos.csv:/workspace/repos.csv \ + atx-test-runner:latest \ + bash -x /usr/local/bin/atx-orchestrator.sh \ + --csv-file /workspace/repos.csv + +# Or set in environment +docker run --rm \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e BASH_XTRACEFD=1 \ + -v $(pwd)/repos.csv:/workspace/repos.csv \ + atx-test-runner:latest \ + --csv-file /workspace/repos.csv +``` + +## Common Error Messages and Solutions + +### "AWS CLI not found" + +**Error Message:** +``` +/usr/local/bin/atx-orchestrator.sh: line 45: aws: command not found +ERROR: AWS CLI not found in PATH +``` + +**Cause:** AWS CLI not properly installed in container + +**Solutions:** +```bash +# Verify AWS CLI in image +docker run --rm atx-test-runner:latest which aws +docker run --rm atx-test-runner:latest aws --version + +# Rebuild image with verbose output +docker build --progress=plain -t atx-test-runner:latest . 
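+
+# If the binary exists but is still reported missing, inspect PATH and the
+# default AWS CLI v2 install location (assumption: the standard installer is used)
+docker run --rm --entrypoint /bin/bash atx-test-runner:latest -c 'echo "$PATH"; ls -l /usr/local/bin/aws /usr/local/aws-cli'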
+ +# Check Dockerfile AWS CLI installation step +grep -A 10 "Install AWS CLI" Dockerfile +``` + +### "ATX command not found" + +**Error Message:** +``` +/usr/local/bin/atx-orchestrator.sh: line 67: atx: command not found +ERROR: ATX CLI not found in PATH +SMOKE TEST FAILED: ATX CLI not available +``` + +**Cause:** ATX CLI not in PATH or not installed + +**Solutions:** +```bash +# Verify ATX in image +docker run --rm atx-test-runner:latest which atx +docker run --rm atx-test-runner:latest atx --version + +# Check PATH +docker run --rm atx-test-runner:latest echo $PATH + +# Verify ATX installation in Dockerfile +grep -A 10 "Install ATX" Dockerfile + +# Run smoke test to verify +docker run --rm atx-test-runner:latest --smoke-test +``` + +### "CSV parsing error" + +**Error Message:** +``` +ERROR: Invalid CSV format at line 3: unexpected end of line +ERROR: Missing required column: s3_path +ERROR: Invalid S3 URI format: source-bucket/folder1/ +``` + +**Cause:** Invalid CSV format + +**Solutions:** +```bash +# Verify CSV structure +head -5 repos.csv + +# Check for required columns +head -1 repos.csv | tr ',' '\n' + +# Validate S3 URIs (must start with s3://) +grep -v "^s3://" repos.csv | grep -v "^s3_path" + +# Check for hidden characters +cat -A repos.csv | head -5 + +# Test with minimal CSV +cat > test.csv < +docker pull :latest +``` + +### "Transformation timeout" + +**Error Message:** +``` +ERROR: ATX transformation timed out after 3600 seconds +ERROR: Transformation did not complete within expected time +``` + +**Cause:** Large codebase or slow transformation + +**Solutions:** +```bash +# Increase task timeout (ECS) +# Update task definition with longer timeout + +# Use parallel mode for multiple folders +docker run --rm \ + -v $(pwd)/repos.csv:/workspace/repos.csv \ + atx-test-runner:latest \ + --csv-file /workspace/repos.csv \ + --mode parallel \ + --max-jobs 4 + +# Process folders in smaller batches +# Split CSV into multiple files + +# Check transformation progress +docker logs -f +``` + +## Troubleshooting Workflow + +Follow this systematic approach to diagnose and resolve issues: + +### Step 1: Identify the Problem + +```bash +# Run smoke test first +docker run --rm atx-test-runner:latest --smoke-test + +# If smoke test passes, test with minimal CSV +cat > minimal.csv < +sudo tail -f /var/log/atx-runner-setup.log +``` + +### Step 4: Test Components Individually + +```bash +# Test S3 download +aws s3 sync s3://source-bucket/customer1/folder1/ /tmp/test-download/ + +# Test ATX manually +docker run -it --entrypoint /bin/bash atx-test-runner:latest +# Inside container: +cd /tmp +mkdir test && cd test +echo "/* test */" > test.p +atx custom def exec \ + --code-repository-path . \ + --transformation-name "Comprehensive-Codebase-Analysis" \ + --build-command "noop" +``` + +### Step 5: Isolate the Issue + +```bash +# Test with single folder +# Create CSV with one entry +cat > single.csv < atx-debug-env.txt + aws --version >> atx-debug-env.txt + uname -a >> atx-debug-env.txt + ``` + +2. **Document the Issue:** + - Error messages (exact text) + - Steps to reproduce + - Expected vs actual behavior + - Environment details (OS, Docker version, AWS region) + - Logs and configuration files + +3. 
**Check Documentation:** + - [README.md](../README.md) - Main documentation + - [Deployment Guide](deployment.md) - Deployment instructions + - [Exit Codes and Output Modes](exit-codes-and-output-modes.md) - Exit code reference + - [Smoke Test Documentation](smoke-test.md) - Smoke test details + +## Performance Troubleshooting + +### Slow Execution + +**Symptoms:** +- Transformations take longer than expected +- High CPU or memory usage +- Timeouts + +**Diagnostic Steps:** + +```bash +# Monitor resource usage +docker stats + +# Check parallel execution +# Increase max-jobs if resources available +docker run --rm \ + -v $(pwd)/repos.csv:/workspace/repos.csv \ + atx-test-runner:latest \ + --csv-file /workspace/repos.csv \ + --mode parallel \ + --max-jobs 8 + +# Check S3 transfer speeds +time aws s3 sync s3://source-bucket/test/ /tmp/test/ + +# Verify region matches +aws s3api get-bucket-location --bucket source-bucket +``` + +**Solutions:** + +1. **Use Parallel Mode:** + ```bash + --mode parallel --max-jobs 8 + ``` + +2. **Increase Resources (ECS):** + ```bash + # Update task definition with more CPU/memory + TaskCpu: 2048 + TaskMemory: 4096 + ``` + +3. **Use Same Region:** + - Ensure S3 buckets and compute are in same region + - Consider S3 Transfer Acceleration for cross-region + +4. **Optimize Code Structure:** + - Split large folders into smaller ones + - Process in batches + +### High Memory Usage + +**Symptoms:** +- Container killed due to OOM +- Slow performance +- Swap usage + +**Diagnostic Steps:** + +```bash +# Monitor memory +docker stats + +# Check container memory limit +docker inspect | grep -i memory + +# ECS task memory +aws ecs describe-tasks \ + --cluster atx-runner-cluster \ + --tasks \ + --query 'tasks[0].containers[0].memory' +``` + +**Solutions:** + +1. **Increase Memory Limits:** + ```bash + # ECS task definition + TaskMemory: 4096 # or higher + + # Docker run + docker run --memory=4g ... + ``` + +2. **Process Fewer Folders in Parallel:** + ```bash + --mode parallel --max-jobs 4 # reduce from 8 + ``` + +3. **Use Serial Mode:** + ```bash + --mode serial + ``` + +## Getting Help + +If issues persist after following this guide: + +1. **Review Documentation:** + - [README.md](../README.md) - Main project documentation + - [Deployment Guide](deployment.md) - Deployment instructions + - [Exit Codes and Output Modes](exit-codes-and-output-modes.md) - Exit code reference + - [Smoke Test Documentation](smoke-test.md) - Smoke test details + - [Examples](../examples/README.md) - Usage examples + +2. **Check Prerequisites:** + - Docker 20.10+ installed and running + - AWS credentials configured with S3 access + - S3 buckets exist and are accessible + - CSV file format is correct + +3. **Gather Debug Information:** + - Container logs + - Execution logs from output directory + - CSV configuration file + - Environment details (OS, Docker version, AWS region) + +4. 
**Common Solutions:** + - Run smoke test: `docker run --rm atx-test-runner:latest --smoke-test` + - Enable verbose mode: `--verbose` + - Test with minimal CSV (single folder) + - Verify AWS credentials: `aws sts get-caller-identity` + - Check S3 access: `aws s3 ls s3://your-bucket/` diff --git a/src/agentic_platform/agent/code_transform/examples/ci-cd-integration.sh b/src/agentic_platform/agent/code_transform/examples/ci-cd-integration.sh new file mode 100755 index 0000000..0f5e3ab --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/ci-cd-integration.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# Example: CI/CD Integration with Exit Code Handling +# This script demonstrates how to use the ATX orchestrator in a CI/CD pipeline +# with proper exit code handling + +set -euo pipefail + +# Configuration +CSV_FILE="${CSV_FILE:-repos.csv}" +OUTPUT_DIR="${OUTPUT_DIR:-./ci_results}" +MODE="${MODE:-parallel}" +MAX_JOBS="${MAX_JOBS:-4}" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo "==========================================" +echo "CI/CD Pipeline: ATX Transformation Tests" +echo "==========================================" +echo "" +echo "Configuration:" +echo " CSV File: $CSV_FILE" +echo " Output Directory: $OUTPUT_DIR" +echo " Execution Mode: $MODE" +echo " Max Parallel Jobs: $MAX_JOBS" +echo "" + +# Step 1: Validate prerequisites +echo -e "${BLUE}Step 1: Validating prerequisites...${NC}" + +if ! command -v atx &> /dev/null; then + echo -e "${RED}ERROR: ATX CLI not found${NC}" + echo "Please install ATX CLI before running this pipeline" + exit 2 +fi + +if ! command -v aws &> /dev/null; then + echo -e "${RED}ERROR: AWS CLI not found${NC}" + echo "Please install AWS CLI before running this pipeline" + exit 2 +fi + +if [[ ! -f "$CSV_FILE" ]]; then + echo -e "${RED}ERROR: CSV file not found: $CSV_FILE${NC}" + exit 2 +fi + +echo -e "${GREEN}โœ“ Prerequisites validated${NC}" +echo "" + +# Step 2: Run smoke test (optional but recommended) +echo -e "${BLUE}Step 2: Running smoke test...${NC}" + +if ./scripts/atx-orchestrator.sh --smoke-test; then + echo -e "${GREEN}โœ“ Smoke test passed${NC}" +else + smoke_exit_code=$? + echo -e "${RED}โœ— Smoke test failed (exit code: $smoke_exit_code)${NC}" + echo "Container is not properly configured. Please review smoke test logs." + exit $smoke_exit_code +fi +echo "" + +# Step 3: Execute transformations +echo -e "${BLUE}Step 3: Executing ATX transformations...${NC}" + +# Use quiet mode in CI/CD to reduce log noise +# Full logs are still written to files +if ./scripts/atx-orchestrator.sh \ + --csv-file "$CSV_FILE" \ + --output-dir "$OUTPUT_DIR" \ + --mode "$MODE" \ + --max-jobs "$MAX_JOBS" \ + --quiet; then + + echo -e "${GREEN}โœ“ All transformations completed successfully${NC}" + transformation_exit_code=0 +else + transformation_exit_code=$? 
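+    # Capture the failure code but do not exit yet: results are still analyzed
+    # and artifacts uploaded below, and Step 6 exits with this code.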
+ echo -e "${RED}โœ— One or more transformations failed (exit code: $transformation_exit_code)${NC}" +fi +echo "" + +# Step 4: Parse results and generate report +echo -e "${BLUE}Step 4: Analyzing results...${NC}" + +if [[ -f "$OUTPUT_DIR/results.json" ]]; then + # Extract statistics from JSON + total=$(jq -r '.summary.total' "$OUTPUT_DIR/results.json" 2>/dev/null || echo "0") + successful=$(jq -r '.summary.successful' "$OUTPUT_DIR/results.json" 2>/dev/null || echo "0") + failed=$(jq -r '.summary.failed' "$OUTPUT_DIR/results.json" 2>/dev/null || echo "0") + success_rate=$(jq -r '.summary.success_rate' "$OUTPUT_DIR/results.json" 2>/dev/null || echo "0") + + echo "Results Summary:" + echo " Total folders: $total" + echo " Successful: $successful" + echo " Failed: $failed" + echo " Success rate: ${success_rate}%" + echo "" + + # Display failed folders if any + if [[ $failed -gt 0 ]]; then + echo -e "${YELLOW}Failed Folders:${NC}" + jq -r '.tests[] | select(.status == "FAILED") | " - \(.name): \(.message)"' "$OUTPUT_DIR/results.json" 2>/dev/null || echo " (Unable to parse failed folders)" + echo "" + fi +else + echo -e "${YELLOW}Warning: results.json not found${NC}" +fi + +# Step 5: Upload artifacts (example) +echo -e "${BLUE}Step 5: Uploading artifacts...${NC}" + +# Example: Upload to S3 (uncomment if needed) +# if [[ -n "${CI_ARTIFACTS_BUCKET:-}" ]]; then +# aws s3 sync "$OUTPUT_DIR" "s3://$CI_ARTIFACTS_BUCKET/builds/$CI_BUILD_ID/" --quiet +# echo -e "${GREEN}โœ“ Artifacts uploaded to S3${NC}" +# else +# echo "Skipping artifact upload (CI_ARTIFACTS_BUCKET not set)" +# fi + +echo "Artifacts available at: $OUTPUT_DIR" +echo "" + +# Step 6: Final status +echo "==========================================" +if [[ $transformation_exit_code -eq 0 ]]; then + echo -e "${GREEN}CI/CD Pipeline: SUCCESS${NC}" + echo "All transformations completed successfully" +else + echo -e "${RED}CI/CD Pipeline: FAILURE${NC}" + echo "One or more transformations failed" + echo "" + echo "Troubleshooting:" + echo " 1. Review summary log: $OUTPUT_DIR/summary.log" + echo " 2. Check individual logs: $OUTPUT_DIR/*_execution.log" + echo " 3. 
Inspect results JSON: $OUTPUT_DIR/results.json" +fi +echo "==========================================" + +# Exit with the transformation exit code +exit $transformation_exit_code diff --git a/src/agentic_platform/agent/code_transform/examples/different-transformations.csv b/src/agentic_platform/agent/code_transform/examples/different-transformations.csv new file mode 100644 index 0000000..c5e1c75 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/different-transformations.csv @@ -0,0 +1,10 @@ +# Example: Different Transformations for Different Folders +# Use case: Applying different ATX transformations to different code folders +# This demonstrates using multiple transformation types in a single batch + +s3_path,build_command,transformation_name,output_s3_path +s3://source/customer1/legacy-code/,noop,Comprehensive-Codebase-Analysis,s3://results/customer1/legacy-code/analysis/ +s3://source/customer1/migration-target/,noop,Code-Modernization,s3://results/customer1/migration-target/modernization/ +s3://source/customer2/security-audit/,noop,Security-Analysis,s3://results/customer2/security-audit/security/ +s3://source/customer2/performance-review/,noop,Performance-Analysis,s3://results/customer2/performance-review/performance/ +s3://source/customer3/documentation/,noop,Documentation-Generation,s3://results/customer3/documentation/docs/ diff --git a/src/agentic_platform/agent/code_transform/examples/kubernetes-job.yaml b/src/agentic_platform/agent/code_transform/examples/kubernetes-job.yaml new file mode 100644 index 0000000..c16ea7f --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/kubernetes-job.yaml @@ -0,0 +1,143 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: atx-container-test-runner + namespace: default + labels: + app: atx-runner + version: v1 +spec: + # Number of times to retry on failure + backoffLimit: 2 + + # Clean up completed jobs after 1 hour + ttlSecondsAfterFinished: 3600 + + template: + metadata: + labels: + app: atx-runner + spec: + restartPolicy: Never + + # Service account with S3 access (using IRSA - IAM Roles for Service Accounts) + serviceAccountName: atx-runner-sa + + containers: + - name: atx-runner + image: .dkr.ecr..amazonaws.com/atx-test-runner:latest + imagePullPolicy: Always + + # Command to run + command: + - "/usr/local/bin/atx-orchestrator.sh" + args: + - "--csv-file" + - "s3://my-source-bucket/repos.csv" + - "--mode" + - "parallel" + - "--max-jobs" + - "5" + + # Resource limits + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" + + # Environment variables + env: + - name: AWS_REGION + value: "us-east-1" + - name: SOURCE_BUCKET + value: "my-source-bucket" + - name: RESULTS_BUCKET + value: "my-results-bucket" + + # Volume mounts (optional - for local CSV files) + volumeMounts: + - name: config + mountPath: /config + readOnly: true + + # Volumes (optional) + volumes: + - name: config + configMap: + name: atx-runner-config + +--- +# ConfigMap for CSV configuration (optional - if not using S3) +apiVersion: v1 +kind: ConfigMap +metadata: + name: atx-runner-config + namespace: default +data: + repos.csv: | + s3_path,build_command,transformation_name,output_s3_path + s3://source-bucket/customer1/folder1/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer1/folder1/ + s3://source-bucket/customer1/folder2/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer1/folder2/ + +--- +# Service Account with IAM role annotation (IRSA) +apiVersion: v1 +kind: 
ServiceAccount +metadata: + name: atx-runner-sa + namespace: default + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam:::role/atx-runner-task-role + +--- +# CronJob example for scheduled execution +apiVersion: batch/v1 +kind: CronJob +metadata: + name: atx-runner-scheduled + namespace: default +spec: + # Run every day at 2 AM + schedule: "0 2 * * *" + + # Keep last 3 successful and 1 failed job + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 + + jobTemplate: + spec: + backoffLimit: 2 + ttlSecondsAfterFinished: 3600 + template: + metadata: + labels: + app: atx-runner + spec: + restartPolicy: Never + serviceAccountName: atx-runner-sa + containers: + - name: atx-runner + image: .dkr.ecr..amazonaws.com/atx-test-runner:latest + imagePullPolicy: Always + command: + - "/usr/local/bin/atx-orchestrator.sh" + args: + - "--csv-file" + - "s3://my-source-bucket/repos.csv" + - "--mode" + - "parallel" + - "--max-jobs" + - "5" + resources: + requests: + memory: "2Gi" + cpu: "1000m" + limits: + memory: "4Gi" + cpu: "2000m" + env: + - name: AWS_REGION + value: "us-east-1" diff --git a/src/agentic_platform/agent/code_transform/examples/menu-folders.csv b/src/agentic_platform/agent/code_transform/examples/menu-folders.csv new file mode 100644 index 0000000..681db53 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/menu-folders.csv @@ -0,0 +1,11 @@ +# Example: Menu-Based Folder Structure +# Use case: Processing Progress code organized by menu structure +# Note: "menu" and "folder" are used interchangeably by customers + +s3_path,build_command,transformation_name,output_s3_path +s3://progress-code/customer-xyz/menu-main/,noop,Comprehensive-Codebase-Analysis,s3://progress-results/customer-xyz/menu-main/ +s3://progress-code/customer-xyz/menu-accounting/,noop,Comprehensive-Codebase-Analysis,s3://progress-results/customer-xyz/menu-accounting/ +s3://progress-code/customer-xyz/menu-inventory/,noop,Comprehensive-Codebase-Analysis,s3://progress-results/customer-xyz/menu-inventory/ +s3://progress-code/customer-xyz/menu-sales/,noop,Comprehensive-Codebase-Analysis,s3://progress-results/customer-xyz/menu-sales/ +s3://progress-code/customer-xyz/menu-reports/,noop,Comprehensive-Codebase-Analysis,s3://progress-results/customer-xyz/menu-reports/ +s3://progress-code/customer-xyz/menu-utilities/,noop,Comprehensive-Codebase-Analysis,s3://progress-results/customer-xyz/menu-utilities/ diff --git a/src/agentic_platform/agent/code_transform/examples/multi-customer.csv b/src/agentic_platform/agent/code_transform/examples/multi-customer.csv new file mode 100644 index 0000000..53286e6 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/multi-customer.csv @@ -0,0 +1,12 @@ +# Example: Multiple Customers with Various Folder Structures +# Use case: Processing code from multiple customers in a single batch +# This demonstrates handling different customers with different folder organizations + +s3_path,build_command,transformation_name,output_s3_path +s3://source-bucket/customer-alpha/main-app/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer-alpha/main-app/ +s3://source-bucket/customer-alpha/reports/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer-alpha/reports/ +s3://source-bucket/customer-beta/core/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer-beta/core/ +s3://source-bucket/customer-beta/modules/billing/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer-beta/modules/billing/ 
+s3://source-bucket/customer-beta/modules/crm/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer-beta/modules/crm/ +s3://source-bucket/customer-gamma/legacy/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer-gamma/legacy/ +s3://source-bucket/customer-gamma/modernized/,noop,Comprehensive-Codebase-Analysis,s3://results-bucket/customer-gamma/modernized/ diff --git a/src/agentic_platform/agent/code_transform/examples/nested-structure.csv b/src/agentic_platform/agent/code_transform/examples/nested-structure.csv new file mode 100644 index 0000000..2bba02e --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/nested-structure.csv @@ -0,0 +1,10 @@ +# Example: Deeply Nested Folder Structure +# Use case: Processing code organized in a hierarchical structure +# This shows how to handle nested folder paths in S3 + +s3_path,build_command,transformation_name,output_s3_path +s3://code-archive/2024/q1/customer1/project-a/,noop,Comprehensive-Codebase-Analysis,s3://analysis-results/2024/q1/customer1/project-a/ +s3://code-archive/2024/q1/customer1/project-b/,noop,Comprehensive-Codebase-Analysis,s3://analysis-results/2024/q1/customer1/project-b/ +s3://code-archive/2024/q2/customer1/project-a/,noop,Comprehensive-Codebase-Analysis,s3://analysis-results/2024/q2/customer1/project-a/ +s3://code-archive/2024/q2/customer2/migration/phase1/,noop,Comprehensive-Codebase-Analysis,s3://analysis-results/2024/q2/customer2/migration/phase1/ +s3://code-archive/2024/q2/customer2/migration/phase2/,noop,Comprehensive-Codebase-Analysis,s3://analysis-results/2024/q2/customer2/migration/phase2/ diff --git a/src/agentic_platform/agent/code_transform/examples/sample-repos.csv b/src/agentic_platform/agent/code_transform/examples/sample-repos.csv new file mode 100644 index 0000000..934ffe2 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/sample-repos.csv @@ -0,0 +1,13 @@ +# QAD Sample CSV Configuration for ATX Container Test Runner +# This file demonstrates the CSV format for specifying S3 folders to process +# +# Format: s3_path,build_command,transformation_name,output_s3_path +# - s3_path: S3 URI to folder containing Progress code files +# - build_command: Build command (typically 'noop' for Progress analysis) +# - transformation_name: ATX transformation to apply +# - output_s3_path: S3 URI where results should be uploaded +# +# Note: "folder" and "menu" are used interchangeably - both refer to directories containing code + +s3_path,build_command,transformation_name,output_s3_path +s3://atx-test-source-438227048955/customer-abc/menu1/,noop,spuragu-progress-to-ir,s3://atx-test-results-438227048955/customer-abc/menu1/ \ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/examples/single-customer.csv b/src/agentic_platform/agent/code_transform/examples/single-customer.csv new file mode 100644 index 0000000..c89ae37 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/examples/single-customer.csv @@ -0,0 +1,10 @@ +# Example: Single Customer with Multiple Folders +# Use case: Processing all folders for a single customer +# This example shows a typical scenario where one customer has multiple code folders + +s3_path,build_command,transformation_name,output_s3_path +s3://my-source-bucket/acme-corp/accounting/,noop,Comprehensive-Codebase-Analysis,s3://my-results-bucket/acme-corp/accounting/ +s3://my-source-bucket/acme-corp/inventory/,noop,Comprehensive-Codebase-Analysis,s3://my-results-bucket/acme-corp/inventory/ 
+s3://my-source-bucket/acme-corp/sales/,noop,Comprehensive-Codebase-Analysis,s3://my-results-bucket/acme-corp/sales/ +s3://my-source-bucket/acme-corp/reporting/,noop,Comprehensive-Codebase-Analysis,s3://my-results-bucket/acme-corp/reporting/ +s3://my-source-bucket/acme-corp/utilities/,noop,Comprehensive-Codebase-Analysis,s3://my-results-bucket/acme-corp/utilities/ diff --git a/src/agentic_platform/agent/code_transform/gitlab-ci-policy.json b/src/agentic_platform/agent/code_transform/gitlab-ci-policy.json new file mode 100644 index 0000000..54f6d04 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/gitlab-ci-policy.json @@ -0,0 +1,280 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "CloudFormationAccess", + "Effect": "Allow", + "Action": [ + "cloudformation:CreateStack", + "cloudformation:UpdateStack", + "cloudformation:DeleteStack", + "cloudformation:DescribeStacks", + "cloudformation:DescribeStackEvents", + "cloudformation:DescribeStackResources", + "cloudformation:ValidateTemplate", + "cloudformation:GetTemplate", + "cloudformation:ListStacks" + ], + "Resource": [ + "arn:aws:cloudformation:*:*:stack/atx-test-runner/*", + "arn:aws:cloudformation:*:*:stack/atx-test-runner" + ] + }, + { + "Sid": "CloudFormationTemplateValidation", + "Effect": "Allow", + "Action": [ + "cloudformation:ValidateTemplate" + ], + "Resource": "*" + }, + { + "Sid": "ECRAccess", + "Effect": "Allow", + "Action": [ + "ecr:GetAuthorizationToken", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:InitiateLayerUpload", + "ecr:UploadLayerPart", + "ecr:CompleteLayerUpload", + "ecr:PutImage", + "ecr:CreateRepository", + "ecr:DescribeRepositories", + "ecr:DescribeImages", + "ecr:ListImages", + "ecr:TagResource" + ], + "Resource": [ + "arn:aws:ecr:*:*:repository/atx-test-runner", + "arn:aws:ecr:*:*:repository/atx-test-runner/*" + ] + }, + { + "Sid": "ECRAuthToken", + "Effect": "Allow", + "Action": [ + "ecr:GetAuthorizationToken" + ], + "Resource": "*" + }, + { + "Sid": "ECSAccess", + "Effect": "Allow", + "Action": [ + "ecs:CreateCluster", + "ecs:DescribeClusters", + "ecs:UpdateCluster", + "ecs:DeleteCluster", + "ecs:CreateService", + "ecs:DescribeServices", + "ecs:UpdateService", + "ecs:DeleteService", + "ecs:RegisterTaskDefinition", + "ecs:DescribeTaskDefinition", + "ecs:DeregisterTaskDefinition", + "ecs:RunTask", + "ecs:DescribeTasks", + "ecs:StopTask", + "ecs:ListTasks", + "ecs:TagResource", + "ecs:UntagResource" + ], + "Resource": [ + "arn:aws:ecs:*:*:cluster/atx-test-runner-*", + "arn:aws:ecs:*:*:service/atx-test-runner-*/*", + "arn:aws:ecs:*:*:task-definition/atx-test-runner-*:*", + "arn:aws:ecs:*:*:task/atx-test-runner-*/*" + ] + }, + { + "Sid": "EC2NetworkingAccess", + "Effect": "Allow", + "Action": [ + "ec2:CreateVpc", + "ec2:DeleteVpc", + "ec2:DescribeVpcs", + "ec2:ModifyVpcAttribute", + "ec2:CreateSubnet", + "ec2:DeleteSubnet", + "ec2:DescribeSubnets", + "ec2:ModifySubnetAttribute", + "ec2:CreateInternetGateway", + "ec2:DeleteInternetGateway", + "ec2:DescribeInternetGateways", + "ec2:AttachInternetGateway", + "ec2:DetachInternetGateway", + "ec2:CreateRouteTable", + "ec2:DeleteRouteTable", + "ec2:DescribeRouteTables", + "ec2:CreateRoute", + "ec2:DeleteRoute", + "ec2:AssociateRouteTable", + "ec2:DisassociateRouteTable", + "ec2:CreateSecurityGroup", + "ec2:DeleteSecurityGroup", + "ec2:DescribeSecurityGroups", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:AuthorizeSecurityGroupEgress", + "ec2:RevokeSecurityGroupIngress", + 
"ec2:RevokeSecurityGroupEgress", + "ec2:CreateTags", + "ec2:DescribeTags", + "ec2:DescribeAvailabilityZones" + ], + "Resource": "*", + "Condition": { + "StringEquals": { + "aws:RequestedRegion": ["us-east-1", "us-west-2", "eu-west-1"] + } + } + }, + { + "Sid": "IAMRoleAccess", + "Effect": "Allow", + "Action": [ + "iam:CreateRole", + "iam:DeleteRole", + "iam:GetRole", + "iam:UpdateRole", + "iam:AttachRolePolicy", + "iam:DetachRolePolicy", + "iam:PutRolePolicy", + "iam:DeleteRolePolicy", + "iam:GetRolePolicy", + "iam:ListRolePolicies", + "iam:ListAttachedRolePolicies", + "iam:PassRole", + "iam:TagRole", + "iam:UntagRole" + ], + "Resource": [ + "arn:aws:iam::*:role/atx-test-runner-*", + "arn:aws:iam::*:role/ecsTaskExecutionRole*", + "arn:aws:iam::*:role/ECS*" + ] + }, + { + "Sid": "IAMPolicyAccess", + "Effect": "Allow", + "Action": [ + "iam:CreatePolicy", + "iam:DeletePolicy", + "iam:GetPolicy", + "iam:GetPolicyVersion", + "iam:ListPolicyVersions" + ], + "Resource": [ + "arn:aws:iam::*:policy/atx-test-runner-*" + ] + }, + { + "Sid": "CloudWatchLogsAccess", + "Effect": "Allow", + "Action": [ + "logs:CreateLogGroup", + "logs:DeleteLogGroup", + "logs:DescribeLogGroups", + "logs:PutRetentionPolicy", + "logs:TagLogGroup", + "logs:UntagLogGroup", + "logs:CreateLogStream", + "logs:DescribeLogStreams", + "logs:PutLogEvents" + ], + "Resource": [ + "arn:aws:logs:*:*:log-group:/ecs/atx-test-runner-*", + "arn:aws:logs:*:*:log-group:/ecs/atx-test-runner-*:*" + ] + }, + { + "Sid": "S3BucketAccess", + "Effect": "Allow", + "Action": [ + "s3:CreateBucket", + "s3:DeleteBucket", + "s3:GetBucketLocation", + "s3:GetBucketVersioning", + "s3:PutBucketVersioning", + "s3:GetBucketEncryption", + "s3:PutBucketEncryption", + "s3:GetBucketPublicAccessBlock", + "s3:PutBucketPublicAccessBlock", + "s3:GetBucketTagging", + "s3:PutBucketTagging", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::atx-test-*", + "arn:aws:s3:::*-atx-test-*" + ] + }, + { + "Sid": "S3ObjectAccess", + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:GetObjectVersion", + "s3:DeleteObjectVersion" + ], + "Resource": [ + "arn:aws:s3:::atx-test-*/*", + "arn:aws:s3:::*-atx-test-*/*" + ] + }, + { + "Sid": "ApplicationAutoScalingAccess", + "Effect": "Allow", + "Action": [ + "application-autoscaling:RegisterScalableTarget", + "application-autoscaling:DeregisterScalableTarget", + "application-autoscaling:DescribeScalableTargets", + "application-autoscaling:PutScalingPolicy", + "application-autoscaling:DeleteScalingPolicy", + "application-autoscaling:DescribeScalingPolicies" + ], + "Resource": "*", + "Condition": { + "StringEquals": { + "application-autoscaling:service-namespace": "ecs" + } + } + }, + { + "Sid": "LoadBalancerAccess", + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:CreateLoadBalancer", + "elasticloadbalancing:DeleteLoadBalancer", + "elasticloadbalancing:DescribeLoadBalancers", + "elasticloadbalancing:CreateTargetGroup", + "elasticloadbalancing:DeleteTargetGroup", + "elasticloadbalancing:DescribeTargetGroups", + "elasticloadbalancing:CreateListener", + "elasticloadbalancing:DeleteListener", + "elasticloadbalancing:DescribeListeners", + "elasticloadbalancing:ModifyLoadBalancerAttributes", + "elasticloadbalancing:ModifyTargetGroupAttributes", + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/Project": "atx-test-runner" + } + } + }, + { + "Sid": "STSAccess", + "Effect": "Allow", + "Action": 
[ + "sts:GetCallerIdentity" + ], + "Resource": "*" + } + ] +} \ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/scripts/atx-orchestrator.sh b/src/agentic_platform/agent/code_transform/scripts/atx-orchestrator.sh new file mode 100755 index 0000000..56e0df7 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/atx-orchestrator.sh @@ -0,0 +1,1345 @@ +#!/bin/bash +# ATX Orchestration Script +# Orchestrates batch processing of ATX transformations on S3-stored code folders +# +# This script: +# - Parses CSV configuration with S3 paths +# - Downloads folders from S3 +# - Executes ATX transformations +# - Uploads results back to S3 +# - Supports both serial and parallel execution modes +# +# Requirements: 1.3, 1.4, 1.5, 2.5, 4.1, 4.2, 4.3, 4.4, 6.1, 6.2, 6.3, 6.4 + +set -euo pipefail + +####################################### +# Script Configuration +####################################### +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPT_NAME=$(basename "$0") + +# Source required scripts +source "$SCRIPT_DIR/s3-integration.sh" +source "$SCRIPT_DIR/csv-parser.sh" + +####################################### +# Default Configuration +####################################### +DEFAULT_OUTPUT_DIR="./orchestrator_results" +DEFAULT_TEMP_BASE="/tmp/atx-orchestrator" +DEFAULT_MAX_PARALLEL_JOBS=4 +DEFAULT_LOG_LEVEL="INFO" +DEFAULT_ATX_TIMEOUT=3600 # 1 hour per transformation +DEFAULT_PRESERVE_ON_FAILURE=true # Preserve workspace and logs on failure + +####################################### +# Global Variables +####################################### +CSV_FILE="" +OUTPUT_DIR="$DEFAULT_OUTPUT_DIR" +TEMP_BASE="$DEFAULT_TEMP_BASE" +EXECUTION_MODE="serial" +MAX_PARALLEL_JOBS="$DEFAULT_MAX_PARALLEL_JOBS" +LOG_LEVEL="$DEFAULT_LOG_LEVEL" +ATX_TIMEOUT="$DEFAULT_ATX_TIMEOUT" +VERBOSE=false +QUIET=false +SMOKE_TEST=false +DRY_RUN=false +PRESERVE_ON_FAILURE="$DEFAULT_PRESERVE_ON_FAILURE" + +# Failure preservation tracking +declare -a FAILED_WORKSPACES=() +declare -a FAILED_LOGS=() + +# Results tracking +declare -a PROCESSING_RESULTS=() +declare -a PROCESSING_PIDS=() +TOTAL_PROCESSED=0 +TOTAL_SUCCESS=0 +TOTAL_FAILED=0 + +# Timing +START_TIME=$(date +%s) + +####################################### +# Usage and Help +####################################### + +usage() { + cat << EOF +ATX Orchestration Script + +USAGE: + $SCRIPT_NAME [OPTIONS] --csv-file + +REQUIRED: + --csv-file CSV file with S3 paths and transformation configs + +OPTIONS: + --output-dir Output directory for results (default: $DEFAULT_OUTPUT_DIR) + --temp-base Base directory for temp files (default: $DEFAULT_TEMP_BASE) + --mode Execution mode (default: serial) + --max-jobs Max parallel jobs (default: $DEFAULT_MAX_PARALLEL_JOBS) + --atx-timeout Timeout per ATX transformation (default: $DEFAULT_ATX_TIMEOUT) + --log-level Log level: DEBUG, INFO, WARN, ERROR (default: $DEFAULT_LOG_LEVEL) + --verbose Enable verbose output + --quiet Suppress non-essential output + --smoke-test Run smoke test only + --dry-run Show what would be executed without running + --preserve-failures Preserve workspace and logs on failure (default: true) + --no-preserve-failures Clean up workspace even on failure + --help Show this help message + +CSV FORMAT: + s3_path,build_command,transformation_name,output_s3_path + + - s3_path: S3 path to folder with code (e.g., s3://bucket/customer1/folder1/) + - build_command: Build command (typically "noop" for Progress analysis) + - transformation_name: ATX transformation to apply + - 
output_s3_path: S3 path for results (optional, defaults to results bucket) + +EXAMPLES: + # Serial execution + $SCRIPT_NAME --csv-file repos.csv + + # Parallel execution with 8 jobs + $SCRIPT_NAME --csv-file repos.csv --mode parallel --max-jobs 8 + + # Verbose mode with custom output directory + $SCRIPT_NAME --csv-file repos.csv --verbose --output-dir ./my_results + + # Smoke test to verify container setup + $SCRIPT_NAME --smoke-test + + # Dry run to see what would be executed + $SCRIPT_NAME --csv-file repos.csv --dry-run + +ENVIRONMENT VARIABLES: + LOG_LEVEL Override default log level + TEMP_BASE_DIR Override temp base directory + MAX_RETRIES S3 operation retry count (default: 3) + RETRY_DELAY Delay between retries in seconds (default: 5) + +EOF +} + +####################################### +# Logging Functions +####################################### + +# Enhanced logging with quiet mode support +log_info() { + if [[ "$QUIET" != true ]]; then + log "INFO" "$@" + fi +} + +log_success() { + if [[ "$QUIET" != true ]]; then + echo -e "\033[0;32m[SUCCESS]\033[0m $*" >&2 + fi +} + +log_progress() { + if [[ "$QUIET" != true && "$VERBOSE" == true ]]; then + echo -e "\033[0;36m[PROGRESS]\033[0m $*" >&2 + fi +} + +####################################### +# Argument Parsing +####################################### + +parse_arguments() { + # Filter out script path if it appears as first argument (container issue workaround) + if [[ $# -gt 0 && "$1" == "/usr/local/bin/atx-orchestrator.sh" ]]; then + shift + fi + + while [[ $# -gt 0 ]]; do + case "$1" in + --csv-file) + CSV_FILE="$2" + shift 2 + ;; + --output-dir) + OUTPUT_DIR="$2" + shift 2 + ;; + --temp-base) + TEMP_BASE="$2" + shift 2 + ;; + --mode) + EXECUTION_MODE="$2" + if [[ "$EXECUTION_MODE" != "serial" && "$EXECUTION_MODE" != "parallel" ]]; then + die "Invalid execution mode: $EXECUTION_MODE. Must be 'serial' or 'parallel'" 1 + fi + shift 2 + ;; + --max-jobs) + MAX_PARALLEL_JOBS="$2" + if ! [[ "$MAX_PARALLEL_JOBS" =~ ^[1-9][0-9]*$ ]]; then + die "Invalid --max-jobs value: $MAX_PARALLEL_JOBS. Must be a positive integer" 1 + fi + shift 2 + ;; + --atx-timeout) + ATX_TIMEOUT="$2" + if ! [[ "$ATX_TIMEOUT" =~ ^[0-9]+$ ]]; then + die "Invalid --atx-timeout value: $ATX_TIMEOUT. Must be a non-negative integer" 1 + fi + shift 2 + ;; + --log-level) + LOG_LEVEL="$2" + shift 2 + ;; + --verbose) + VERBOSE=true + LOG_LEVEL="DEBUG" + shift + ;; + --quiet) + QUIET=true + LOG_LEVEL="ERROR" + shift + ;; + --smoke-test) + SMOKE_TEST=true + shift + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --preserve-failures) + PRESERVE_ON_FAILURE=true + shift + ;; + --no-preserve-failures) + PRESERVE_ON_FAILURE=false + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + die "Unknown option: $1" 1 + ;; + esac + done + + # Validate required arguments (unless smoke test) + if [[ "$SMOKE_TEST" != true ]]; then + if [[ -z "$CSV_FILE" ]]; then + die "Missing required argument: --csv-file" 1 + fi + + # Handle S3 CSV files by downloading them locally + if [[ "$CSV_FILE" =~ ^s3:// ]]; then + log_info "Downloading CSV file from S3: $CSV_FILE" + local local_csv="/tmp/$(basename "$CSV_FILE")" + if ! aws s3 cp "$CSV_FILE" "$local_csv"; then + die "Failed to download CSV file from S3: $CSV_FILE" 1 + fi + CSV_FILE="$local_csv" + log_info "CSV file downloaded to: $CSV_FILE" + elif [[ ! 
-f "$CSV_FILE" ]]; then + die "CSV file not found: $CSV_FILE" 1 + fi + fi +} + +####################################### +# Initialization +####################################### + +initialize_environment() { + log_info "Initializing orchestration environment" + + # Export log level for sourced scripts + export LOG_LEVEL="$LOG_LEVEL" + export TEMP_BASE_DIR="$TEMP_BASE" + + # Create output directory + mkdir -p "$OUTPUT_DIR" + + # Create temp base directory + mkdir -p "$TEMP_BASE" + + # Initialize results file + local results_file="$OUTPUT_DIR/results.json" + echo '{"summary":{},"tests":[]}' > "$results_file" + + # Initialize summary log with detailed configuration + local summary_log="$OUTPUT_DIR/summary.log" + { + echo "========================================" + echo "ATX Container Test Execution Summary" + echo "========================================" + echo "" + echo "CONFIGURATION" + echo "-------------" + echo "Started at: $(date '+%Y-%m-%d %H:%M:%S %Z')" + echo "Start Timestamp: $START_TIME" + echo "CSV file: $CSV_FILE" + echo "Execution mode: $EXECUTION_MODE" + echo "Max parallel jobs: $MAX_PARALLEL_JOBS" + echo "ATX timeout: ${ATX_TIMEOUT}s" + echo "Log level: $LOG_LEVEL" + echo "Output directory: $OUTPUT_DIR" + echo "Temp base directory: $TEMP_BASE" + echo "Verbose: $VERBOSE" + echo "Quiet: $QUIET" + echo "Dry run: $DRY_RUN" + echo "" + echo "ENVIRONMENT" + echo "-----------" + echo "Hostname: $(hostname)" + echo "User: $(whoami)" + echo "Working directory: $(pwd)" + echo "ATX version: $(atx --version 2>&1 || echo 'not found')" + echo "AWS CLI version: $(aws --version 2>&1 || echo 'not found')" + echo "" + echo "EXECUTION LOG" + echo "-------------" + echo "" + } > "$summary_log" + + log_info "Environment initialized successfully" +} + + +####################################### +# Single Folder Processing Function +# Requirements: 1.3, 1.4, 1.5 +####################################### + +process_single_folder() { + local s3_path="$1" + local build_command="$2" + local transformation_name="$3" + local output_s3_path="$4" + local folder_index="$5" + + local folder_name=$(basename "$s3_path") + local folder_id="${folder_name}_${folder_index}" + local start_time=$(date +%s) + + log_info "Processing folder $folder_index: $folder_name" + log_debug "S3 path: $s3_path" + log_debug "Build command: $build_command" + log_debug "Transformation: $transformation_name" + log_debug "Output S3 path: $output_s3_path" + + # Create workspace for this folder + local workspace="$TEMP_BASE/workspace_${folder_id}" + local results_dir="$TEMP_BASE/results_${folder_id}" + # Ensure OUTPUT_DIR is absolute path + local abs_output_dir + if [[ "$OUTPUT_DIR" = /* ]]; then + abs_output_dir="$OUTPUT_DIR" + else + abs_output_dir="$(pwd)/$OUTPUT_DIR" + fi + + local log_file="$abs_output_dir/${folder_id}_execution.log" + + mkdir -p "$workspace" + mkdir -p "$results_dir" + mkdir -p "$abs_output_dir" + + # Debug log file creation + log_debug "Creating log file: $log_file" + log_debug "Working directory: $(pwd)" + log_debug "Output directory: $abs_output_dir" + + # Initialize per-folder execution log with detailed metadata + { + echo "========================================" + echo "ATX Transformation Execution Log" + echo "========================================" + echo "" + echo "METADATA" + echo "--------" + echo "Folder ID: $folder_id" + echo "Folder Name: $folder_name" + echo "Folder Index: $folder_index" + echo "S3 Source Path: $s3_path" + echo "S3 Output Path: $output_s3_path" + echo "Transformation: 
$transformation_name" + echo "Build Command: $build_command" + echo "Started: $(date '+%Y-%m-%d %H:%M:%S %Z')" + echo "Start Timestamp: $start_time" + echo "Workspace: $workspace" + echo "Results Directory: $results_dir" + echo "Log File: $log_file" + echo "" + echo "EXECUTION LOG" + echo "-------------" + echo "" + } > "$log_file" 2>&1 + + # Verify log file was created + if [[ ! -f "$log_file" ]]; then + log_error "Failed to create log file: $log_file" + log_error "Directory permissions: $(ls -ld "$abs_output_dir" 2>/dev/null || echo 'Directory does not exist')" + # Create a fallback log file in /tmp + log_file="/tmp/${folder_id}_execution.log" + log_warn "Using fallback log file: $log_file" + { + echo "========================================" + echo "ATX Transformation Execution Log (FALLBACK)" + echo "========================================" + echo "Original log file failed: $abs_output_dir/${folder_id}_execution.log" + echo "" + } > "$log_file" + fi + + local exit_code=0 + local status="SUCCESS" + local message="Transformation completed" + + # Step 1: Download folder from S3 + { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] STEP 1: Downloading folder from S3" + echo "Source: $s3_path" + echo "Destination: $workspace" + } >> "$log_file" + + log_progress "Downloading folder from S3: $s3_path" + + if ! s3_download "$s3_path" "$workspace" >> "$log_file" 2>&1; then + exit_code=$? + status="FAILED" + message="S3 download failed (exit code: $exit_code)" + log_error "Failed to download folder from S3: $s3_path" + + { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: S3 download failed" + echo "Exit code: $exit_code" + echo "Status: $status" + } >> "$log_file" + else + { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] SUCCESS: S3 download completed" + } >> "$log_file" + + # Initialize Git repository (required by ATX) + { + echo "" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] STEP 1.5: Initializing Git repository" + echo "ATX requires the code repository to be managed by Git" + } >> "$log_file" + + log_progress "Initializing Git repository in workspace" + + if cd "$workspace" && git init --quiet && git config user.name "ATX Orchestrator" && git config user.email "atx@example.com" && git add . && git commit -m "Initial commit for ATX transformation" --quiet; then + { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] SUCCESS: Git repository initialized" + } >> "$log_file" + else + exit_code=$? 
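+                # $exit_code now holds the status of the failed git setup command; the folder
+                # is marked FAILED below, but Step 3 still runs so the log reaches S3.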
+ status="FAILED" + message="Git initialization failed (exit code: $exit_code)" + log_error "Failed to initialize Git repository in workspace: $workspace" + { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: Git initialization failed" + echo "Exit code: $exit_code" + } >> "$log_file" + fi + fi + + # Step 2: Execute ATX transformation (if download and git init succeeded) + if [[ $exit_code -eq 0 ]]; then + { + echo "" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] STEP 2: Executing ATX transformation" + echo "Transformation: $transformation_name" + echo "Build command: $build_command" + echo "Workspace: $workspace" + echo "Timeout: ${ATX_TIMEOUT}s" + } >> "$log_file" + + log_progress "Executing ATX transformation: $transformation_name" + + if [[ "$DRY_RUN" == true ]]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] DRY RUN: Would execute ATX transformation" >> "$log_file" + log_info "DRY RUN: Skipping actual ATX execution" + else + # Execute ATX transformation with timeout + local atx_cmd="atx custom def exec" + atx_cmd+=" --code-repository-path \"$workspace\"" + atx_cmd+=" --transformation-name \"$transformation_name\"" + atx_cmd+=" --build-command \"$build_command\"" + atx_cmd+=" --non-interactive" + atx_cmd+=" --trust-all-tools" + + { + echo "" + echo "Command: $atx_cmd" + echo "----------------------------------------" + } >> "$log_file" + + # Use timeout if available + local timeout_cmd="" + if command -v timeout &> /dev/null; then + timeout_cmd="timeout $ATX_TIMEOUT" + fi + + # Execute and capture exit code with detailed error handling + local atx_start=$(date +%s) + local atx_output + if atx_output=$(eval "$timeout_cmd $atx_cmd" 2>&1); then + local atx_end=$(date +%s) + local atx_duration=$((atx_end - atx_start)) + + echo "$atx_output" >> "$log_file" + { + echo "----------------------------------------" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] SUCCESS: ATX transformation completed" + echo "Duration: ${atx_duration}s" + } >> "$log_file" + + log_success "ATX transformation completed for folder: $folder_name (${atx_duration}s)" + + # Copy ATX output to results directory + if [[ -d "$workspace" ]]; then + local files_copied=0 + files_copied=$(find "$workspace" -name "*.md" -exec cp {} "$results_dir/" \; -print 2>/dev/null | wc -l) + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Copied $files_copied markdown files to results" >> "$log_file" + + files_copied=$(find "$workspace" -name "*.log" -exec cp {} "$results_dir/" \; -print 2>/dev/null | wc -l) + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Copied $files_copied log files to results" >> "$log_file" + + files_copied=$(find "$workspace" -name "*.json" -exec cp {} "$results_dir/" \; -print 2>/dev/null | wc -l) + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Copied $files_copied JSON files to results" >> "$log_file" + fi + else + exit_code=$? 
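+                        # $exit_code was captured above before the local/date assignments below
+                        # can reset $?; it drives the timeout/not-found error analysis that follows.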
+ local atx_end=$(date +%s) + local atx_duration=$((atx_end - atx_start)) + + echo "$atx_output" >> "$log_file" + { + echo "----------------------------------------" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] FAILED: ATX transformation failed" + echo "Duration: ${atx_duration}s" + } >> "$log_file" + + status="FAILED" + + # Detailed error analysis + if [[ $exit_code -eq 124 ]]; then + message="ATX execution timeout after ${ATX_TIMEOUT}s" + log_error "ATX execution timeout for folder: $folder_name" + log_error "Transformation exceeded maximum time limit of ${ATX_TIMEOUT}s" + log_error "Consider increasing --atx-timeout or checking for infinite loops" + echo "ERROR: Timeout after ${ATX_TIMEOUT}s" >> "$log_file" + elif [[ $exit_code -eq 127 ]]; then + message="ATX command not found" + log_error "ATX CLI not found or not in PATH" + log_error "Verify ATX installation: atx --version" + echo "ERROR: ATX command not found" >> "$log_file" + elif [[ $exit_code -eq 126 ]]; then + message="ATX command not executable" + log_error "ATX CLI found but not executable" + log_error "Check file permissions on ATX binary" + echo "ERROR: ATX not executable" >> "$log_file" + else + message="ATX execution failed (exit code: $exit_code)" + log_error "ATX execution failed for folder: $folder_name (exit code: $exit_code)" + + # Analyze error output for common issues + if echo "$atx_output" | grep -qi "permission denied"; then + log_error "Permission denied - check file/directory permissions" + message="$message - Permission denied" + elif echo "$atx_output" | grep -qi "out of memory\|cannot allocate memory"; then + log_error "Out of memory - transformation requires more RAM" + log_error "Consider increasing container memory limits" + message="$message - Out of memory" + elif echo "$atx_output" | grep -qi "no such file\|file not found"; then + log_error "File not found - check code repository structure" + message="$message - File not found" + elif echo "$atx_output" | grep -qi "syntax error\|parse error"; then + log_error "Syntax/parse error in source code" + message="$message - Parse error" + elif echo "$atx_output" | grep -qi "network\|connection"; then + log_error "Network error during transformation" + log_error "Check internet connectivity if transformation requires external resources" + message="$message - Network error" + fi + + echo "ERROR: Exit code $exit_code" >> "$log_file" + echo "See log file for details: $log_file" >> "$log_file" + fi + + # Preserve workspace for debugging on failure + log_warn "Preserving workspace for debugging: $workspace" + echo "Workspace preserved at: $workspace" >> "$log_file" + fi + fi + fi + + # Step 3: Upload results to S3 (even if transformation failed, upload logs) + if [[ "$DRY_RUN" != true ]]; then + { + echo "" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] STEP 3: Uploading results to S3" + } >> "$log_file" + + log_progress "Uploading results to S3: $output_s3_path" + + # Copy execution log to results directory + cp "$log_file" "$results_dir/" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Execution log copied to results directory" >> "$log_file" + + # Determine output S3 path (use default if not specified) + local final_output_path="$output_s3_path" + if [[ -z "$final_output_path" ]]; then + # Default: mirror source structure in results bucket + final_output_path="${s3_path/source/results}" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using default output path: $final_output_path" >> "$log_file" + fi + + { + echo "Destination: $final_output_path" + echo "Source: $results_dir" + } >> "$log_file" + 
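+            # The upload runs even when the transformation failed, so the execution log and any
+            # partial results land in S3 next to the successful runs.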
+ # Upload results + if ! s3_upload_results "$results_dir" "$final_output_path" >> "$log_file" 2>&1; then + log_warn "Failed to upload results to S3: $final_output_path" + { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] WARNING: S3 upload failed" + } >> "$log_file" + + # Don't change status if transformation succeeded + if [[ "$status" == "SUCCESS" ]]; then + status="SUCCESS_WITH_WARNINGS" + message="Transformation completed but upload had issues" + fi + else + log_progress "Results uploaded successfully to: $final_output_path" + { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] SUCCESS: Results uploaded to S3" + } >> "$log_file" + fi + fi + + # Step 4: Cleanup temp files with failure preservation + { + echo "" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] STEP 4: Cleaning up temporary files" + } >> "$log_file" + + log_progress "Cleaning up temporary files" + + # Calculate duration for reporting + local end_time=$(date +%s) + local duration=$((end_time - start_time)) + + # Preserve workspace and logs on failure if configured + if [[ "$status" == "FAILED" && "$PRESERVE_ON_FAILURE" == true ]]; then + log_warn "Preserving workspace for debugging: $workspace" + + # Create a failure preservation directory + local failure_dir="$abs_output_dir/failures/${folder_id}" + mkdir -p "$failure_dir" + + # Copy workspace to failure directory for debugging + if [[ -d "$workspace" ]]; then + log_info "Copying workspace to failure directory: $failure_dir" + cp -r "$workspace" "$failure_dir/workspace" 2>/dev/null || true + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Workspace preserved at: $failure_dir/workspace" >> "$log_file" + fi + + # Copy execution log to failure directory + cp "$log_file" "$failure_dir/execution.log" 2>/dev/null || true + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Execution log preserved at: $failure_dir/execution.log" >> "$log_file" + + # Create a failure summary file + { + echo "FAILURE SUMMARY" + echo "===============" + echo "Folder: $folder_name" + echo "Folder ID: $folder_id" + echo "S3 Path: $s3_path" + echo "Transformation: $transformation_name" + echo "Status: $status" + echo "Message: $message" + echo "Exit Code: $exit_code" + echo "Duration: ${duration}s" + echo "" + echo "PRESERVED ARTIFACTS" + echo "===================" + echo "Workspace: $failure_dir/workspace" + echo "Execution log: $failure_dir/execution.log" + echo "Original workspace: $workspace" + echo "Original log: $log_file" + echo "" + echo "DEBUGGING STEPS" + echo "===============" + echo "1. Review execution log: cat $failure_dir/execution.log" + echo "2. Inspect workspace: ls -la $failure_dir/workspace" + echo "3. Check source files: find $failure_dir/workspace -type f" + echo "4. Manually run ATX: cd $failure_dir/workspace && atx custom def exec ..." + echo "5. 
Review S3 source: aws s3 ls $s3_path" + echo "" + } > "$failure_dir/README.txt" + + # Track failed workspace for summary + FAILED_WORKSPACES+=("$failure_dir") + FAILED_LOGS+=("$log_file") + + log_info "Failure artifacts preserved at: $failure_dir" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Failure artifacts preserved at: $failure_dir" >> "$log_file" + + # Still cleanup the original workspace to save space (we have a copy) + cleanup_safe "$workspace" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Original workspace cleaned up (copy preserved)" >> "$log_file" + elif [[ "$status" == "SUCCESS" || "$status" == "SUCCESS_WITH_WARNINGS" ]]; then + cleanup_safe "$workspace" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Workspace cleaned up: $workspace" >> "$log_file" + else + # Failure but preservation disabled + log_info "Cleaning up workspace (preservation disabled)" + cleanup_safe "$workspace" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Workspace cleaned up: $workspace" >> "$log_file" + fi + + cleanup_safe "$results_dir" + echo "[$(date '+%Y-%m-%d %H:%M:%S')] Results directory cleaned up: $results_dir" >> "$log_file" + + # Log completion with detailed status information + { + echo "" + echo "========================================" + echo "EXECUTION SUMMARY" + echo "========================================" + echo "Completed: $(date '+%Y-%m-%d %H:%M:%S %Z')" + echo "End Timestamp: $end_time" + echo "Duration: ${duration}s" + echo "Status Code: $exit_code" + echo "Status: $status" + echo "Message: $message" + echo "" + + # Add troubleshooting information for failures + if [[ "$status" == "FAILED" ]]; then + echo "TROUBLESHOOTING" + echo "---------------" + echo "1. Check the execution log above for error details" + echo "2. Verify S3 paths are correct and accessible" + echo "3. Ensure AWS credentials have proper permissions" + echo "4. Check ATX transformation name is valid" + echo "5. 
Review workspace contents: $workspace" + echo "" + echo "For S3 errors:" + echo " - Verify bucket exists and is accessible" + echo " - Check IAM permissions (s3:GetObject, s3:PutObject, s3:ListBucket)" + echo " - Ensure network connectivity to S3" + echo "" + echo "For ATX errors:" + echo " - Verify ATX CLI is installed: atx --version" + echo " - Check transformation name is correct" + echo " - Review source code for syntax errors" + echo " - Check available memory and disk space" + echo "" + fi + + echo "Log file location: $log_file" + echo "========================================" + } >> "$log_file" + + # Return result + echo "$status|$folder_name|$message|$duration|$log_file" + return $exit_code +} + +####################################### +# Serial Execution Mode +# Requirements: 2.5 +####################################### + +execute_serial() { + log_info "Starting serial execution mode" + + local queue_size=$(get_queue_size) + log_info "Processing $queue_size folders sequentially" + + local current=0 + + # Process each folder one at a time + while [[ $current -lt $queue_size ]]; do + local item=$(get_queue_item "$current") + + # Parse item + IFS='|' read -r s3_path build_cmd transform_name output_s3_path <<< "$item" + + log_info "Processing folder $((current + 1))/$queue_size" + + # Process folder and capture result + local result + if result=$(process_single_folder "$s3_path" "$build_cmd" "$transform_name" "$output_s3_path" "$current"); then + TOTAL_SUCCESS=$((TOTAL_SUCCESS + 1)) + else + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + + TOTAL_PROCESSED=$((TOTAL_PROCESSED + 1)) + PROCESSING_RESULTS+=("$result") + + current=$((current + 1)) + done + + log_success "Serial execution completed: $TOTAL_PROCESSED folders processed" +} + +####################################### +# Parallel Execution Mode +# Requirements: 6.1, 6.2, 6.3, 6.4 +####################################### + +execute_parallel() { + log_info "Starting parallel execution mode (max $MAX_PARALLEL_JOBS jobs)" + + local queue_size=$(get_queue_size) + log_info "Processing $queue_size folders in parallel" + + local current=0 + local active_jobs=0 + + # Process folders with parallel job limit + while [[ $current -lt $queue_size ]] || [[ $active_jobs -gt 0 ]]; do + # Start new jobs up to max limit + while [[ $current -lt $queue_size ]] && [[ $active_jobs -lt $MAX_PARALLEL_JOBS ]]; do + local item=$(get_queue_item "$current") + + # Parse item + IFS='|' read -r s3_path build_cmd transform_name output_s3_path <<< "$item" + + log_info "Starting parallel job $((current + 1))/$queue_size" + + # Start background job and capture result + ( + local result=$(process_single_folder "$s3_path" "$build_cmd" "$transform_name" "$output_s3_path" "$current") + local exit_code=$? + + # Write result to temp file for parent process + echo "$result" > "$TEMP_BASE/result_${current}.txt" + echo "$exit_code" > "$TEMP_BASE/exitcode_${current}.txt" + ) & + + local pid=$! 
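+            # Track the PID together with its queue index so wait_for_job_completion can match
+            # the finished job to its result/exitcode temp files under $TEMP_BASE.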
+ PROCESSING_PIDS+=("$pid:$current") + + active_jobs=$((active_jobs + 1)) + current=$((current + 1)) + done + + # Wait for at least one job to complete + if [[ $active_jobs -gt 0 ]]; then + wait_for_job_completion + active_jobs=${#PROCESSING_PIDS[@]} + fi + done + + log_success "Parallel execution completed: $TOTAL_PROCESSED folders processed" +} + +# Wait for job completion in parallel mode +wait_for_job_completion() { + local new_pids=() + + # Handle empty array case + if [[ ${#PROCESSING_PIDS[@]} -eq 0 ]]; then + return + fi + + for pid_info in "${PROCESSING_PIDS[@]}"; do + IFS=':' read -r pid index <<< "$pid_info" + + if kill -0 "$pid" 2>/dev/null; then + # Job still running + new_pids+=("$pid_info") + else + # Job completed + wait "$pid" 2>/dev/null || true + + # Read result from job output files + local result_file="$TEMP_BASE/result_${index}.txt" + local exitcode_file="$TEMP_BASE/exitcode_${index}.txt" + + if [[ -f "$result_file" ]]; then + local result=$(cat "$result_file") + PROCESSING_RESULTS+=("$result") + rm -f "$result_file" + fi + + local job_exit_code=1 + if [[ -f "$exitcode_file" ]]; then + job_exit_code=$(cat "$exitcode_file") + rm -f "$exitcode_file" + fi + + TOTAL_PROCESSED=$((TOTAL_PROCESSED + 1)) + + if [[ $job_exit_code -eq 0 ]]; then + TOTAL_SUCCESS=$((TOTAL_SUCCESS + 1)) + else + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + + log_progress "Job completed: $((TOTAL_PROCESSED)) of $(get_queue_size)" + fi + done + + # Handle empty new_pids array + if [[ ${#new_pids[@]} -gt 0 ]]; then + PROCESSING_PIDS=("${new_pids[@]}") + else + PROCESSING_PIDS=() + fi + + # If still at max capacity, sleep briefly + if [[ ${#PROCESSING_PIDS[@]} -ge $MAX_PARALLEL_JOBS ]]; then + sleep 1 + fi +} + + +####################################### +# Result Aggregation +# Requirements: 4.1, 4.2, 4.4, 6.4 +####################################### + +aggregate_results() { + log_info "Aggregating results from all processed folders" + + local end_time=$(date +%s) + local total_duration=$((end_time - START_TIME)) + + # Recalculate statistics from actual results (in case parallel mode didn't update counters correctly) + TOTAL_PROCESSED=${#PROCESSING_RESULTS[@]} + TOTAL_SUCCESS=0 + TOTAL_FAILED=0 + + for result in "${PROCESSING_RESULTS[@]}"; do + IFS='|' read -r status folder_name message duration log_file <<< "$result" + if [[ "$status" == "SUCCESS" || "$status" == "SUCCESS_WITH_WARNINGS" ]]; then + TOTAL_SUCCESS=$((TOTAL_SUCCESS + 1)) + else + TOTAL_FAILED=$((TOTAL_FAILED + 1)) + fi + done + + # Calculate statistics + local success_rate=0 + if [[ $TOTAL_PROCESSED -gt 0 ]]; then + success_rate=$(( (TOTAL_SUCCESS * 100) / TOTAL_PROCESSED )) + fi + + # Generate human-readable summary + local summary_log="$OUTPUT_DIR/summary.log" + { + echo "" + echo "EXECUTION SUMMARY" + echo "=================" + echo "Execution completed at: $(date)" + echo "Total wall time: ${total_duration}s" + echo "" + echo "STATISTICS TABLE" + echo "==================" + printf "%-25s | %-10s\n" "Metric" "Value" + printf "%-25s-+-%-10s\n" "-------------------------" "----------" + printf "%-25s | %-10s\n" "Total Folders" "$TOTAL_PROCESSED" + printf "%-25s | %-10s\n" "Successful" "$TOTAL_SUCCESS" + printf "%-25s | %-10s\n" "Failed" "$TOTAL_FAILED" + printf "%-25s | %-10s%%\n" "Success Rate" "$success_rate" + printf "%-25s | %-10s\n" "Execution Mode" "$EXECUTION_MODE" + printf "%-25s | %-10s\n" "Max Parallel Jobs" "$MAX_PARALLEL_JOBS" + echo "" + + if [[ $TOTAL_FAILED -gt 0 && ${#PROCESSING_RESULTS[@]} -gt 0 ]]; then + echo "FAILED 
FOLDERS" + echo "==============" + for result in "${PROCESSING_RESULTS[@]}"; do + IFS='|' read -r status folder_name message duration log_file <<< "$result" + if [[ "$status" == "FAILED" ]]; then + printf "%-30s | %-40s | %s\n" "$folder_name" "$message" "$log_file" + fi + done + echo "" + + # Add preserved failure information + if [[ ${#FAILED_WORKSPACES[@]} -gt 0 ]]; then + echo "PRESERVED FAILURE ARTIFACTS" + echo "============================" + echo "Failure preservation: $PRESERVE_ON_FAILURE" + echo "Number of preserved failures: ${#FAILED_WORKSPACES[@]}" + echo "" + echo "Preserved workspace locations:" + for workspace in "${FAILED_WORKSPACES[@]}"; do + echo " - $workspace" + done + echo "" + echo "To debug failures:" + echo " 1. Navigate to preserved workspace directory" + echo " 2. Review README.txt for debugging steps" + echo " 3. Inspect execution.log for error details" + echo " 4. Examine workspace/ directory for source files" + echo "" + fi + fi + + echo "DETAILED RESULTS" + echo "================" + printf "%-10s | %-30s | %-40s | %-10s\n" "Status" "Folder" "Message" "Duration(s)" + printf "%-10s-+-%-30s-+-%-40s-+-%-10s\n" "----------" "------------------------------" "----------------------------------------" "----------" + if [[ ${#PROCESSING_RESULTS[@]} -gt 0 ]]; then + for result in "${PROCESSING_RESULTS[@]}"; do + IFS='|' read -r status folder_name message duration log_file <<< "$result" + printf "%-10s | %-30s | %-40s | %-10s\n" "$status" "$folder_name" "$message" "$duration" + done + fi + echo "" + + echo "LOG FILES" + echo "=========" + echo "Summary log: $summary_log" + echo "Individual logs: $OUTPUT_DIR/*_execution.log" + echo "Results JSON: $OUTPUT_DIR/results.json" + echo "" + + } >> "$summary_log" + + # Generate machine-readable JSON report + generate_json_report "$total_duration" + + # Display summary to console + if [[ "$QUIET" != true ]]; then + echo "" + echo "==========================================" + echo "ORCHESTRATION COMPLETED" + echo "==========================================" + echo "Total folders: $TOTAL_PROCESSED" + echo "Successful: $TOTAL_SUCCESS" + echo "Failed: $TOTAL_FAILED" + echo "Success rate: ${success_rate}%" + echo "Total time: ${total_duration}s" + echo "" + echo "Full summary available at: $summary_log" + echo "==========================================" + fi +} + +# Generate JSON report +generate_json_report() { + local total_duration="$1" + local results_file="$OUTPUT_DIR/results.json" + + # Calculate success rate + local success_rate=0 + if [[ $TOTAL_PROCESSED -gt 0 ]]; then + success_rate=$(( (TOTAL_SUCCESS * 100) / TOTAL_PROCESSED )) + fi + + # Start JSON structure + { + echo "{" + echo " \"summary\": {" + echo " \"total\": $TOTAL_PROCESSED," + echo " \"successful\": $TOTAL_SUCCESS," + echo " \"failed\": $TOTAL_FAILED," + echo " \"success_rate\": $success_rate," + echo " \"execution_time\": $total_duration," + echo " \"wall_time\": $total_duration," + echo " \"execution_mode\": \"$EXECUTION_MODE\"," + echo " \"max_parallel_jobs\": $MAX_PARALLEL_JOBS," + echo " \"completed_at\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"" + echo " }," + echo " \"tests\": [" + + # Add test results + local first=true + if [[ ${#PROCESSING_RESULTS[@]} -gt 0 ]]; then + for result in "${PROCESSING_RESULTS[@]}"; do + IFS='|' read -r status folder_name message duration log_file <<< "$result" + + if [[ "$first" == true ]]; then + first=false + else + echo "," + fi + + echo -n " {" + echo -n "\"name\": \"$folder_name\", " + echo -n "\"status\": \"$status\", " + 
echo -n "\"duration\": $duration, " + echo -n "\"message\": \"$message\", " + echo -n "\"log_file\": \"$log_file\"" + echo -n "}" + done + fi + + echo "" + echo " ]" + echo "}" + } > "$results_file" + + log_info "JSON report generated: $results_file" +} + +####################################### +# Smoke Test Functionality +# Requirements: 10.1, 10.2, 10.3, 10.4 +####################################### + +run_smoke_test() { + log_info "Running smoke test to verify container setup" + + local smoke_test_log="$OUTPUT_DIR/smoke_test.log" + local smoke_test_script="$SCRIPT_DIR/smoke-test.sh" + + # Check if smoke test script exists + if [[ ! -f "$smoke_test_script" ]]; then + log_error "Smoke test script not found: $smoke_test_script" + log_error "Expected location: scripts/smoke-test.sh" + return 1 + fi + + # Make sure script is executable + chmod +x "$smoke_test_script" 2>/dev/null || true + + # Run the smoke test script + log_info "Executing smoke test script: $smoke_test_script" + + local smoke_args=( + "--log-file" "$smoke_test_log" + "--temp-dir" "$TEMP_BASE/smoke_test" + ) + + if [[ "$VERBOSE" == true ]]; then + smoke_args+=("--verbose") + fi + + if [[ "$PRESERVE_ON_FAILURE" == false ]]; then + smoke_args+=("--no-preserve") + fi + + # Execute smoke test + if "$smoke_test_script" "${smoke_args[@]}"; then + log_success "Smoke test passed" + log_info "Container is properly configured for ATX transformations" + log_info "Smoke test log: $smoke_test_log" + return 0 + else + local smoke_exit_code=$? + log_error "Smoke test failed with exit code: $smoke_exit_code" + log_error "See $smoke_test_log for details" + + # Preserve smoke test artifacts on failure (handled by smoke-test.sh) + local smoke_failure_dir="$OUTPUT_DIR/smoke_test_failure" + local smoke_test_dir="$TEMP_BASE/smoke_test" + + # Additional preservation if smoke test script didn't handle it + if [[ "$PRESERVE_ON_FAILURE" == true && -d "$smoke_test_dir" ]]; then + mkdir -p "$smoke_failure_dir" + + log_warn "Preserving smoke test artifacts for debugging" + + # Copy smoke test directory if not already preserved + if [[ ! -d "$smoke_failure_dir/workspace" && -d "$smoke_test_dir" ]]; then + cp -r "$smoke_test_dir" "$smoke_failure_dir/workspace" 2>/dev/null || true + log_info "Smoke test workspace preserved at: $smoke_failure_dir/workspace" + fi + + # Copy smoke test log if not already there + if [[ ! -f "$smoke_failure_dir/smoke_test.log" && -f "$smoke_test_log" ]]; then + cp "$smoke_test_log" "$smoke_failure_dir/smoke_test.log" 2>/dev/null || true + fi + + # Create debugging guide if not already created + if [[ ! -f "$smoke_failure_dir/README.txt" ]]; then + { + echo "SMOKE TEST FAILURE" + echo "==================" + echo "The smoke test failed, indicating the container is not properly configured." + echo "" + echo "PRESERVED ARTIFACTS" + echo "===================" + echo "Workspace: $smoke_failure_dir/workspace" + echo "Log: $smoke_failure_dir/smoke_test.log" + echo "" + echo "DEBUGGING STEPS" + echo "===============" + echo "1. Review smoke test log: cat $smoke_failure_dir/smoke_test.log" + echo "2. Check ATX installation: atx --version" + echo "3. Check AWS CLI installation: aws --version" + echo "4. Verify test code: cat $smoke_failure_dir/workspace/test.p" + echo "5. Manually run ATX: cd $smoke_failure_dir/workspace && atx custom def exec ..." 
+ echo "" + echo "COMMON ISSUES" + echo "=============" + echo "- ATX CLI not installed or not in PATH" + echo "- AWS CLI not installed" + echo "- Missing dependencies (curl, git, python3)" + echo "- Incorrect ATX installation" + echo "- Network connectivity issues" + echo "" + echo "EXIT CODE MEANINGS" + echo "==================" + echo "1 - ATX CLI not found" + echo "2 - AWS CLI not found" + echo "3 - Failed to create test code" + echo "4 - ATX transformation failed" + echo "5 - General error" + echo "" + } > "$smoke_failure_dir/README.txt" + fi + + log_info "Smoke test failure artifacts preserved at: $smoke_failure_dir" + log_info "Review $smoke_failure_dir/README.txt for debugging steps" + fi + + return $smoke_exit_code + fi +} + +####################################### +# Cleanup and Error Handling +####################################### + +cleanup_on_exit() { + local exit_code=$? + + log_debug "Cleanup on exit (code: $exit_code)" + + # Kill any remaining background jobs + if [[ ${#PROCESSING_PIDS[@]} -gt 0 ]]; then + log_info "Terminating remaining background jobs..." + for pid_info in "${PROCESSING_PIDS[@]}"; do + IFS=':' read -r pid index <<< "$pid_info" + if kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + fi + done + fi + + # Generate summary if we processed anything or have results + if [[ $TOTAL_PROCESSED -gt 0 || ${#PROCESSING_RESULTS[@]} -gt 0 ]]; then + aggregate_results + fi + + # Cleanup temp base (optional, keep for debugging) + # cleanup_safe "$TEMP_BASE" + + # Exit code propagation (Requirements 5.1, 5.2) + # If we have failures, exit with non-zero code + # Preserve original exit code if it was already non-zero + if [[ $exit_code -ne 0 ]]; then + exit $exit_code + elif [[ $TOTAL_FAILED -gt 0 ]]; then + exit 1 + else + exit 0 + fi +} + +handle_interrupt() { + log_warn "Received interrupt signal. Cleaning up..." + cleanup_on_exit +} + +# Set up signal handlers +trap cleanup_on_exit EXIT +trap handle_interrupt SIGINT SIGTERM + +####################################### +# Main Function +####################################### + +main() { + # Parse command-line arguments + parse_arguments "$@" + + # Handle smoke test mode (Requirements: 10.1, 10.4) + if [[ "$SMOKE_TEST" == true ]]; then + log_info "Smoke test mode enabled" + + # Initialize minimal environment for smoke test + mkdir -p "$OUTPUT_DIR" + mkdir -p "$TEMP_BASE" + + # Run smoke test + if run_smoke_test; then + log_success "Smoke test completed successfully" + exit 0 + else + local smoke_exit_code=$? + log_error "Smoke test failed" + exit $smoke_exit_code + fi + fi + + # Initialize environment + initialize_environment + + log_info "Starting ATX orchestration" + log_info "CSV file: $CSV_FILE" + log_info "Execution mode: $EXECUTION_MODE" + log_info "Output directory: $OUTPUT_DIR" + + if [[ "$DRY_RUN" == true ]]; then + log_info "DRY RUN MODE - No actual execution will occur" + fi + + # Check prerequisites + check_aws_cli + + if ! command -v atx &> /dev/null; then + die "ATX CLI not found. Please install ATX CLI first." 2 + fi + + log_info "ATX CLI version: $(atx --version 2>&1 || echo 'unknown')" + + # Parse CSV file + log_info "Parsing CSV file..." + if ! 
parse_csv_file "$CSV_FILE"; then + die "Failed to parse CSV file: $CSV_FILE" 1 + fi + + local queue_size=$(get_queue_size) + log_info "Found $queue_size folders to process" + + if [[ $queue_size -eq 0 ]]; then + die "No folders to process in CSV file" 1 + fi + + # Execute based on mode + if [[ "$EXECUTION_MODE" == "serial" ]]; then + execute_serial + else + execute_parallel + fi + + # Results are aggregated in cleanup_on_exit + log_success "Orchestration completed successfully!" + + # Exit code propagation (Requirements 5.1, 5.2) + # Return 0 for all successful transformations, non-zero for any failures + if [[ $TOTAL_FAILED -gt 0 ]]; then + log_error "Orchestration completed with $TOTAL_FAILED failed transformation(s)" + exit 1 + else + exit 0 + fi +} + +# Run main function +main "$@" diff --git a/src/agentic_platform/agent/code_transform/scripts/build-and-test.sh b/src/agentic_platform/agent/code_transform/scripts/build-and-test.sh new file mode 100755 index 0000000..0aef33c --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/build-and-test.sh @@ -0,0 +1,612 @@ +#!/bin/bash +# ATX Container Build and Test Script +# Builds the Docker image and runs comprehensive tests +# +# This script implements task 11: Build and test Docker image +# - Subtask 11.1: Build Docker image locally +# - Subtask 11.2: Run smoke test on built image +# - Subtask 11.3: Test with sample Progress code (requires S3 setup) +# +# Requirements: 7.5, 10.1, 10.3, 10.4, 1.3, 1.4 + +set -euo pipefail + +####################################### +# Script Configuration +####################################### +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +SCRIPT_NAME=$(basename "$0") + +# Default configuration +DEFAULT_IMAGE_NAME="atx-test-runner" +DEFAULT_IMAGE_TAG="latest" +DEFAULT_OUTPUT_DIR="./build_test_results" +DEFAULT_MAX_IMAGE_SIZE_MB=2000 # 2GB reasonable limit + +# Global variables +IMAGE_NAME="$DEFAULT_IMAGE_NAME" +IMAGE_TAG="$DEFAULT_IMAGE_TAG" +OUTPUT_DIR="$DEFAULT_OUTPUT_DIR" +MAX_IMAGE_SIZE_MB="$DEFAULT_MAX_IMAGE_SIZE_MB" +VERBOSE=false +SKIP_BUILD=false +SKIP_SMOKE_TEST=false +SKIP_S3_TEST=false +BUILD_NO_CACHE=false + +####################################### +# Logging Functions +####################################### + +log_info() { + echo "[INFO] $*" +} + +log_success() { + echo -e "\033[0;32m[SUCCESS]\033[0m $*" +} + +log_error() { + echo -e "\033[0;31m[ERROR]\033[0m $*" >&2 +} + +log_warn() { + echo -e "\033[0;33m[WARN]\033[0m $*" +} + +log_debug() { + if [[ "$VERBOSE" == true ]]; then + echo "[DEBUG] $*" + fi +} + +log_section() { + echo "" + echo "==========================================" + echo "$*" + echo "==========================================" +} + +####################################### +# Usage and Help +####################################### + +usage() { + cat << EOF +ATX Container Build and Test Script + +USAGE: + $SCRIPT_NAME [OPTIONS] + +OPTIONS: + --image-name Docker image name (default: $DEFAULT_IMAGE_NAME) + --image-tag Docker image tag (default: $DEFAULT_IMAGE_TAG) + --output-dir Output directory for test results (default: $DEFAULT_OUTPUT_DIR) + --max-size-mb Maximum acceptable image size in MB (default: $DEFAULT_MAX_IMAGE_SIZE_MB) + --no-cache Build without using cache + --skip-build Skip Docker build step + --skip-smoke-test Skip smoke test + --skip-s3-test Skip S3 integration test + --verbose Enable verbose output + --help Show this help message + +DESCRIPTION: + This script builds the ATX container 
Docker image and runs comprehensive tests + to verify the image is properly configured. It performs: + + 1. Docker image build with validation + 2. Image size verification + 3. Component installation verification (ATX CLI, AWS CLI) + 4. Smoke test execution + 5. Optional S3 integration test + + The script implements task 11 from the implementation plan. + +EXIT CODES: + 0 - All tests passed + 1 - Docker build failed + 2 - Image size exceeds limit + 3 - Component verification failed + 4 - Smoke test failed + 5 - S3 integration test failed + 10 - Docker daemon not running + 11 - Invalid arguments + +EXAMPLES: + # Build and test with defaults + $SCRIPT_NAME + + # Build without cache + $SCRIPT_NAME --no-cache + + # Build with custom image name + $SCRIPT_NAME --image-name my-atx-runner --image-tag v1.0 + + # Skip S3 test (requires AWS setup) + $SCRIPT_NAME --skip-s3-test + + # Only run tests on existing image + $SCRIPT_NAME --skip-build + +EOF +} + +####################################### +# Argument Parsing +####################################### + +parse_arguments() { + while [[ $# -gt 0 ]]; do + case "$1" in + --image-name) + IMAGE_NAME="$2" + shift 2 + ;; + --image-tag) + IMAGE_TAG="$2" + shift 2 + ;; + --output-dir) + OUTPUT_DIR="$2" + shift 2 + ;; + --max-size-mb) + MAX_IMAGE_SIZE_MB="$2" + shift 2 + ;; + --no-cache) + BUILD_NO_CACHE=true + shift + ;; + --skip-build) + SKIP_BUILD=true + shift + ;; + --skip-smoke-test) + SKIP_SMOKE_TEST=true + shift + ;; + --skip-s3-test) + SKIP_S3_TEST=true + shift + ;; + --verbose) + VERBOSE=true + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + log_error "Unknown option: $1" + usage + exit 11 + ;; + esac + done +} + +####################################### +# Pre-flight Checks +####################################### + +check_docker_available() { + log_info "Checking Docker availability..." + + if ! command -v docker &> /dev/null; then + log_error "Docker not found in PATH" + log_error "Please install Docker: https://docs.docker.com/get-docker/" + return 10 + fi + + if ! docker info &> /dev/null; then + log_error "Docker daemon is not running" + log_error "Please start Docker Desktop or the Docker daemon" + return 10 + fi + + local docker_version + docker_version=$(docker --version) + log_success "Docker is available: $docker_version" + + return 0 +} + +check_dockerfile_exists() { + log_info "Checking Dockerfile exists..." + + if [[ ! -f "$PROJECT_ROOT/Dockerfile" ]]; then + log_error "Dockerfile not found at: $PROJECT_ROOT/Dockerfile" + return 1 + fi + + log_success "Dockerfile found" + return 0 +} + +####################################### +# Build Functions +####################################### + +build_docker_image() { + log_section "STEP 1: Building Docker Image" + + local full_image_name="${IMAGE_NAME}:${IMAGE_TAG}" + log_info "Building image: $full_image_name" + log_info "Build context: $PROJECT_ROOT" + + # Build command + local build_cmd="docker build" + + if [[ "$BUILD_NO_CACHE" == true ]]; then + build_cmd+=" --no-cache" + log_info "Building without cache" + fi + + build_cmd+=" -t $full_image_name" + build_cmd+=" -f $PROJECT_ROOT/Dockerfile" + build_cmd+=" $PROJECT_ROOT" + + log_debug "Build command: $build_cmd" + + # Execute build + log_info "Starting Docker build (this may take several minutes)..." + local build_start=$(date +%s) + + if [[ "$VERBOSE" == true ]]; then + if ! eval "$build_cmd"; then + log_error "Docker build failed" + return 1 + fi + else + if ! 
eval "$build_cmd" > "$OUTPUT_DIR/build.log" 2>&1; then + log_error "Docker build failed" + log_error "See build log: $OUTPUT_DIR/build.log" + tail -n 20 "$OUTPUT_DIR/build.log" | sed 's/^/ /' + return 1 + fi + fi + + local build_end=$(date +%s) + local build_duration=$((build_end - build_start)) + + log_success "Docker image built successfully" + log_info " Image: $full_image_name" + log_info " Build time: ${build_duration}s" + + return 0 +} + +verify_image_size() { + log_section "STEP 2: Verifying Image Size" + + local full_image_name="${IMAGE_NAME}:${IMAGE_TAG}" + + # Get image size + local image_size_bytes + image_size_bytes=$(docker image inspect "$full_image_name" --format='{{.Size}}' 2>/dev/null || echo "0") + + if [[ "$image_size_bytes" == "0" ]]; then + log_error "Failed to get image size" + return 2 + fi + + local image_size_mb=$((image_size_bytes / 1024 / 1024)) + + log_info "Image size: ${image_size_mb} MB" + log_info "Maximum allowed: ${MAX_IMAGE_SIZE_MB} MB" + + if [[ $image_size_mb -gt $MAX_IMAGE_SIZE_MB ]]; then + log_error "Image size exceeds maximum allowed size" + log_error " Actual: ${image_size_mb} MB" + log_error " Maximum: ${MAX_IMAGE_SIZE_MB} MB" + log_warn "Consider optimizing the Dockerfile to reduce image size" + return 2 + fi + + local size_percentage=$((image_size_mb * 100 / MAX_IMAGE_SIZE_MB)) + log_success "Image size is acceptable (${size_percentage}% of maximum)" + + # Show layer information + if [[ "$VERBOSE" == true ]]; then + log_debug "Image layers:" + docker history "$full_image_name" --human --no-trunc | head -n 10 | sed 's/^/ /' + fi + + return 0 +} + +verify_components_installed() { + log_section "STEP 3: Verifying Component Installation" + + local full_image_name="${IMAGE_NAME}:${IMAGE_TAG}" + local all_checks_passed=true + + # Check ATX CLI + log_info "Checking ATX CLI installation..." + if docker run --rm "$full_image_name" sh -c "command -v atx && atx --version" > "$OUTPUT_DIR/atx_version.txt" 2>&1; then + local atx_version=$(cat "$OUTPUT_DIR/atx_version.txt") + log_success "ATX CLI is installed" + log_info " $(echo "$atx_version" | head -n 2 | tail -n 1)" + else + log_error "ATX CLI is not installed or not accessible" + cat "$OUTPUT_DIR/atx_version.txt" | sed 's/^/ /' + all_checks_passed=false + fi + + # Check AWS CLI + log_info "Checking AWS CLI installation..." + if docker run --rm "$full_image_name" sh -c "command -v aws && aws --version" > "$OUTPUT_DIR/aws_version.txt" 2>&1; then + local aws_version=$(cat "$OUTPUT_DIR/aws_version.txt" | tail -n 1) + log_success "AWS CLI is installed" + log_info " $aws_version" + else + log_error "AWS CLI is not installed or not accessible" + cat "$OUTPUT_DIR/aws_version.txt" | sed 's/^/ /' + all_checks_passed=false + fi + + # Check scripts are present + log_info "Checking scripts are installed..." 
+ local scripts_to_check=( + "/usr/local/bin/atx-orchestrator.sh" + "/usr/local/bin/s3-integration.sh" + "/usr/local/bin/smoke-test.sh" + "/usr/local/bin/csv-parser.sh" + ) + + for script in "${scripts_to_check[@]}"; do + if docker run --rm "$full_image_name" sh -c "test -x $script" 2>/dev/null; then + log_success " $script is present and executable" + else + log_error " $script is missing or not executable" + all_checks_passed=false + fi + done + + if [[ "$all_checks_passed" == true ]]; then + log_success "All components are properly installed" + return 0 + else + log_error "Some components are missing or not properly installed" + return 3 + fi +} + +####################################### +# Test Functions +####################################### + +run_smoke_test() { + log_section "STEP 4: Running Smoke Test" + + local full_image_name="${IMAGE_NAME}:${IMAGE_TAG}" + + log_info "Executing smoke test in container..." + log_info "This verifies ATX can execute transformations" + + # Create output directory for smoke test + mkdir -p "$OUTPUT_DIR/smoke_test" + + # Run smoke test + local smoke_test_cmd="docker run --rm" + smoke_test_cmd+=" -v $OUTPUT_DIR/smoke_test:/workspace/results" + smoke_test_cmd+=" $full_image_name" + smoke_test_cmd+=" --smoke-test" + smoke_test_cmd+=" --output-dir /workspace/results" + + if [[ "$VERBOSE" == true ]]; then + smoke_test_cmd+=" --verbose" + fi + + log_debug "Smoke test command: $smoke_test_cmd" + + local smoke_start=$(date +%s) + + if eval "$smoke_test_cmd"; then + local smoke_end=$(date +%s) + local smoke_duration=$((smoke_end - smoke_start)) + + log_success "Smoke test passed" + log_info " Duration: ${smoke_duration}s" + log_info " Results: $OUTPUT_DIR/smoke_test/" + + # Show summary if available + if [[ -f "$OUTPUT_DIR/smoke_test/smoke_test.log" ]]; then + log_info " Log file: $OUTPUT_DIR/smoke_test/smoke_test.log" + + if [[ "$VERBOSE" == true ]]; then + log_debug "Smoke test log excerpt:" + grep -E "\[SUCCESS\]|\[ERROR\]" "$OUTPUT_DIR/smoke_test/smoke_test.log" | tail -n 5 | sed 's/^/ /' + fi + fi + + return 0 + else + local exit_code=$? + log_error "Smoke test failed (exit code: $exit_code)" + + # Show failure details + if [[ -f "$OUTPUT_DIR/smoke_test/smoke_test.log" ]]; then + log_error "Smoke test log excerpt:" + tail -n 20 "$OUTPUT_DIR/smoke_test/smoke_test.log" | sed 's/^/ /' + fi + + if [[ -d "$OUTPUT_DIR/smoke_test/smoke_test_failure" ]]; then + log_info "Failure artifacts preserved at: $OUTPUT_DIR/smoke_test/smoke_test_failure/" + fi + + return 4 + fi +} + +run_s3_integration_test() { + log_section "STEP 5: Running S3 Integration Test (Optional)" + + if [[ "$SKIP_S3_TEST" == true ]]; then + log_info "Skipping S3 integration test (--skip-s3-test specified)" + return 0 + fi + + log_warn "S3 integration test requires:" + log_warn " - AWS credentials configured" + log_warn " - S3 bucket with sample Progress code" + log_warn " - CSV file with S3 paths" + log_warn "" + log_warn "This test is optional and can be skipped with --skip-s3-test" + log_warn "For now, skipping S3 test (not yet implemented)" + + # TODO: Implement S3 integration test when S3 bucket is available + # This would: + # 1. Check AWS credentials + # 2. Create test S3 bucket or use existing + # 3. Upload sample Progress code + # 4. Create CSV with S3 paths + # 5. Run orchestrator with CSV + # 6. Verify results uploaded to S3 + # 7. 
Clean up test resources + + return 0 +} + +####################################### +# Summary and Reporting +####################################### + +generate_summary_report() { + log_section "Build and Test Summary" + + local full_image_name="${IMAGE_NAME}:${IMAGE_TAG}" + + cat > "$OUTPUT_DIR/summary.txt" << EOF +ATX Container Build and Test Summary +==================================== +Generated: $(date '+%Y-%m-%d %H:%M:%S') + +IMAGE INFORMATION +----------------- +Image Name: $full_image_name +Image ID: $(docker image inspect "$full_image_name" --format='{{.Id}}' 2>/dev/null || echo "N/A") +Image Size: $(docker image inspect "$full_image_name" --format='{{.Size}}' 2>/dev/null | awk '{print int($1/1024/1024)" MB"}' || echo "N/A") +Created: $(docker image inspect "$full_image_name" --format='{{.Created}}' 2>/dev/null || echo "N/A") + +COMPONENTS VERIFIED +------------------- +โœ“ ATX CLI installed and accessible +โœ“ AWS CLI installed and accessible +โœ“ Orchestrator script present +โœ“ S3 integration script present +โœ“ Smoke test script present +โœ“ CSV parser script present + +TEST RESULTS +------------ +โœ“ Docker build: PASSED +โœ“ Image size check: PASSED +โœ“ Component verification: PASSED +โœ“ Smoke test: PASSED + +OUTPUT FILES +------------ +- Build log: $OUTPUT_DIR/build.log +- ATX version: $OUTPUT_DIR/atx_version.txt +- AWS version: $OUTPUT_DIR/aws_version.txt +- Smoke test results: $OUTPUT_DIR/smoke_test/ +- Summary: $OUTPUT_DIR/summary.txt + +NEXT STEPS +---------- +1. Push image to ECR: + docker tag $full_image_name .dkr.ecr..amazonaws.com/$IMAGE_NAME:$IMAGE_TAG + docker push .dkr.ecr..amazonaws.com/$IMAGE_NAME:$IMAGE_TAG + +2. Deploy to ECS/EKS: + See docs/deployment.md for deployment instructions + +3. Run with sample data: + docker run --rm -v \$(pwd)/examples:/data $full_image_name --csv-file /data/single-customer.csv + +EOF + + cat "$OUTPUT_DIR/summary.txt" + + log_info "" + log_info "Full summary saved to: $OUTPUT_DIR/summary.txt" +} + +####################################### +# Main Function +####################################### + +main() { + # Parse arguments + parse_arguments "$@" + + # Create output directory + mkdir -p "$OUTPUT_DIR" + + log_section "ATX Container Build and Test" + log_info "Started: $(date '+%Y-%m-%d %H:%M:%S')" + log_info "Output directory: $OUTPUT_DIR" + log_info "" + + # Pre-flight checks + if ! check_docker_available; then + exit 10 + fi + + if ! check_dockerfile_exists; then + exit 1 + fi + + # Build image + if [[ "$SKIP_BUILD" == false ]]; then + if ! build_docker_image; then + log_error "Build failed" + exit 1 + fi + + if ! verify_image_size; then + log_error "Image size verification failed" + exit 2 + fi + else + log_info "Skipping build (--skip-build specified)" + fi + + # Verify components + if ! verify_components_installed; then + log_error "Component verification failed" + exit 3 + fi + + # Run smoke test + if [[ "$SKIP_SMOKE_TEST" == false ]]; then + if ! run_smoke_test; then + log_error "Smoke test failed" + exit 4 + fi + else + log_info "Skipping smoke test (--skip-smoke-test specified)" + fi + + # Run S3 integration test (optional) + if ! 
run_s3_integration_test; then + log_error "S3 integration test failed" + exit 5 + fi + + # Generate summary + generate_summary_report + + log_section "ALL TESTS PASSED" + log_success "Docker image is ready for deployment" + log_info "Completed: $(date '+%Y-%m-%d %H:%M:%S')" + + exit 0 +} + +# Run main function +main "$@" diff --git a/src/agentic_platform/agent/code_transform/scripts/csv-parser.sh b/src/agentic_platform/agent/code_transform/scripts/csv-parser.sh new file mode 100755 index 0000000..170571c --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/csv-parser.sh @@ -0,0 +1,327 @@ +#!/bin/bash +# CSV Parser for S3 Paths +# Parses CSV files with S3 paths and generates processing queue +# +# Requirements: 2.1, 2.2, 2.4, 2.5, 6.1, 8.2 + +set -euo pipefail + +# Global arrays to store parsed data +declare -a S3_PATHS=() +declare -a BUILD_COMMANDS=() +declare -a TRANSFORMATION_NAMES=() +declare -a OUTPUT_S3_PATHS=() + +# Parse a CSV line handling quoted fields and special characters +# Arguments: +# $1 - CSV line to parse +# Returns: +# Populates global array CSV_FIELDS with parsed values +parse_csv_line() { + local line="$1" + CSV_FIELDS=() + + # Handle empty lines + if [[ -z "$line" || "$line" =~ ^[[:space:]]*$ ]]; then + return 1 + fi + + local field="" + local in_quotes=false + local i=0 + + while [ $i -lt ${#line} ]; do + local char="${line:$i:1}" + + if [ "$char" = '"' ]; then + if $in_quotes; then + # Check for escaped quote (double quote) + if [ $((i + 1)) -lt ${#line} ] && [ "${line:$((i + 1)):1}" = '"' ]; then + field="${field}\"" + i=$((i + 1)) + else + in_quotes=false + fi + else + in_quotes=true + fi + elif [ "$char" = ',' ] && ! $in_quotes; then + CSV_FIELDS+=("$field") + field="" + else + field="${field}${char}" + fi + + i=$((i + 1)) + done + + # Add the last field + CSV_FIELDS+=("$field") + + return 0 +} + +# Validate S3 URI format +# Arguments: +# $1 - S3 URI to validate +# Returns: +# 0 if valid, 1 if invalid +validate_s3_uri() { + local uri="$1" + + # Check if URI starts with s3:// + if [[ ! "$uri" =~ ^s3:// ]]; then + return 1 + fi + + # Extract bucket and key + local path="${uri#s3://}" + + # Check if bucket name exists (at least one character before /) + if [[ ! "$path" =~ ^[^/]+/ ]]; then + return 1 + fi + + return 0 +} + +# Parse CSV file and populate global arrays +# Arguments: +# $1 - Path to CSV file +# Returns: +# 0 on success, 1 on error +parse_csv_file() { + local csv_file="$1" + local line_number=0 + local header_found=false + + # Check if file exists + if [[ ! -f "$csv_file" ]]; then + echo "ERROR: CSV file not found: $csv_file" >&2 + return 1 + fi + + # Read CSV file line by line + while IFS= read -r line || [[ -n "$line" ]]; do + line_number=$((line_number + 1)) + + # Skip empty lines and comment lines + if [[ -z "$line" || "$line" =~ ^[[:space:]]*$ || "$line" =~ ^[[:space:]]*# ]]; then + continue + fi + + # Parse the line + if ! parse_csv_line "$line"; then + continue + fi + + # Check if this is the header row + if ! 
$header_found; then + # Validate header columns + local expected_cols=("s3_path" "build_command" "transformation_name" "output_s3_path") + local has_required_cols=true + + # Check we have at least 3 columns + if [[ ${#CSV_FIELDS[@]} -lt 3 ]]; then + echo "ERROR: Header must have at least 3 columns (s3_path, build_command, transformation_name) at line $line_number" >&2 + return 1 + fi + + # Check for required columns (first 3 are required) + if [[ "${CSV_FIELDS[0]}" != "s3_path" ]]; then + echo "ERROR: Missing required column 's3_path' in header at line $line_number" >&2 + return 1 + fi + + if [[ "${CSV_FIELDS[1]}" != "build_command" ]]; then + echo "ERROR: Missing required column 'build_command' in header at line $line_number" >&2 + return 1 + fi + + if [[ "${CSV_FIELDS[2]}" != "transformation_name" ]]; then + echo "ERROR: Missing required column 'transformation_name' in header at line $line_number" >&2 + return 1 + fi + + header_found=true + continue + fi + + # Validate we have at least 3 fields + if [[ ${#CSV_FIELDS[@]} -lt 3 ]]; then + echo "ERROR: Invalid CSV format at line $line_number: Expected at least 3 columns, got ${#CSV_FIELDS[@]}" >&2 + return 1 + fi + + local s3_path="${CSV_FIELDS[0]}" + local build_command="${CSV_FIELDS[1]}" + local transformation_name="${CSV_FIELDS[2]}" + local output_s3_path="" + + # Fourth column is optional + if [[ ${#CSV_FIELDS[@]} -ge 4 ]]; then + output_s3_path="${CSV_FIELDS[3]}" + fi + + # Validate S3 path + if ! validate_s3_uri "$s3_path"; then + echo "ERROR: Invalid S3 URI format at line $line_number: '$s3_path'" >&2 + echo " Expected format: s3://bucket/key" >&2 + return 1 + fi + + # Validate output S3 path if provided + if [[ -n "$output_s3_path" ]] && ! validate_s3_uri "$output_s3_path"; then + echo "ERROR: Invalid output S3 URI format at line $line_number: '$output_s3_path'" >&2 + echo " Expected format: s3://bucket/key" >&2 + return 1 + fi + + # Validate required fields are not empty + if [[ -z "$s3_path" ]]; then + echo "ERROR: Empty s3_path at line $line_number" >&2 + return 1 + fi + + if [[ -z "$build_command" ]]; then + echo "ERROR: Empty build_command at line $line_number" >&2 + return 1 + fi + + if [[ -z "$transformation_name" ]]; then + echo "ERROR: Empty transformation_name at line $line_number" >&2 + return 1 + fi + + # Add to arrays + S3_PATHS+=("$s3_path") + BUILD_COMMANDS+=("$build_command") + TRANSFORMATION_NAMES+=("$transformation_name") + OUTPUT_S3_PATHS+=("$output_s3_path") + + done < "$csv_file" + + # Check if we found a header + if ! 
$header_found; then + echo "ERROR: No valid header row found in CSV file" >&2 + return 1 + fi + + # Check if we have any data rows + if [[ ${#S3_PATHS[@]} -eq 0 ]]; then + echo "ERROR: No data rows found in CSV file" >&2 + return 1 + fi + + return 0 +} + +# Generate processing queue +# Arguments: +# None (uses global arrays) +# Returns: +# Prints processing queue to stdout +generate_processing_queue() { + local count=${#S3_PATHS[@]} + + echo "Processing queue generated: $count items" + echo "" + + for i in "${!S3_PATHS[@]}"; do + echo "Item $((i + 1)):" + echo " S3 Path: ${S3_PATHS[$i]}" + echo " Build Command: ${BUILD_COMMANDS[$i]}" + echo " Transformation: ${TRANSFORMATION_NAMES[$i]}" + if [[ -n "${OUTPUT_S3_PATHS[$i]}" ]]; then + echo " Output S3 Path: ${OUTPUT_S3_PATHS[$i]}" + fi + echo "" + done +} + +# Get the number of items in the processing queue +# Returns: +# Number of items +get_queue_size() { + echo "${#S3_PATHS[@]}" +} + +# Get item from queue by index +# Arguments: +# $1 - Index (0-based) +# Returns: +# Prints item details in format: s3_path|build_command|transformation_name|output_s3_path +get_queue_item() { + local index="$1" + + if [[ $index -lt 0 || $index -ge ${#S3_PATHS[@]} ]]; then + echo "ERROR: Invalid queue index: $index" >&2 + return 1 + fi + + echo "${S3_PATHS[$index]}|${BUILD_COMMANDS[$index]}|${TRANSFORMATION_NAMES[$index]}|${OUTPUT_S3_PATHS[$index]}" +} + +# Get all queue items as an array +# Returns: +# Prints all items, one per line in format: s3_path|build_command|transformation_name|output_s3_path +get_all_queue_items() { + for i in "${!S3_PATHS[@]}"; do + echo "${S3_PATHS[$i]}|${BUILD_COMMANDS[$i]}|${TRANSFORMATION_NAMES[$i]}|${OUTPUT_S3_PATHS[$i]}" + done +} + +# Process queue in serial mode +# This function is a placeholder for the orchestrator to implement +# Arguments: +# $1 - Processing function to call for each item +process_queue_serial() { + local process_func="$1" + + for i in "${!S3_PATHS[@]}"; do + local item="${S3_PATHS[$i]}|${BUILD_COMMANDS[$i]}|${TRANSFORMATION_NAMES[$i]}|${OUTPUT_S3_PATHS[$i]}" + $process_func "$item" "$i" + done +} + +# Process queue in parallel mode +# This function is a placeholder for the orchestrator to implement +# Arguments: +# $1 - Processing function to call for each item +# $2 - Maximum parallel jobs (optional, default: 4) +process_queue_parallel() { + local process_func="$1" + local max_jobs="${2:-4}" + + echo "Parallel processing with max $max_jobs jobs" + + # This will be implemented by the orchestrator using background jobs + # For now, just document the interface + for i in "${!S3_PATHS[@]}"; do + local item="${S3_PATHS[$i]}|${BUILD_COMMANDS[$i]}|${TRANSFORMATION_NAMES[$i]}|${OUTPUT_S3_PATHS[$i]}" + echo "Would process in parallel: $item" + done +} + +# Main function for testing +main() { + if [[ $# -lt 1 ]]; then + echo "Usage: $0 " >&2 + exit 1 + fi + + local csv_file="$1" + + if ! 
parse_csv_file "$csv_file"; then + exit 1 + fi + + generate_processing_queue + + echo "Total items in queue: $(get_queue_size)" +} + +# Only run main if script is executed directly (not sourced) +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/src/agentic_platform/agent/code_transform/scripts/entrypoint.sh b/src/agentic_platform/agent/code_transform/scripts/entrypoint.sh new file mode 100644 index 0000000..aa6b0cc --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/entrypoint.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# ATX Container Entrypoint Script +# This script serves as the container entrypoint and delegates to the orchestrator + +set -euo pipefail + +# Filter out any arguments that look like script paths +filtered_args=() +for arg in "$@"; do + # Skip arguments that look like script paths + if [[ "$arg" != "/usr/local/bin/atx-orchestrator.sh" && "$arg" != "atx-orchestrator.sh" ]]; then + filtered_args+=("$arg") + fi +done + +# If no arguments provided after filtering, show help +if [[ ${#filtered_args[@]} -eq 0 ]]; then + exec /usr/local/bin/atx-orchestrator.sh --help +fi + +# Pass filtered arguments to the orchestrator +exec /usr/local/bin/atx-orchestrator.sh "${filtered_args[@]}" \ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/scripts/push-to-ecr.sh b/src/agentic_platform/agent/code_transform/scripts/push-to-ecr.sh new file mode 100755 index 0000000..63a254b --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/push-to-ecr.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# +# push-to-ecr.sh - Tag and push ATX Container Test Runner image to Amazon ECR +# +# This script handles: +# - ECR authentication +# - Image tagging with version and latest tags +# - Pushing to ECR repository +# - Verification of successful push +# +# Usage: +# ./push-to-ecr.sh [repository-name] [version] +# +# Example: +# ./push-to-ecr.sh 123456789012 us-east-1 +# ./push-to-ecr.sh 123456789012 us-east-1 atx-test-runner 0.1.0 +# + +set -euo pipefail + +# Color codes for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Default values +DEFAULT_REPO_NAME="atx-test-runner" +DEFAULT_VERSION=$(cat VERSION 2>/dev/null || echo "0.1.0") + +# Function to print colored messages +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Function to show usage +usage() { + cat << EOF +Usage: $0 [repository-name] [version] + +Arguments: + aws-account-id AWS account ID (required) + region AWS region (required, e.g., us-east-1) + repository-name ECR repository name (optional, default: ${DEFAULT_REPO_NAME}) + version Image version tag (optional, default: ${DEFAULT_VERSION}) + +Examples: + $0 123456789012 us-east-1 + $0 123456789012 us-east-1 atx-test-runner 0.2.0 + +Environment Variables: + AWS_PROFILE AWS CLI profile to use (optional) + AWS_REGION AWS region (overridden by command line argument) + +Prerequisites: + - AWS CLI installed and configured + - Docker installed and running + - Appropriate IAM permissions for ECR operations + - ECR repository must exist (or use --create-repo flag) + +EOF + exit 1 +} + +# Parse command line arguments +if [ $# -lt 2 ]; then + log_error "Missing required arguments" + usage +fi + +AWS_ACCOUNT_ID="$1" +AWS_REGION="$2" +REPO_NAME="${3:-${DEFAULT_REPO_NAME}}" +VERSION="${4:-${DEFAULT_VERSION}}" + +# Validate AWS account ID format +if ! 
[[ "$AWS_ACCOUNT_ID" =~ ^[0-9]{12}$ ]]; then + log_error "Invalid AWS account ID format. Must be 12 digits." + exit 1 +fi + +# Construct ECR repository URI +ECR_REPO_URI="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REPO_NAME}" + +log_info "Starting ECR push process..." +log_info "AWS Account: ${AWS_ACCOUNT_ID}" +log_info "Region: ${AWS_REGION}" +log_info "Repository: ${REPO_NAME}" +log_info "Version: ${VERSION}" +log_info "ECR URI: ${ECR_REPO_URI}" + +# Check if Docker is running +if ! docker info > /dev/null 2>&1; then + log_error "Docker is not running. Please start Docker and try again." + exit 1 +fi + +# Check if AWS CLI is installed +if ! command -v aws &> /dev/null; then + log_error "AWS CLI is not installed. Please install it first." + exit 1 +fi + +# Check if local image exists +LOCAL_IMAGE="${REPO_NAME}:latest" +if ! docker image inspect "${LOCAL_IMAGE}" > /dev/null 2>&1; then + log_error "Local image '${LOCAL_IMAGE}' not found. Please build the image first." + log_info "Run: docker build -t ${LOCAL_IMAGE} ." + exit 1 +fi + +# Authenticate Docker to ECR +log_info "Authenticating Docker to ECR..." +if aws ecr get-login-password --region "${AWS_REGION}" | \ + docker login --username AWS --password-stdin "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"; then + log_info "Successfully authenticated to ECR" +else + log_error "Failed to authenticate to ECR" + exit 1 +fi + +# Check if ECR repository exists +log_info "Checking if ECR repository exists..." +if aws ecr describe-repositories --repository-names "${REPO_NAME}" --region "${AWS_REGION}" > /dev/null 2>&1; then + log_info "ECR repository '${REPO_NAME}' exists" +else + log_warn "ECR repository '${REPO_NAME}' does not exist" + log_info "Creating ECR repository..." + if aws ecr create-repository \ + --repository-name "${REPO_NAME}" \ + --region "${AWS_REGION}" \ + --image-scanning-configuration scanOnPush=true \ + --encryption-configuration encryptionType=AES256 > /dev/null 2>&1; then + log_info "Successfully created ECR repository" + else + log_error "Failed to create ECR repository" + exit 1 + fi +fi + +# Tag the image with version +log_info "Tagging image with version ${VERSION}..." +docker tag "${LOCAL_IMAGE}" "${ECR_REPO_URI}:${VERSION}" +docker tag "${LOCAL_IMAGE}" "${ECR_REPO_URI}:latest" + +# Push version tag +log_info "Pushing image with version tag ${VERSION}..." +if docker push "${ECR_REPO_URI}:${VERSION}"; then + log_info "Successfully pushed ${ECR_REPO_URI}:${VERSION}" +else + log_error "Failed to push version tag" + exit 1 +fi + +# Push latest tag +log_info "Pushing image with latest tag..." +if docker push "${ECR_REPO_URI}:latest"; then + log_info "Successfully pushed ${ECR_REPO_URI}:latest" +else + log_error "Failed to push latest tag" + exit 1 +fi + +# Verify the image is accessible +log_info "Verifying image is accessible in ECR..." 
+if aws ecr describe-images \ + --repository-name "${REPO_NAME}" \ + --image-ids imageTag="${VERSION}" \ + --region "${AWS_REGION}" > /dev/null 2>&1; then + log_info "Successfully verified image ${VERSION} in ECR" +else + log_error "Failed to verify image in ECR" + exit 1 +fi + +# Get image details +IMAGE_DIGEST=$(aws ecr describe-images \ + --repository-name "${REPO_NAME}" \ + --image-ids imageTag="${VERSION}" \ + --region "${AWS_REGION}" \ + --query 'imageDetails[0].imageDigest' \ + --output text) + +IMAGE_SIZE=$(aws ecr describe-images \ + --repository-name "${REPO_NAME}" \ + --image-ids imageTag="${VERSION}" \ + --region "${AWS_REGION}" \ + --query 'imageDetails[0].imageSizeInBytes' \ + --output text) + +IMAGE_SIZE_MB=$((IMAGE_SIZE / 1024 / 1024)) + +# Print summary +echo "" +log_info "==========================================" +log_info "ECR Push Summary" +log_info "==========================================" +log_info "Repository URI: ${ECR_REPO_URI}" +log_info "Version Tag: ${VERSION}" +log_info "Latest Tag: latest" +log_info "Image Digest: ${IMAGE_DIGEST}" +log_info "Image Size: ${IMAGE_SIZE_MB} MB" +log_info "==========================================" +echo "" +log_info "Image successfully pushed to ECR!" +echo "" +log_info "To pull this image:" +echo " docker pull ${ECR_REPO_URI}:${VERSION}" +echo " docker pull ${ECR_REPO_URI}:latest" +echo "" +log_info "To use in ECS task definition:" +echo " \"image\": \"${ECR_REPO_URI}:${VERSION}\"" +echo "" + +exit 0 diff --git a/src/agentic_platform/agent/code_transform/scripts/s3-integration.sh b/src/agentic_platform/agent/code_transform/scripts/s3-integration.sh new file mode 100755 index 0000000..6d7bac8 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/s3-integration.sh @@ -0,0 +1,1058 @@ +#!/bin/bash +# S3 Integration Script +# Handles download/upload operations for ATX Container Test Runner +# +# This script provides functions for: +# - Downloading folders from S3 to local temp directories +# - Uploading results back to S3 +# - Cleanup of temporary files + +set -euo pipefail + +####################################### +# Global Variables +####################################### +SCRIPT_NAME=$(basename "$0") +LOG_LEVEL="${LOG_LEVEL:-INFO}" +TEMP_BASE_DIR="${TEMP_BASE_DIR:-/tmp/atx-workspace}" +MAX_RETRIES="${MAX_RETRIES:-3}" +RETRY_DELAY="${RETRY_DELAY:-5}" + +####################################### +# Logging Functions +####################################### + +# Log message with timestamp and level +# Arguments: +# $1 - Log level (INFO, WARN, ERROR, DEBUG) +# $2 - Message +log() { + local level="$1" + shift + local message="$*" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Only log if level is appropriate + case "$LOG_LEVEL" in + DEBUG) + echo "[$timestamp] [$level] $message" >&2 + ;; + INFO) + if [[ "$level" != "DEBUG" ]]; then + echo "[$timestamp] [$level] $message" >&2 + fi + ;; + WARN) + if [[ "$level" == "WARN" || "$level" == "ERROR" ]]; then + echo "[$timestamp] [$level] $message" >&2 + fi + ;; + ERROR) + if [[ "$level" == "ERROR" ]]; then + echo "[$timestamp] [$level] $message" >&2 + fi + ;; + esac +} + +log_info() { + log "INFO" "$@" +} + +log_warn() { + log "WARN" "$@" +} + +log_error() { + log "ERROR" "$@" +} + +log_debug() { + log "DEBUG" "$@" +} + +####################################### +# Error Handling +####################################### + +# Error codes for different failure types +readonly ERROR_S3_ACCESS=10 +readonly ERROR_S3_DOWNLOAD=11 +readonly ERROR_S3_UPLOAD=12 +readonly 
ERROR_NETWORK_TIMEOUT=13 +readonly ERROR_AWS_CREDENTIALS=14 +readonly ERROR_INVALID_PATH=15 +readonly ERROR_VALIDATION_FAILED=16 + +# Exit with error message +# Arguments: +# $1 - Error message +# $2 - Exit code (optional, defaults to 1) +die() { + local message="$1" + local exit_code="${2:-1}" + log_error "$message" + exit "$exit_code" +} + +# Handle S3 access errors with detailed diagnostics +# Arguments: +# $1 - S3 path that failed +# $2 - Operation type (download/upload) +# $3 - AWS CLI error output +# Returns: +# Appropriate error code +handle_s3_access_error() { + local s3_path="$1" + local operation="$2" + local error_output="$3" + + log_error "S3 $operation failed for: $s3_path" + + # Check for specific error types + if echo "$error_output" | grep -qi "NoSuchBucket"; then + log_error "Bucket does not exist in S3 path: $s3_path" + log_error "Verify the bucket name is correct and exists in your AWS account" + return $ERROR_S3_ACCESS + elif echo "$error_output" | grep -qi "NoSuchKey"; then + log_error "Key/folder does not exist in S3: $s3_path" + log_error "Verify the path is correct and the folder exists" + return $ERROR_S3_ACCESS + elif echo "$error_output" | grep -qi "AccessDenied\|Forbidden"; then + log_error "Access denied to S3 path: $s3_path" + log_error "Check IAM permissions for S3 $operation operations" + log_error "Required permissions: s3:GetObject, s3:ListBucket for download" + log_error "Required permissions: s3:PutObject for upload" + return $ERROR_S3_ACCESS + elif echo "$error_output" | grep -qi "InvalidAccessKeyId\|SignatureDoesNotMatch"; then + log_error "AWS credentials are invalid or expired" + log_error "Verify AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY" + log_error "Or ensure IAM role is properly attached to ECS/EC2 instance" + return $ERROR_AWS_CREDENTIALS + elif echo "$error_output" | grep -qi "RequestTimeout\|ConnectTimeout"; then + log_error "Network timeout while accessing S3" + log_error "Check network connectivity and retry" + return $ERROR_NETWORK_TIMEOUT + else + log_error "Unknown S3 error occurred" + log_error "Error details: $error_output" + return $ERROR_S3_ACCESS + fi +} + +# Handle network timeout errors +# Arguments: +# $1 - Operation description +# $2 - Attempt number +# $3 - Max attempts +# Returns: +# 0 to continue retrying, 1 to stop +handle_network_timeout() { + local operation="$1" + local attempt="$2" + local max_attempts="$3" + + log_warn "Network timeout during: $operation" + + if [[ $attempt -lt $max_attempts ]]; then + log_info "Will retry (attempt $((attempt + 1)) of $max_attempts)" + return 0 + else + log_error "Maximum retry attempts reached for: $operation" + log_error "Network connectivity issues persist" + log_error "Troubleshooting steps:" + log_error " 1. Check internet connectivity" + log_error " 2. Verify AWS service endpoints are accessible" + log_error " 3. Check for firewall or security group restrictions" + log_error " 4. Verify DNS resolution is working" + return 1 + fi +} + +# Trap errors and cleanup +cleanup_on_error() { + local exit_code=$? 
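+    # Invoked via the EXIT trap registered below, so exit_code (captured above) is the
+    # status of whatever command terminated the script; successful runs add no output.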
+ if [[ $exit_code -ne 0 ]]; then + log_error "Script failed with exit code $exit_code" + + # Provide context-specific error messages + case $exit_code in + $ERROR_S3_ACCESS) + log_error "S3 access error - check permissions and paths" + ;; + $ERROR_S3_DOWNLOAD) + log_error "S3 download failed - check source path and connectivity" + ;; + $ERROR_S3_UPLOAD) + log_error "S3 upload failed - check destination path and connectivity" + ;; + $ERROR_NETWORK_TIMEOUT) + log_error "Network timeout - check connectivity and retry" + ;; + $ERROR_AWS_CREDENTIALS) + log_error "AWS credentials error - verify credentials are valid" + ;; + $ERROR_INVALID_PATH) + log_error "Invalid path - verify file/directory paths" + ;; + $ERROR_VALIDATION_FAILED) + log_error "Validation failed - check data integrity" + ;; + esac + fi +} + +trap cleanup_on_error EXIT + +####################################### +# Usage and Help +####################################### + +usage() { + cat << EOF +Usage: $SCRIPT_NAME [options] + +Commands: + download Download folder from S3 to local directory + upload Upload results from local directory to S3 + cleanup Clean up temporary files and directories + +Download Options: + --s3-path S3 path to download (e.g., s3://bucket/folder/) + --local-path Local destination path + --retry Number of retry attempts (default: $MAX_RETRIES) + +Upload Options: + --local-path Local source path + --s3-path S3 destination path + --retry Number of retry attempts (default: $MAX_RETRIES) + +Cleanup Options: + --path Path to clean up + --force Force cleanup without confirmation + +Environment Variables: + LOG_LEVEL Logging level (DEBUG, INFO, WARN, ERROR) + TEMP_BASE_DIR Base directory for temp files (default: /tmp/atx-workspace) + MAX_RETRIES Maximum retry attempts (default: 3) + RETRY_DELAY Delay between retries in seconds (default: 5) + +Examples: + # Download folder from S3 + $SCRIPT_NAME download --s3-path s3://bucket/customer1/folder1/ --local-path /tmp/workspace + + # Upload results to S3 + $SCRIPT_NAME upload --local-path /tmp/results --s3-path s3://bucket/results/customer1/folder1/ + + # Cleanup temp directory + $SCRIPT_NAME cleanup --path /tmp/workspace --force + +EOF +} + +####################################### +# Validation Functions +####################################### + +# Validate S3 path format +# Arguments: +# $1 - S3 path +# Returns: +# 0 if valid, 1 if invalid +validate_s3_path() { + local s3_path="$1" + + if [[ ! "$s3_path" =~ ^s3://[a-zA-Z0-9._-]+/.* ]]; then + log_error "Invalid S3 path format: $s3_path" + log_error "Expected format: s3://bucket-name/key/path/" + return 1 + fi + + return 0 +} + +# Check if AWS CLI is available +check_aws_cli() { + if ! command -v aws &> /dev/null; then + die "AWS CLI not found. Please install AWS CLI v2." 2 + fi + + log_debug "AWS CLI found: $(aws --version)" +} + +# Validate AWS credentials are configured +check_aws_credentials() { + if ! 
aws sts get-caller-identity &> /dev/null; then + log_warn "AWS credentials not configured or invalid" + log_warn "Ensure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set" + log_warn "Or use IAM role for ECS/EC2 instances" + return 1 + fi + + log_debug "AWS credentials validated" + return 0 +} + + +####################################### +# S3 Download Functions +####################################### + +# Download folder from S3 to local directory with retry logic +# Arguments: +# $1 - S3 path (e.g., s3://bucket/folder/) +# $2 - Local destination path +# $3 - Number of retries (optional, defaults to MAX_RETRIES) +# Returns: +# 0 on success, error code on failure +s3_download() { + local s3_path="$1" + local local_path="$2" + local max_retries="${3:-$MAX_RETRIES}" + + log_info "Starting S3 download: $s3_path -> $local_path" + + # Validate S3 path + if ! validate_s3_path "$s3_path"; then + return $ERROR_INVALID_PATH + fi + + # Create local directory if it doesn't exist + if ! mkdir -p "$local_path"; then + log_error "Failed to create local directory: $local_path" + log_error "Check permissions and disk space" + return $ERROR_INVALID_PATH + fi + + # Attempt download with retries + local attempt=1 + local error_log="/tmp/s3-download-error-$$.log" + + while [[ $attempt -le $max_retries ]]; do + log_info "Download attempt $attempt of $max_retries" + + # Capture both stdout and stderr + local aws_output + if aws_output=$(aws s3 sync "$s3_path" "$local_path" --no-progress 2>&1); then + log_info "Successfully downloaded from S3" + log_debug "AWS output: $aws_output" + + # Validate downloaded files exist + if ! validate_download "$local_path"; then + log_error "Download validation failed" + return $ERROR_VALIDATION_FAILED + fi + + # Cleanup error log + rm -f "$error_log" + return 0 + else + local exit_code=$? + echo "$aws_output" > "$error_log" + + log_warn "Download attempt $attempt failed with exit code $exit_code" + + # Handle specific error types + local error_type + error_type=$(handle_s3_access_error "$s3_path" "download" "$aws_output") + local error_code=$? + + # Check if we should retry based on error type + if [[ $error_code -eq $ERROR_NETWORK_TIMEOUT ]]; then + if ! handle_network_timeout "S3 download" "$attempt" "$max_retries"; then + rm -f "$error_log" + return $ERROR_NETWORK_TIMEOUT + fi + elif [[ $error_code -eq $ERROR_AWS_CREDENTIALS ]]; then + log_error "Credentials error - retrying will not help" + rm -f "$error_log" + return $ERROR_AWS_CREDENTIALS + elif [[ $attempt -lt $max_retries ]]; then + log_info "Retrying in $RETRY_DELAY seconds..." + sleep "$RETRY_DELAY" + fi + fi + + ((attempt++)) + done + + log_error "Failed to download from S3 after $max_retries attempts" + log_error "Last error log saved to: $error_log" + return $ERROR_S3_DOWNLOAD +} + +# Validate that downloaded files exist +# Arguments: +# $1 - Local path to validate +# Returns: +# 0 if files exist, 1 if directory is empty +validate_download() { + local local_path="$1" + + log_debug "Validating download at: $local_path" + + # Check if directory exists + if [[ ! 
-d "$local_path" ]]; then + log_error "Download directory does not exist: $local_path" + return 1 + fi + + # Check if directory has any files + local file_count=$(find "$local_path" -type f | wc -l) + + if [[ $file_count -eq 0 ]]; then + log_warn "No files found in downloaded directory: $local_path" + log_warn "This may indicate an empty S3 folder or download failure" + return 1 + fi + + log_info "Download validated: $file_count files found" + return 0 +} + +# Download with exponential backoff +# Arguments: +# $1 - S3 path +# $2 - Local path +# $3 - Max retries (optional) +# Returns: +# 0 on success, 1 on failure +s3_download_with_backoff() { + local s3_path="$1" + local local_path="$2" + local max_retries="${3:-$MAX_RETRIES}" + + log_info "Starting S3 download with exponential backoff" + + # Validate S3 path + if ! validate_s3_path "$s3_path"; then + return 1 + fi + + # Create local directory + mkdir -p "$local_path" || { + log_error "Failed to create directory: $local_path" + return 1 + } + + local attempt=1 + local delay=$RETRY_DELAY + + while [[ $attempt -le $max_retries ]]; do + log_info "Download attempt $attempt of $max_retries" + + if aws s3 sync "$s3_path" "$local_path" --no-progress; then + log_info "Successfully downloaded from S3" + + if validate_download "$local_path"; then + return 0 + else + log_error "Download validation failed" + return 1 + fi + fi + + if [[ $attempt -lt $max_retries ]]; then + log_info "Retrying in $delay seconds (exponential backoff)..." + sleep "$delay" + delay=$((delay * 2)) + fi + + ((attempt++)) + done + + log_error "Failed to download after $max_retries attempts" + return 1 +} + + +####################################### +# S3 Upload Functions +####################################### + +# Upload results from local directory to S3 with retry logic +# Arguments: +# $1 - Local source path +# $2 - S3 destination path +# $3 - Number of retries (optional, defaults to MAX_RETRIES) +# Returns: +# 0 on success, error code on failure +s3_upload() { + local local_path="$1" + local s3_path="$2" + local max_retries="${3:-$MAX_RETRIES}" + + log_info "Starting S3 upload: $local_path -> $s3_path" + + # Validate S3 path + if ! validate_s3_path "$s3_path"; then + return $ERROR_INVALID_PATH + fi + + # Validate local path exists + if [[ ! -d "$local_path" ]]; then + log_error "Local path does not exist: $local_path" + return $ERROR_INVALID_PATH + fi + + # Check if there are files to upload + local file_count=$(find "$local_path" -type f 2>/dev/null | wc -l) + if [[ $file_count -eq 0 ]]; then + log_warn "No files to upload in: $local_path" + log_info "Creating empty marker file to indicate processing completed" + echo "Processing completed at $(date)" > "$local_path/.processing_complete" + fi + + # Attempt upload with retries + local attempt=1 + local error_log="/tmp/s3-upload-error-$$.log" + + while [[ $attempt -le $max_retries ]]; do + log_info "Upload attempt $attempt of $max_retries" + + # Capture both stdout and stderr + local aws_output + if aws_output=$(aws s3 sync "$local_path" "$s3_path" --no-progress 2>&1); then + log_info "Successfully uploaded to S3" + log_debug "AWS output: $aws_output" + + # Validate upload by checking S3 + if validate_upload "$local_path" "$s3_path"; then + rm -f "$error_log" + return 0 + else + log_warn "Upload validation failed, but upload command succeeded" + # Still return success since upload command worked + rm -f "$error_log" + return 0 + fi + else + local exit_code=$? 
+ echo "$aws_output" > "$error_log" + + log_warn "Upload attempt $attempt failed with exit code $exit_code" + + # Handle specific error types + local error_type + error_type=$(handle_s3_access_error "$s3_path" "upload" "$aws_output") + local error_code=$? + + # Check if we should retry based on error type + if [[ $error_code -eq $ERROR_NETWORK_TIMEOUT ]]; then + if ! handle_network_timeout "S3 upload" "$attempt" "$max_retries"; then + rm -f "$error_log" + return $ERROR_NETWORK_TIMEOUT + fi + elif [[ $error_code -eq $ERROR_AWS_CREDENTIALS ]]; then + log_error "Credentials error - retrying will not help" + rm -f "$error_log" + return $ERROR_AWS_CREDENTIALS + elif [[ $attempt -lt $max_retries ]]; then + log_info "Retrying in $RETRY_DELAY seconds..." + sleep "$RETRY_DELAY" + fi + fi + + ((attempt++)) + done + + log_error "Failed to upload to S3 after $max_retries attempts" + log_error "Last error log saved to: $error_log" + return $ERROR_S3_UPLOAD +} + +# Upload specific file types (e.g., .md files and logs) +# Arguments: +# $1 - Local source path +# $2 - S3 destination path +# $3 - File pattern (optional, defaults to all files) +# $4 - Number of retries (optional) +# Returns: +# 0 on success, 1 on failure +s3_upload_filtered() { + local local_path="$1" + local s3_path="$2" + local file_pattern="${3:-*}" + local max_retries="${4:-$MAX_RETRIES}" + + log_info "Starting filtered S3 upload: $local_path -> $s3_path (pattern: $file_pattern)" + + # Validate inputs + if ! validate_s3_path "$s3_path"; then + return 1 + fi + + if [[ ! -d "$local_path" ]]; then + log_error "Local path does not exist: $local_path" + return 1 + fi + + # Find matching files + local matching_files=() + while IFS= read -r -d '' file; do + matching_files+=("$file") + done < <(find "$local_path" -type f -name "$file_pattern" -print0) + + if [[ ${#matching_files[@]} -eq 0 ]]; then + log_warn "No files matching pattern '$file_pattern' found in $local_path" + return 0 + fi + + log_info "Found ${#matching_files[@]} files matching pattern '$file_pattern'" + + # Upload each file with retries + local failed_uploads=0 + for file in "${matching_files[@]}"; do + local relative_path="${file#$local_path/}" + local s3_file_path="${s3_path%/}/$relative_path" + + if ! s3_upload_file "$file" "$s3_file_path" "$max_retries"; then + log_error "Failed to upload: $file" + ((failed_uploads++)) + fi + done + + if [[ $failed_uploads -gt 0 ]]; then + log_error "Failed to upload $failed_uploads files" + return 1 + fi + + log_info "Successfully uploaded all matching files" + return 0 +} + +# Upload a single file to S3 +# Arguments: +# $1 - Local file path +# $2 - S3 destination path +# $3 - Number of retries (optional) +# Returns: +# 0 on success, 1 on failure +s3_upload_file() { + local local_file="$1" + local s3_path="$2" + local max_retries="${3:-$MAX_RETRIES}" + + log_debug "Uploading file: $local_file -> $s3_path" + + if [[ ! 
-f "$local_file" ]]; then + log_error "File does not exist: $local_file" + return 1 + fi + + local attempt=1 + while [[ $attempt -le $max_retries ]]; do + if aws s3 cp "$local_file" "$s3_path" --no-progress; then + log_debug "Successfully uploaded: $local_file" + return 0 + else + log_warn "Upload attempt $attempt failed for: $local_file" + + if [[ $attempt -lt $max_retries ]]; then + sleep "$RETRY_DELAY" + fi + fi + + ((attempt++)) + done + + log_error "Failed to upload file after $max_retries attempts: $local_file" + return 1 +} + +# Validate upload by comparing file counts +# Arguments: +# $1 - Local path +# $2 - S3 path +# Returns: +# 0 if validation passes, 1 otherwise +validate_upload() { + local local_path="$1" + local s3_path="$2" + + log_debug "Validating upload: $local_path -> $s3_path" + + # Count local files + local local_count=$(find "$local_path" -type f | wc -l) + + # Count S3 files (this is a basic check) + local s3_count=$(aws s3 ls "$s3_path" --recursive 2>/dev/null | wc -l) + + log_debug "Local files: $local_count, S3 files: $s3_count" + + if [[ $s3_count -ge $local_count ]]; then + log_debug "Upload validation passed" + return 0 + else + log_warn "Upload validation: S3 has fewer files than local" + return 1 + fi +} + +# Upload results with specific handling for .md files and logs +# Arguments: +# $1 - Local results directory +# $2 - S3 destination path +# Returns: +# 0 on success, 1 on failure +s3_upload_results() { + local local_path="$1" + local s3_path="$2" + + log_info "Uploading ATX results: $local_path -> $s3_path" + + # Upload all markdown files + log_info "Uploading markdown files..." + if ! s3_upload_filtered "$local_path" "$s3_path" "*.md"; then + log_error "Failed to upload markdown files" + return 1 + fi + + # Upload all log files + log_info "Uploading log files..." + if ! s3_upload_filtered "$local_path" "$s3_path" "*.log"; then + log_error "Failed to upload log files" + return 1 + fi + + # Upload any JSON files (summary, metadata) + log_info "Uploading JSON files..." + s3_upload_filtered "$local_path" "$s3_path" "*.json" || true + + # Upload any other files + log_info "Uploading remaining files..." + if ! s3_upload "$local_path" "$s3_path"; then + log_error "Failed to upload remaining files" + return 1 + fi + + log_info "Successfully uploaded all results" + return 0 +} + + +####################################### +# Cleanup Functions +####################################### + +# Cleanup temporary files and directories +# Arguments: +# $1 - Path to clean up +# $2 - Force flag (optional, "force" to skip confirmation) +# Returns: +# 0 on success, 1 on failure +cleanup_temp_files() { + local path="$1" + local force="${2:-}" + + log_info "Cleaning up temporary files: $path" + + # Validate path exists + if [[ ! 
-e "$path" ]]; then + log_debug "Path does not exist, nothing to clean: $path" + return 0 + fi + + # Safety check: don't delete root or home directories + if [[ "$path" == "/" || "$path" == "$HOME" || "$path" == "/home" || "$path" == "/tmp" ]]; then + log_error "Refusing to delete protected directory: $path" + return 1 + fi + + # Confirm deletion unless force flag is set + if [[ "$force" != "force" && "$force" != "--force" ]]; then + log_warn "About to delete: $path" + log_warn "Use --force flag to skip this confirmation" + return 1 + fi + + # Perform cleanup + log_info "Removing: $path" + if rm -rf "$path"; then + log_info "Successfully cleaned up: $path" + return 0 + else + log_error "Failed to clean up: $path" + return 1 + fi +} + +# Cleanup with error handling that ensures cleanup runs even on failure +# Arguments: +# $1 - Path to clean up +# Returns: +# Always returns 0 to allow script to continue +cleanup_safe() { + local path="$1" + + log_debug "Safe cleanup: $path" + + # Try to cleanup, but don't fail if it doesn't work + if cleanup_temp_files "$path" "force" 2>/dev/null; then + log_debug "Cleanup successful: $path" + else + log_warn "Cleanup failed (non-fatal): $path" + fi + + return 0 +} + +# Cleanup multiple paths +# Arguments: +# $@ - Paths to clean up +# Returns: +# 0 if all cleanups succeed, 1 if any fail +cleanup_multiple() { + local paths=("$@") + local failed=0 + + log_info "Cleaning up ${#paths[@]} paths" + + for path in "${paths[@]}"; do + if ! cleanup_safe "$path"; then + ((failed++)) + fi + done + + if [[ $failed -gt 0 ]]; then + log_warn "Failed to clean up $failed paths" + return 1 + fi + + log_info "Successfully cleaned up all paths" + return 0 +} + +# Register cleanup handler to run on script exit +# Arguments: +# $1 - Path to clean up on exit +register_cleanup_handler() { + local path="$1" + + log_debug "Registering cleanup handler for: $path" + + # Create a trap that will run cleanup on EXIT + trap "cleanup_safe '$path'" EXIT +} + +# Cleanup workspace after processing +# Arguments: +# $1 - Workspace directory +# $2 - Keep on failure flag (optional, "keep" to preserve on error) +# Returns: +# 0 on success +cleanup_workspace() { + local workspace="$1" + local keep_on_failure="${2:-}" + + log_info "Cleaning up workspace: $workspace" + + # Check if we should keep files on failure + if [[ "$keep_on_failure" == "keep" && $? 
-ne 0 ]]; then + log_warn "Preserving workspace due to failure: $workspace" + return 0 + fi + + # Remove workspace + cleanup_safe "$workspace" + + return 0 +} + +# Create a temporary workspace directory +# Arguments: +# $1 - Workspace name (optional) +# Returns: +# Prints the workspace path to stdout +# Returns 0 on success, 1 on failure +create_temp_workspace() { + local workspace_name="${1:-workspace-$(date +%s)}" + local workspace_path="$TEMP_BASE_DIR/$workspace_name" + + log_info "Creating temporary workspace: $workspace_path" + + if mkdir -p "$workspace_path"; then + log_info "Workspace created: $workspace_path" + echo "$workspace_path" + return 0 + else + log_error "Failed to create workspace: $workspace_path" + return 1 + fi +} + + +####################################### +# Main Command Dispatcher +####################################### + +# Main function to handle commands +main() { + # Check prerequisites + check_aws_cli + + # Parse command + if [[ $# -eq 0 ]]; then + usage + exit 1 + fi + + local command="$1" + shift + + case "$command" in + download) + handle_download_command "$@" + ;; + upload) + handle_upload_command "$@" + ;; + cleanup) + handle_cleanup_command "$@" + ;; + help|--help|-h) + usage + exit 0 + ;; + *) + log_error "Unknown command: $command" + usage + exit 1 + ;; + esac +} + +# Handle download command +handle_download_command() { + local s3_path="" + local local_path="" + local retry_count="$MAX_RETRIES" + + while [[ $# -gt 0 ]]; do + case "$1" in + --s3-path) + s3_path="$2" + shift 2 + ;; + --local-path) + local_path="$2" + shift 2 + ;; + --retry) + retry_count="$2" + shift 2 + ;; + *) + log_error "Unknown option: $1" + usage + exit 1 + ;; + esac + done + + # Validate required arguments + if [[ -z "$s3_path" || -z "$local_path" ]]; then + log_error "Missing required arguments for download command" + usage + exit 1 + fi + + # Execute download + if s3_download "$s3_path" "$local_path" "$retry_count"; then + log_info "Download completed successfully" + exit 0 + else + log_error "Download failed" + exit 1 + fi +} + +# Handle upload command +handle_upload_command() { + local local_path="" + local s3_path="" + local retry_count="$MAX_RETRIES" + + while [[ $# -gt 0 ]]; do + case "$1" in + --local-path) + local_path="$2" + shift 2 + ;; + --s3-path) + s3_path="$2" + shift 2 + ;; + --retry) + retry_count="$2" + shift 2 + ;; + *) + log_error "Unknown option: $1" + usage + exit 1 + ;; + esac + done + + # Validate required arguments + if [[ -z "$local_path" || -z "$s3_path" ]]; then + log_error "Missing required arguments for upload command" + usage + exit 1 + fi + + # Execute upload + if s3_upload "$local_path" "$s3_path" "$retry_count"; then + log_info "Upload completed successfully" + exit 0 + else + log_error "Upload failed" + exit 1 + fi +} + +# Handle cleanup command +handle_cleanup_command() { + local path="" + local force="" + + while [[ $# -gt 0 ]]; do + case "$1" in + --path) + path="$2" + shift 2 + ;; + --force) + force="force" + shift + ;; + *) + log_error "Unknown option: $1" + usage + exit 1 + ;; + esac + done + + # Validate required arguments + if [[ -z "$path" ]]; then + log_error "Missing required argument: --path" + usage + exit 1 + fi + + # Execute cleanup + if cleanup_temp_files "$path" "$force"; then + log_info "Cleanup completed successfully" + exit 0 + else + log_error "Cleanup failed" + exit 1 + fi +} + +# Run main function if script is executed directly +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git 
a/src/agentic_platform/agent/code_transform/scripts/smoke-test.sh b/src/agentic_platform/agent/code_transform/scripts/smoke-test.sh new file mode 100755 index 0000000..b356d54 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/smoke-test.sh @@ -0,0 +1,465 @@ +#!/bin/bash +# ATX Container Smoke Test Script +# Verifies ATX and AWS CLI installation and functionality +# +# This script: +# - Checks ATX CLI availability +# - Checks AWS CLI availability +# - Creates minimal test Progress code +# - Executes a simple ATX transformation +# - Verifies the transformation completes successfully +# +# Requirements: 10.1, 10.2, 10.3 + +set -euo pipefail + +####################################### +# Script Configuration +####################################### +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPT_NAME=$(basename "$0") + +# Default configuration +DEFAULT_TEMP_DIR="/tmp/atx-smoke-test" +DEFAULT_LOG_FILE="./smoke_test.log" +DEFAULT_TRANSFORMATION="Comprehensive-Codebase-Analysis" +DEFAULT_BUILD_COMMAND="noop" + +# Global variables +TEMP_DIR="$DEFAULT_TEMP_DIR" +LOG_FILE="$DEFAULT_LOG_FILE" +TRANSFORMATION="$DEFAULT_TRANSFORMATION" +BUILD_COMMAND="$DEFAULT_BUILD_COMMAND" +VERBOSE=false +PRESERVE_ON_FAILURE=true + +####################################### +# Logging Functions +####################################### + +log_info() { + echo "[INFO] $*" | tee -a "$LOG_FILE" +} + +log_success() { + echo -e "\033[0;32m[SUCCESS]\033[0m $*" | tee -a "$LOG_FILE" +} + +log_error() { + echo -e "\033[0;31m[ERROR]\033[0m $*" | tee -a "$LOG_FILE" >&2 +} + +log_warn() { + echo -e "\033[0;33m[WARN]\033[0m $*" | tee -a "$LOG_FILE" +} + +log_debug() { + if [[ "$VERBOSE" == true ]]; then + echo "[DEBUG] $*" | tee -a "$LOG_FILE" + fi +} + +####################################### +# Usage and Help +####################################### + +usage() { + cat << EOF +ATX Container Smoke Test Script + +USAGE: + $SCRIPT_NAME [OPTIONS] + +OPTIONS: + --temp-dir Temporary directory for test files (default: $DEFAULT_TEMP_DIR) + --log-file Log file path (default: $DEFAULT_LOG_FILE) + --transformation ATX transformation to test (default: $DEFAULT_TRANSFORMATION) + --build-command Build command to use (default: $DEFAULT_BUILD_COMMAND) + --verbose Enable verbose output + --no-preserve Don't preserve artifacts on failure + --help Show this help message + +DESCRIPTION: + This script performs a smoke test to verify that the ATX container is properly + configured and can execute transformations. It checks: + + 1. ATX CLI is installed and accessible + 2. AWS CLI is installed and accessible + 3. A minimal Progress code sample can be created + 4. ATX can execute a simple transformation + + The smoke test is designed to catch configuration issues early before running + large batch operations. 
+ +EXIT CODES: + 0 - Smoke test passed + 1 - ATX CLI not found + 2 - AWS CLI not found + 3 - Failed to create test code + 4 - ATX transformation failed + 5 - General error + +EXAMPLES: + # Run smoke test with defaults + $SCRIPT_NAME + + # Run with custom temp directory + $SCRIPT_NAME --temp-dir /tmp/my-smoke-test + + # Run with verbose output + $SCRIPT_NAME --verbose + + # Run with custom transformation + $SCRIPT_NAME --transformation "My-Custom-Transformation" + +EOF +} + +####################################### +# Argument Parsing +####################################### + +parse_arguments() { + while [[ $# -gt 0 ]]; do + case "$1" in + --temp-dir) + TEMP_DIR="$2" + shift 2 + ;; + --log-file) + LOG_FILE="$2" + shift 2 + ;; + --transformation) + TRANSFORMATION="$2" + shift 2 + ;; + --build-command) + BUILD_COMMAND="$2" + shift 2 + ;; + --verbose) + VERBOSE=true + shift + ;; + --no-preserve) + PRESERVE_ON_FAILURE=false + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + echo "ERROR: Unknown option: $1" >&2 + usage + exit 5 + ;; + esac + done +} + +####################################### +# Cleanup Functions +####################################### + +cleanup_on_exit() { + local exit_code=$? + + if [[ $exit_code -eq 0 ]]; then + log_debug "Smoke test passed, cleaning up temp directory" + rm -rf "$TEMP_DIR" 2>/dev/null || true + elif [[ "$PRESERVE_ON_FAILURE" == true ]]; then + log_warn "Smoke test failed, preserving artifacts at: $TEMP_DIR" + log_info "Review artifacts for debugging:" + log_info " - Test code: $TEMP_DIR/test.p" + log_info " - Log file: $LOG_FILE" + else + log_debug "Cleaning up temp directory (preserve disabled)" + rm -rf "$TEMP_DIR" 2>/dev/null || true + fi +} + +trap cleanup_on_exit EXIT + +####################################### +# Smoke Test Functions +####################################### + +# Check if ATX CLI is available +# Returns: 0 if available, 1 if not +check_atx_cli() { + log_info "Checking ATX CLI availability..." + + if ! command -v atx &> /dev/null; then + log_error "ATX CLI not found in PATH" + log_error "Please ensure ATX is installed correctly" + log_error "Expected location: /opt/atx/atx or in PATH" + return 1 + fi + + local atx_version + atx_version=$(atx --version 2>&1 || echo "unknown") + + log_success "ATX CLI found" + log_info " Version: $atx_version" + log_info " Location: $(which atx)" + + return 0 +} + +# Check if AWS CLI is available +# Returns: 0 if available, 2 if not +check_aws_cli() { + log_info "Checking AWS CLI availability..." + + if ! command -v aws &> /dev/null; then + log_error "AWS CLI not found in PATH" + log_error "Please ensure AWS CLI v2 is installed correctly" + return 2 + fi + + local aws_version + aws_version=$(aws --version 2>&1 || echo "unknown") + + log_success "AWS CLI found" + log_info " Version: $aws_version" + log_info " Location: $(which aws)" + + return 0 +} + +# Create minimal test Progress code +# Returns: 0 if successful, 3 if failed +create_test_code() { + log_info "Creating minimal test Progress code..." + + # Create temp directory + mkdir -p "$TEMP_DIR" || { + log_error "Failed to create temp directory: $TEMP_DIR" + return 3 + } + + # Initialize Git repository (required by ATX) + log_info "Initializing Git repository (required by ATX)..." + cd "$TEMP_DIR" + if ! 
git init --quiet; then + log_error "Failed to initialize Git repository" + return 3 + fi + + # Configure Git user (required for commits) + git config user.name "ATX Smoke Test" || true + git config user.email "atx-test@example.com" || true + + # Create a simple Progress test program + cat > "$TEMP_DIR/test.p" << 'PROGRESS_CODE' +/* ATX Smoke Test - Simple Progress Program */ +/* This is a minimal Progress code sample for testing ATX functionality */ + +DEFINE VARIABLE i AS INTEGER NO-UNDO. +DEFINE VARIABLE result AS CHARACTER NO-UNDO. + +/* Simple loop to demonstrate basic Progress syntax */ +DO i = 1 TO 5: + result = result + STRING(i) + " ". +END. + +/* Display result */ +MESSAGE "Smoke test program executed successfully" SKIP + "Loop result: " result + VIEW-AS ALERT-BOX INFORMATION. + +/* Return success */ +RETURN. +PROGRESS_CODE + + if [[ ! -f "$TEMP_DIR/test.p" ]]; then + log_error "Failed to create test Progress code file" + return 3 + fi + + # Add files to Git (required by ATX) + log_info "Adding files to Git repository..." + git add test.p || { + log_error "Failed to add files to Git repository" + return 3 + } + + git commit -m "Initial commit: ATX smoke test code" --quiet || { + log_error "Failed to commit files to Git repository" + return 3 + } + + local file_size + file_size=$(wc -c < "$TEMP_DIR/test.p") + + log_success "Test Progress code created and committed to Git" + log_info " File: $TEMP_DIR/test.p" + log_info " Size: $file_size bytes" + log_debug " Content preview:" + if [[ "$VERBOSE" == true ]]; then + head -n 5 "$TEMP_DIR/test.p" | sed 's/^/ /' | tee -a "$LOG_FILE" + fi + + return 0 +} + +# Execute ATX transformation +# Returns: 0 if successful, 4 if failed +execute_atx_transformation() { + log_info "Executing ATX transformation..." + log_info " Transformation: $TRANSFORMATION" + log_info " Build command: $BUILD_COMMAND" + log_info " Code path: $TEMP_DIR" + + # Check if AWS credentials are available + if ! aws sts get-caller-identity &>/dev/null; then + log_warn "AWS credentials not available - skipping ATX transformation test" + log_info "ATX CLI is installed and will work when AWS credentials are provided" + log_info "This is expected behavior in build environments without AWS access" + log_success "ATX installation verification completed (credentials required for full test)" + return 0 + fi + + # Build ATX command + local atx_cmd="atx custom def exec" + atx_cmd+=" --code-repository-path \"$TEMP_DIR\"" + atx_cmd+=" --transformation-name \"$TRANSFORMATION\"" + atx_cmd+=" --build-command \"$BUILD_COMMAND\"" + atx_cmd+=" --non-interactive" + atx_cmd+=" --trust-all-tools" + + log_debug "Command: $atx_cmd" + + # Execute ATX transformation + local atx_output + local atx_exit_code=0 + + log_info "Running ATX transformation (this may take a moment)..." + + if atx_output=$(eval "$atx_cmd" 2>&1); then + atx_exit_code=0 + log_success "ATX transformation completed successfully" + + if [[ "$VERBOSE" == true ]]; then + log_debug "ATX output:" + echo "$atx_output" | sed 's/^/ /' | tee -a "$LOG_FILE" + fi + + # Check for output files + local md_files + md_files=$(find "$TEMP_DIR" -name "*.md" 2>/dev/null | wc -l) + + if [[ $md_files -gt 0 ]]; then + log_info " Generated $md_files markdown file(s)" + fi + + return 0 + else + atx_exit_code=$? 
+ log_error "ATX transformation failed (exit code: $atx_exit_code)" + + # Analyze error + log_error "ATX output:" + echo "$atx_output" | sed 's/^/ /' | tee -a "$LOG_FILE" + + # Provide specific error guidance + if [[ $atx_exit_code -eq 127 ]]; then + log_error "ATX command not found - check installation" + elif [[ $atx_exit_code -eq 126 ]]; then + log_error "ATX command not executable - check permissions" + elif echo "$atx_output" | grep -qi "permission denied"; then + log_error "Permission denied - check file/directory permissions" + elif echo "$atx_output" | grep -qi "not found\|no such file"; then + log_error "File not found - check paths and installation" + elif echo "$atx_output" | grep -qi "network\|connection"; then + log_error "Network error - check connectivity" + fi + + return 4 + fi +} + +####################################### +# Main Smoke Test Function +####################################### + +run_smoke_test() { + log_info "==========================================" + log_info "ATX Container Smoke Test" + log_info "==========================================" + log_info "Started: $(date '+%Y-%m-%d %H:%M:%S')" + log_info "" + + # Step 1: Check ATX CLI + if ! check_atx_cli; then + log_error "" + log_error "SMOKE TEST FAILED: ATX CLI not available" + return 1 + fi + log_info "" + + # Step 2: Check AWS CLI + if ! check_aws_cli; then + log_error "" + log_error "SMOKE TEST FAILED: AWS CLI not available" + return 2 + fi + log_info "" + + # Step 3: Create test code + if ! create_test_code; then + log_error "" + log_error "SMOKE TEST FAILED: Could not create test code" + return 3 + fi + log_info "" + + # Step 4: Execute ATX transformation + if ! execute_atx_transformation; then + log_error "" + log_error "SMOKE TEST FAILED: ATX transformation failed" + return 4 + fi + log_info "" + + # Success! + log_info "==========================================" + log_success "SMOKE TEST PASSED" + log_info "==========================================" + log_info "All checks completed successfully" + log_info "Container is properly configured for ATX transformations" + log_info "" + log_info "Completed: $(date '+%Y-%m-%d %H:%M:%S')" + log_info "" + + return 0 +} + +####################################### +# Main Function +####################################### + +main() { + # Parse arguments + parse_arguments "$@" + + # Initialize log file + mkdir -p "$(dirname "$LOG_FILE")" + echo "ATX Container Smoke Test Log" > "$LOG_FILE" + echo "Started: $(date '+%Y-%m-%d %H:%M:%S')" >> "$LOG_FILE" + echo "" >> "$LOG_FILE" + + # Run smoke test + if run_smoke_test; then + exit 0 + else + local exit_code=$? 
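+        # run_smoke_test returns 1-4, matching the EXIT CODES table documented in usage().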
+ log_error "Smoke test failed with exit code: $exit_code" + exit $exit_code + fi +} + +# Run main function +main "$@" diff --git a/src/agentic_platform/agent/code_transform/scripts/test-orchestrator.sh b/src/agentic_platform/agent/code_transform/scripts/test-orchestrator.sh new file mode 100644 index 0000000..e927d21 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/scripts/test-orchestrator.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Minimal test orchestrator to debug argument parsing + +echo "[TEST] Script name: $0" +echo "[TEST] Number of arguments: $#" +echo "[TEST] All arguments: $*" + +for i in $(seq 1 $#); do + echo "[TEST] Arg $i: ${!i}" +done + +if [[ $# -eq 1 && "$1" == "--smoke-test" ]]; then + echo "[TEST] SUCCESS: Received --smoke-test argument correctly" + exit 0 +else + echo "[TEST] ERROR: Expected --smoke-test, got: $1" + exit 1 +fi \ No newline at end of file diff --git a/src/agentic_platform/agent/code_transform/setup-gitlab-ci.sh b/src/agentic_platform/agent/code_transform/setup-gitlab-ci.sh new file mode 100755 index 0000000..e9e1f81 --- /dev/null +++ b/src/agentic_platform/agent/code_transform/setup-gitlab-ci.sh @@ -0,0 +1,334 @@ +#!/bin/bash +# +# ATX Container Test Runner - GitLab CI/CD Setup Script (OIDC) +# +# This script automates the setup of AWS resources and IAM OIDC role +# for secure GitLab CI/CD deployment without long-lived credentials. +# + +set -e + +# Colors +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +BLUE='\033[0;34m' +NC='\033[0m' + +echo "==========================================" +echo "ATX Container Test Runner" +echo "GitLab CI/CD Setup (OIDC)" +echo "==========================================" +echo "" + +# Check prerequisites +echo "Checking prerequisites..." +if ! command -v aws &> /dev/null; then + echo -e "${RED}Error: AWS CLI not found. Please install it first.${NC}" + exit 1 +fi + +if ! command -v jq &> /dev/null; then + echo -e "${YELLOW}Warning: jq not found. Install for better output formatting.${NC}" +fi + +# Get AWS account info +echo "Getting AWS account information..." +export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +export AWS_REGION="${AWS_REGION:-us-east-1}" + +echo -e "${GREEN}AWS Account ID:${NC} ${AWS_ACCOUNT_ID}" +echo -e "${GREEN}AWS Region:${NC} ${AWS_REGION}" +echo "" + +# Prompt for GitLab project info +echo -e "${BLUE}GitLab Project Information:${NC}" +read -p "Enter your GitLab project ID (found in Project Settings โ†’ General): " GITLAB_PROJECT_ID +if [[ -z "$GITLAB_PROJECT_ID" ]]; then + echo -e "${RED}Error: GitLab project ID is required.${NC}" + exit 1 +fi + +read -p "Enter GitLab instance URL [https://gitlab.com]: " GITLAB_URL +GITLAB_URL=${GITLAB_URL:-https://gitlab.com} + +echo "" + +# Prompt for bucket names +read -p "Enter source bucket name [atx-test-source-${AWS_ACCOUNT_ID}]: " SOURCE_BUCKET +SOURCE_BUCKET=${SOURCE_BUCKET:-atx-test-source-${AWS_ACCOUNT_ID}} + +read -p "Enter results bucket name [atx-test-results-${AWS_ACCOUNT_ID}]: " RESULTS_BUCKET +RESULTS_BUCKET=${RESULTS_BUCKET:-atx-test-results-${AWS_ACCOUNT_ID}} + +echo "" +echo "Configuration:" +echo " GitLab Project ID: ${GITLAB_PROJECT_ID}" +echo " GitLab URL: ${GITLAB_URL}" +echo " Source Bucket: ${SOURCE_BUCKET}" +echo " Results Bucket: ${RESULTS_BUCKET}" +echo "" + +read -p "Continue with setup? (y/n) " -n 1 -r +echo +if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Setup cancelled." + exit 0 +fi + +# Create S3 buckets +echo "" +echo "Creating S3 buckets..." 
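+# Failures from "s3 mb" (typically the bucket already existing) are suppressed and
+# reported as "already exists" so the setup script stays idempotent across re-runs.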
+aws s3 mb s3://${SOURCE_BUCKET} --region ${AWS_REGION} 2>/dev/null || echo " Source bucket already exists" +aws s3 mb s3://${RESULTS_BUCKET} --region ${AWS_REGION} 2>/dev/null || echo " Results bucket already exists" + +# Enable versioning and encryption +echo "Configuring S3 buckets..." +aws s3api put-bucket-versioning --bucket ${SOURCE_BUCKET} --versioning-configuration Status=Enabled +aws s3api put-bucket-versioning --bucket ${RESULTS_BUCKET} --versioning-configuration Status=Enabled + +aws s3api put-bucket-encryption --bucket ${SOURCE_BUCKET} --server-side-encryption-configuration '{ + "Rules": [{ + "ApplyServerSideEncryptionByDefault": { + "SSEAlgorithm": "AES256" + } + }] +}' + +aws s3api put-bucket-encryption --bucket ${RESULTS_BUCKET} --server-side-encryption-configuration '{ + "Rules": [{ + "ApplyServerSideEncryptionByDefault": { + "SSEAlgorithm": "AES256" + } + }] +}' + +echo -e "${GREEN}โœ“ S3 buckets configured${NC}" + +# Create OIDC Identity Provider +echo "" +echo "Creating OIDC Identity Provider..." + +# Check if GitLab OIDC provider already exists +OIDC_ARN="" +GITLAB_DOMAIN_FOR_ARN=${GITLAB_URL#https://} +GITLAB_DOMAIN_FOR_ARN=${GITLAB_DOMAIN_FOR_ARN%%/*} +if aws iam get-open-id-connect-provider --open-id-connect-provider-arn "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${GITLAB_DOMAIN_FOR_ARN}" 2>/dev/null; then + echo " GitLab OIDC provider already exists" + OIDC_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${GITLAB_DOMAIN_FOR_ARN}" +else + # Get GitLab's OIDC thumbprint + GITLAB_DOMAIN=${GITLAB_URL#https://} + # Remove any path components to get just the domain + GITLAB_DOMAIN=${GITLAB_DOMAIN%%/*} + + echo " Getting SSL certificate for ${GITLAB_DOMAIN}..." + THUMBPRINT=$(echo | openssl s_client -servername ${GITLAB_DOMAIN} -connect ${GITLAB_DOMAIN}:443 2>/dev/null | openssl x509 -fingerprint -sha1 -noout | cut -d'=' -f2 | tr -d ':' | tr '[:upper:]' '[:lower:]') + + if [[ -z "$THUMBPRINT" ]]; then + echo -e "${RED}Error: Could not get SSL certificate thumbprint for ${GITLAB_DOMAIN}${NC}" + echo "Using GitLab.com default thumbprint..." + THUMBPRINT="f879abce0008e4eb126e0097e46620f5aaae26ad" + fi + + echo " Using thumbprint: ${THUMBPRINT}" + + # Create OIDC provider + OIDC_ARN=$(aws iam create-open-id-connect-provider \ + --url ${GITLAB_URL} \ + --thumbprint-list ${THUMBPRINT} \ + --client-id-list ${GITLAB_URL} \ + --query 'OpenIDConnectProviderArn' \ + --output text) + + echo " โœ“ OIDC provider created: ${OIDC_ARN}" +fi + +# Create IAM role for GitLab CI +echo "" +echo "Creating IAM role for GitLab CI..." + +ROLE_NAME="GitLabCIRole" +ROLE_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/${ROLE_NAME}" + +# Create trust policy for OIDC +GITLAB_DOMAIN_FOR_POLICY=${GITLAB_URL#https://} +GITLAB_DOMAIN_FOR_POLICY=${GITLAB_DOMAIN_FOR_POLICY%%/*} + +cat > /tmp/trust-policy.json << EOF +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "${OIDC_ARN}" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "${GITLAB_DOMAIN_FOR_POLICY}:aud": "${GITLAB_URL}", + "${GITLAB_DOMAIN_FOR_POLICY}:sub": "project_path:*:ref_type:branch:ref:main" + }, + "StringLike": { + "${GITLAB_DOMAIN_FOR_POLICY}:project_id": "${GITLAB_PROJECT_ID}" + } + } + } + ] +} +EOF + +# Create or update IAM role +if aws iam get-role --role-name ${ROLE_NAME} >/dev/null 2>&1; then + echo " Updating existing IAM role..." 
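+    # update-assume-role-policy replaces the role's entire trust policy with the
+    # document generated above, so re-running the script refreshes the OIDC conditions.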
+ aws iam update-assume-role-policy --role-name ${ROLE_NAME} --policy-document file:///tmp/trust-policy.json +else + echo " Creating new IAM role..." + aws iam create-role --role-name ${ROLE_NAME} --assume-role-policy-document file:///tmp/trust-policy.json +fi + +# Create permissions policy +cat > /tmp/permissions-policy.json << 'EOF' +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "ECRAccess", + "Effect": "Allow", + "Action": [ + "ecr:GetAuthorizationToken", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:PutImage", + "ecr:InitiateLayerUpload", + "ecr:UploadLayerPart", + "ecr:CompleteLayerUpload", + "ecr:DescribeRepositories", + "ecr:CreateRepository", + "ecr:DescribeImages", + "ecr:ListImages" + ], + "Resource": "*" + }, + { + "Sid": "ECSAccess", + "Effect": "Allow", + "Action": [ + "ecs:*" + ], + "Resource": "*" + }, + { + "Sid": "EC2Access", + "Effect": "Allow", + "Action": [ + "ec2:DescribeVpcs", + "ec2:DescribeSubnets", + "ec2:DescribeSecurityGroups", + "ec2:DescribeNetworkInterfaces", + "ec2:CreateSecurityGroup", + "ec2:CreateTags", + "ec2:AuthorizeSecurityGroupIngress", + "ec2:AuthorizeSecurityGroupEgress" + ], + "Resource": "*" + }, + { + "Sid": "CloudFormationAccess", + "Effect": "Allow", + "Action": [ + "cloudformation:*" + ], + "Resource": "*" + }, + { + "Sid": "IAMAccess", + "Effect": "Allow", + "Action": [ + "iam:GetRole", + "iam:CreateRole", + "iam:DeleteRole", + "iam:AttachRolePolicy", + "iam:DetachRolePolicy", + "iam:PutRolePolicy", + "iam:DeleteRolePolicy", + "iam:GetRolePolicy", + "iam:PassRole", + "iam:CreateServiceLinkedRole" + ], + "Resource": "*" + }, + { + "Sid": "LogsAccess", + "Effect": "Allow", + "Action": [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents", + "logs:DescribeLogGroups", + "logs:DescribeLogStreams" + ], + "Resource": "*" + }, + { + "Sid": "S3Access", + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + "arn:aws:s3:::atx-test-*", + "arn:aws:s3:::atx-test-*/*" + ] + } + ] +} +EOF + +# Attach permissions policy +aws iam put-role-policy \ + --role-name ${ROLE_NAME} \ + --policy-name GitLabCIPermissions \ + --policy-document file:///tmp/permissions-policy.json + +echo -e "${GREEN}โœ“ IAM role configured${NC}" + +# Display results +echo "" +echo "==========================================" +echo -e "${GREEN}Setup Complete!${NC}" +echo "==========================================" +echo "" +echo -e "${BLUE}Add these variables to GitLab CI/CD Settings:${NC}" +echo "(Settings โ†’ CI/CD โ†’ Variables)" +echo "" +echo "Variable Name | Value | Protected | Masked" +echo "---------------------------|----------------------------|-----------|-------" +echo "AWS_REGION | ${AWS_REGION} | |" +echo "AWS_ACCOUNT_ID | ${AWS_ACCOUNT_ID} | |" +echo "AWS_ROLE_ARN | ${ROLE_ARN} | |" +echo "SOURCE_BUCKET | ${SOURCE_BUCKET} | |" +echo "RESULTS_BUCKET | ${RESULTS_BUCKET} | |" +echo "" +echo -e "${GREEN}โœ“ No access keys needed! OIDC provides secure, temporary credentials.${NC}" +echo "" +echo -e "${BLUE}Next steps:${NC}" +echo "1. Add the variables above to your GitLab project" +echo "2. Push this repository to GitLab:" +echo " git remote add origin https://gitlab.com/your-username/atx-container-test-runner.git" +echo " git push origin main" +echo "3. 
Watch the pipeline run in GitLab CI/CD → Pipelines"
+echo ""
+echo -e "${YELLOW}Benefits of OIDC:${NC}"
+echo "• No long-lived access keys to manage"
+echo "• Automatic credential rotation"
+echo "• Enhanced security with temporary tokens"
+echo "• Easier compliance and auditing"
+echo ""
+
+# Cleanup
+rm -f /tmp/trust-policy.json /tmp/permissions-policy.json
+
+echo -e "${GREEN}✓ Setup complete! Your GitLab CI/CD is now configured for secure OIDC authentication.${NC}"
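+
+# Illustrative local build-and-push flow (example values only; see push-to-ecr.sh usage):
+#   docker build -t atx-test-runner:latest .
+#   ./scripts/push-to-ecr.sh 123456789012 us-east-1 atx-test-runner 0.1.0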