diff --git a/.gcloudignore b/.gcloudignore new file mode 100644 index 0000000..a1b502c --- /dev/null +++ b/.gcloudignore @@ -0,0 +1,53 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +.venv/ +venv/ +.pytest_cache/ + +# Jupyter +.ipynb_checkpoints/ +jupyterbook/_build/ +jupyterbook/.jupyter_cache/ + +# Node +node_modules/ +policy-impact-dashboard/node_modules/ +policy-impact-dashboard/build/ + +# Data files +data/*.csv +*.h5 +*.hdf5 + +# Git +.git/ +.gitignore + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# Documentation builds +docs/ +_build/ +site/ + +# Logs +*.log + +# OS +.DS_Store +Thumbs.db + +# Only include what we need +!src/ +!batch/ diff --git a/.gitignore b/.gitignore index 23d0bc3..54a0471 100644 --- a/.gitignore +++ b/.gitignore @@ -1,28 +1,19 @@ -# Jupyter Book build outputs -jupyterbook/_build/ -_build/ - -# Jupyter Notebook checkpoints -.ipynb_checkpoints/ - -# Python cache __pycache__/ -*.pyc -*.pyo - -# Environment files +_build/ +.DS_Store .env +.idea/ +.ipynb_checkpoints/ +.pytest_cache/ .venv/ - -# IDE files .vscode/ -.idea/ +*.log +*.pyc +*.pyo +*.tempresults/ +*.tmp +jupyterbook/_build/ +results/ settings.local.json - -# OS files -.DS_Store Thumbs.db - -# Temporary files -*.tmp -*.temp \ No newline at end of file +venv/ diff --git a/CLOUD_BATCH_GUIDE.md b/CLOUD_BATCH_GUIDE.md new file mode 100644 index 0000000..8be69ff --- /dev/null +++ b/CLOUD_BATCH_GUIDE.md @@ -0,0 +1,112 @@ +# Google Cloud Batch Guide + +## How It Works + +**Architecture:** +- Runs 75 parallel tasks (one per year, 2026-2100) on Google Cloud VMs +- Each VM executes `batch/compute_year.py` with PolicyEngine microsimulation +- Results saved to Cloud Storage as individual CSVs +- Combined into final results using `combine_results.sh` + +**Automatic VM Sizing:** +- **Years 2026-2027:** `e2-highmem-8` (64GB RAM) - larger datasets require more memory +- **Years 2028-2100:** `e2-highmem-4` (32GB RAM) - standard configuration +- System automatically splits into 2 separate jobs when 2026-2027 are included +- Saves ~97% of extra costs by using expensive VMs only for 2/75 years + +**Cost:** ~$2-3 per job (~$0.03 per year analyzed) + +## Complete Workflow + +### 1. Submit Job (Automatic Splitting) + +```bash +# Submit for all 75 years (2026-2100) +PYTHONPATH=src python3 batch/submit_years.py \ + --years $(seq -s, 2026 2100) \ + --reforms option5 \ + --scoring static + +# System automatically creates 2 jobs: +# - Job 1: Years 2026-2027 with 64GB VMs +# - Job 2: Years 2028-2100 with 32GB VMs +# +# Output shows both job IDs and monitoring commands +``` + +### 2. Monitor Progress + +```bash +# Use commands from submit output, e.g.: +./monitor_job.sh years-20251101-123456-abc123 option5 static & +./monitor_job.sh years-20251101-123457-def456 option5 static & + +# Or check status directly: +gcloud batch jobs describe JOB_ID --location=us-central1 +``` + +### 3. Combine Results + +```bash +# After jobs complete, merge all CSVs into 2 final files +./combine_results.sh option5 JOB_ID_1 JOB_ID_2 + +# Output: option5_static_results.csv (all 75 years, sorted) +``` + +### 4. 
Repeat for Dynamic Scoring
+
+```bash
+# Same workflow with --scoring dynamic
+PYTHONPATH=src python3 batch/submit_years.py \
+    --years $(seq -s, 2026 2100) \
+    --reforms option5 \
+    --scoring dynamic
+
+# Monitor and combine the same way
+./combine_results.sh option5 JOB_ID_3 JOB_ID_4
+# Output: option5_dynamic_results.csv
+```
+
+## Key Files
+
+| File | Purpose |
+|------|---------|
+| `batch/submit_years.py` | Submits jobs with automatic VM sizing |
+| `batch/compute_year.py` | Runs PolicyEngine simulation on each VM |
+| `src/reforms.py` | Defines reform parameters |
+| `combine_results.sh` | Merges individual CSVs into final output |
+| `monitor_job.sh` | Tracks job progress |
+
+## Storage Locations
+
+- **Cloud Storage:** `gs://crfb-ss-analysis-results/results/JOB_ID/`
+- **Local Results:** `{reform}_{scoring}_results.csv`
+
+## Common Commands
+
+```bash
+# Check job status
+gcloud batch jobs describe JOB_ID --location=us-central1
+
+# List running jobs
+gcloud batch jobs list --location=us-central1 --filter="state:RUNNING"
+
+# Delete completed job (results already saved)
+gcloud batch jobs delete JOB_ID --location=us-central1 --quiet
+
+# Download CSVs manually
+gsutil -m cp "gs://crfb-ss-analysis-results/results/JOB_ID/*.csv" ./temp/
+```
+
+## Troubleshooting
+
+**Job shows FAILED but has results:**
+- Check actual file count: `gsutil ls gs://.../results/JOB_ID/ | wc -l`
+- Cloud Batch marks the whole job FAILED if ANY task fails (even 2 of 75)
+- Process results if 73+ years completed
+
+**Tasks stuck in PENDING:**
+- Check quota: `gcloud compute project-info describe | grep -A2 "CPUS"`
+- Each job uses 300 CPUs (can run ~10 jobs simultaneously with the 3,000-CPU limit)
+- Delete completed jobs to free resources
diff --git a/all_reforms_dynamic_2026_2027.csv b/all_reforms_dynamic_2026_2027.csv
new file mode 100644
index 0000000..8eb988c
--- /dev/null
+++ b/all_reforms_dynamic_2026_2027.csv
@@ -0,0 +1,17 @@
+reform_name,year,baseline_revenue,reform_revenue,revenue_impact,scoring_type
+option1,2026,2178.0,2088.47,-89.53,dynamic
+option1,2027,2293.8,2197.06,-96.74,dynamic
+option2,2026,2178.0,2204.2,26.2,dynamic
+option2,2027,2293.8,2320.62,26.82,dynamic
+option3,2026,2178.0,2204.2,26.2,dynamic
+option3,2027,2293.8,2320.62,26.82,dynamic
+option4,2026,2178.0,2211.29,33.29,dynamic
+option4,2027,2293.8,2327.75,33.95,dynamic
+option5,2026,2178.0,2221.31,43.31,dynamic
+option5,2027,2293.8,2338.62,44.82,dynamic
+option6,2026,2178.0,2195.1,17.1,dynamic
+option6,2027,2293.8,2329.7,35.9,dynamic
+option7,2026,2178.0,2200.97,22.96,dynamic
+option7,2027,2293.8,2317.38,23.58,dynamic
+option8,2026,2178.0,2232.59,54.59,dynamic
+option8,2027,2293.8,2350.33,56.53,dynamic
diff --git a/all_reforms_static_2026_2027.csv b/all_reforms_static_2026_2027.csv
new file mode 100644
index 0000000..9c2f822
--- /dev/null
+++ b/all_reforms_static_2026_2027.csv
@@ -0,0 +1,17 @@
+reform_name,year,baseline_revenue,reform_revenue,revenue_impact,scoring_type
+option1,2026,2178.0,2087.55,-90.45,static
+option1,2027,2293.8,2196.31,-97.49,static
+option2,2026,2178.0,2203.72,25.72,static
+option2,2027,2293.8,2320.23,26.43,static
+option3,2026,2178.0,2203.72,25.72,static
+option3,2027,2293.8,2320.23,26.43,static
+option4,2026,2178.0,2210.79,32.79,static
+option4,2027,2293.8,2327.46,33.66,static
+option5,2026,2178.0,2232.03,54.03,static
+option5,2027,2293.8,2349.14,55.34,static
+option6,2026,2178.0,2196.68,18.68,static
+option6,2027,2293.8,2333.75,39.95,static
+option7,2026,2178.0,2201.06,23.06,static
+option7,2027,2293.8,2317.57,23.77,static
+option8,2026,2178.0,2232.15,54.14,static +option8,2027,2293.8,2350.51,56.71,static diff --git a/batch/Dockerfile b/batch/Dockerfile new file mode 100644 index 0000000..64e9dc6 --- /dev/null +++ b/batch/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.13-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Copy requirements and install Python dependencies +COPY batch/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy source code (will be available when we build from parent directory) +COPY src/ /app/src/ + +# Copy batch worker scripts +COPY batch/compute_baseline.py /app/ +COPY batch/compute_reform.py /app/ +COPY batch/compute_year.py /app/batch/ + +# Make scripts executable +RUN chmod +x /app/compute_baseline.py /app/compute_reform.py /app/batch/compute_year.py + +# Set PYTHONPATH for imports +ENV PYTHONPATH=/app/src + +# NOTE: Dataset pre-caching was causing OOM issues in cloud (28GB+ RAM usage) +# even though local execution only uses 0.9GB. Letting datasets download at runtime instead. +# Each dataset download adds ~30-60 seconds but avoids the memory issue. + +# Set Python to run in unbuffered mode (see output in real-time) +ENV PYTHONUNBUFFERED=1 + +# Default command (will be overridden by Cloud Batch) +CMD ["python", "--version"] diff --git a/batch/README.md b/batch/README.md new file mode 100644 index 0000000..2ef9ac5 --- /dev/null +++ b/batch/README.md @@ -0,0 +1,230 @@ +# Google Cloud Batch - Social Security Policy Impact Analysis + +This directory contains scripts to run PolicyEngine simulations at scale using Google Cloud Batch. + +## Overview + +**Problem:** Running 1,200 simulations (8 reforms × 75 years × 2 scoring types) sequentially takes ~200 hours. + +**Solution:** Use Google Cloud Batch to run simulations in parallel, completing in ~2-3 hours. + +## Architecture + +### Two-Phase Approach + +**Phase 1: Compute Baselines (75 tasks, ~20 mins)** +- Calculate baseline income tax for each year (2026-2100) +- Each year uses PolicyEngine's year-specific dataset +- Results saved to Cloud Storage for reuse + +**Phase 2: Compute Reforms (1,200 tasks, ~2-4 hrs)** +- Calculate reform impacts for all combinations: + - 8 reforms × 75 years × 2 scoring types = 1,200 calculations +- Each task downloads its year's baseline from Phase 1 +- Supports both static and dynamic scoring +- Results saved to Cloud Storage + +**Phase 3: Download & Combine (2 mins)** +- Download all 1,200 result files +- Combine into CSV files for analysis + +## Files + +``` +batch/ +├── README.md # This file +├── requirements.txt # Python dependencies +├── Dockerfile # Container definition +├── compute_baseline.py # Phase 1 worker script +├── compute_reform.py # Phase 2 worker script +├── submit_baselines.py # Phase 1 job submission +├── submit_reforms.py # Phase 2 job submission +└── download_results.py # Results aggregation +``` + +## Setup (One-Time) + +### 1. Build and Push Docker Container (~10-15 mins) + +```bash +cd /Users/pavelmakarchuk/crfb-tob-impacts +gcloud config set project policyengine-api +gcloud builds submit --tag gcr.io/policyengine-api/ss-calculator:latest batch/ +``` + +This builds the container with Python 3.13, PolicyEngine, and your reform definitions. + +### 2. Verify Cloud Storage Bucket + +```bash +gsutil ls gs://crfb-ss-analysis-results/ +``` + +Should show the bucket was created. 
If not: +```bash +gsutil mb -l us-central1 gs://crfb-ss-analysis-results/ +``` + +## Running Jobs + +### Test Run (Recommended First!) + +Test with 2 years, 2 reforms, both scoring types = 8 simulations: + +```bash +cd batch + +# Phase 1: Compute baselines for 2 years (~1-2 mins) +python submit_baselines.py --years 2026,2027 + +# Wait for completion, then Phase 2 +python submit_reforms.py --reforms option1,option2 --years 2026,2027 + +# Download results +python download_results.py --job-id reforms-YYYYMMDD-HHMMSS-xxxxxx +``` + +**Verify:** +- Results match your notebook values for 2026-2027 +- Both static and dynamic results present +- No errors in Cloud Batch logs + +### Full Run (All 1,200 Simulations) + +```bash +cd batch + +# Phase 1: Compute all 75 baselines (~20 mins) +python submit_baselines.py --years 2026-2100 +# Output: Job ID for monitoring + +# Monitor Phase 1 +gcloud batch jobs describe baselines-YYYYMMDD-HHMMSS-xxxxxx --location=us-central1 + +# When Phase 1 completes, run Phase 2 (~2-4 hrs with 200 workers) +python submit_reforms.py --years 2026-2100 +# Output: Job ID for monitoring + +# Monitor Phase 2 +gcloud batch jobs describe reforms-YYYYMMDD-HHMMSS-xxxxxx --location=us-central1 + +# When complete, download results +python download_results.py --job-id reforms-YYYYMMDD-HHMMSS-xxxxxx +``` + +**Output files:** +- `../data/policy_impacts_static.csv` - All static scoring results +- `../data/policy_impacts_dynamic.csv` - All dynamic scoring results +- `../data/policy_impacts_all.csv` - Combined results + +## Command Options + +### submit_baselines.py + +```bash +python submit_baselines.py \ + --years 2026-2100 # Years to compute (range or comma-separated) + --project policyengine-api # Google Cloud project + --region us-central1 # Google Cloud region + --bucket crfb-ss-analysis-results # Cloud Storage bucket +``` + +### submit_reforms.py + +```bash +python submit_reforms.py \ + --reforms all # Reforms: "all", "option1,option2", etc. + --years 2026-2100 # Years to compute + --scoring all # Scoring: "all", "static", or "dynamic" + --workers 200 # Number of parallel workers (default: 200) + --project policyengine-api + --region us-central1 + --bucket crfb-ss-analysis-results +``` + +**Faster completion:** Use `--workers 400` or `--workers 600` (may require quota increase) + +### download_results.py + +```bash +python download_results.py \ + --job-id reforms-YYYYMMDD-HHMMSS-xxxxxx # From submit_reforms.py output + --bucket crfb-ss-analysis-results + --output-dir ../data # Where to save CSV files +``` + +## Monitoring Jobs + +### Check Job Status + +```bash +# List all jobs +gcloud batch jobs list --location=us-central1 + +# Describe specific job +gcloud batch jobs describe JOB_ID --location=us-central1 + +# View logs +gcloud logging read "resource.type=batch.googleapis.com/Job AND resource.labels.job_uid=JOB_ID" --limit=50 +``` + +### Web Console + +https://console.cloud.google.com/batch?project=policyengine-api + +## Cost + +**Per full run (1,200 simulations):** +- Phase 1: 75 workers × 20 mins = ~$0.25 +- Phase 2: 200 workers × 2 hrs = ~$4.00 +- Storage: ~$0.01 +- **Total: ~$4.25** + +Using spot/preemptible instances (60-80% cheaper than regular VMs). 
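+
+As a sanity check on these figures, here is a minimal back-of-envelope sketch. The per-vCPU spot rate and the 2-vCPU worker shape are assumptions chosen to land on the estimates above, not quoted GCP prices:
+
+```python
+# Back-of-envelope cost model for a full run (all rates are assumptions).
+SPOT_RATE_PER_VCPU_HOUR = 0.005  # assumed e2 spot rate, USD per vCPU-hour
+
+def phase_cost(workers: int, vcpus: int, hours: float) -> float:
+    """Approximate cost of one phase at the assumed spot rate."""
+    return workers * vcpus * hours * SPOT_RATE_PER_VCPU_HOUR
+
+phase1 = phase_cost(workers=75, vcpus=2, hours=20 / 60)  # baselines, ~20 mins
+phase2 = phase_cost(workers=200, vcpus=2, hours=2.0)     # reforms, ~2 hrs
+print(f"Phase 1 ~ ${phase1:.2f}, Phase 2 ~ ${phase2:.2f}")
+print(f"Total ~ ${phase1 + phase2 + 0.01:.2f} (incl. ~$0.01 storage)")
+```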
+ +## Troubleshooting + +### "No baseline found for year YYYY" +- Phase 1 didn't complete or failed +- Check Phase 1 job logs +- Re-run `submit_baselines.py` if needed + +### "Container image not found" +- Docker container wasn't built or pushed +- Re-run: `gcloud builds submit --tag gcr.io/policyengine-api/ss-calculator:latest batch/` + +### "Quota exceeded" +- Need more than default concurrent VMs +- Request quota increase: https://console.cloud.google.com/iam-admin/quotas?project=policyengine-api +- Search for "CPUs" or "Batch API" + +### Results don't match notebook +- Check that reforms in `src/reforms.py` match notebook +- Verify PolicyEngine version matches +- Test with single year first to debug + +## Customization + +### Change Reforms + +Edit `src/reforms.py` and rebuild container: +```bash +gcloud builds submit --tag gcr.io/policyengine-api/ss-calculator:latest batch/ +``` + +### Use Different Dataset + +PolicyEngine automatically uses year-specific datasets based on the `period` parameter in simulations. No changes needed. + +### Add More Workers + +Edit `submit_reforms.py` and change `default=200` to `default=400` in the `--workers` argument, or use the flag when submitting: +```bash +python submit_reforms.py --workers 400 --years 2026-2100 +``` + +## References + +- Google Cloud Batch docs: https://cloud.google.com/batch/docs +- PolicyEngine US: https://github.com/PolicyEngine/policyengine-us +- Project notebook: `jupyterbook/policy-impacts-dynamic.ipynb` diff --git a/batch/cloudbuild.yaml b/batch/cloudbuild.yaml new file mode 100644 index 0000000..3ea1cbd --- /dev/null +++ b/batch/cloudbuild.yaml @@ -0,0 +1,5 @@ +steps: +- name: 'gcr.io/cloud-builders/docker' + args: ['build', '-f', 'batch/Dockerfile', '-t', 'gcr.io/policyengine-api/ss-calculator:latest', '.'] +images: +- 'gcr.io/policyengine-api/ss-calculator:latest' diff --git a/batch/compute_year.py b/batch/compute_year.py new file mode 100644 index 0000000..a1d010f --- /dev/null +++ b/batch/compute_year.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 +""" +Year-based Worker: Compute all reforms for a single year in one task. + +This is the CORRECT architecture: +- Download dataset ONCE per year +- Calculate baseline ONCE per year +- Run ALL reforms for that year +- Parallelize by YEAR, not by reform + +Usage: + python compute_year.py YEAR SCORING_TYPE BUCKET_NAME JOB_ID [REFORMS...] 
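+
+Example (illustrative values; any existing bucket and unique job ID work):
+    python compute_year.py 2026 static crfb-ss-analysis-results years-20251101-043318-mt66r9 option1 option2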
+ +Arguments: + YEAR: Year to compute (e.g., 2026) + SCORING_TYPE: 'static' or 'dynamic' + BUCKET_NAME: Cloud Storage bucket name + JOB_ID: Unique job identifier + REFORMS: Space-separated list of reform IDs (e.g., 'option1 option2 option3 option4') +""" + +import sys +import time +import gc +import warnings +warnings.filterwarnings('ignore') + +# Add src to path for imports +sys.path.insert(0, '/app/src') + +from policyengine_us import Microsimulation +from policyengine_core.reforms import Reform +from google.cloud import storage +import pandas as pd + +# Import reform functions (for static scoring - return Reform classes) +from reforms import ( + get_option1_reform, + get_option2_reform, + get_option3_reform, + get_option4_reform, + get_option5_reform, + get_option6_reform, + get_option7_reform, + get_option8_reform +) + +# Import dict-returning functions (for dynamic scoring) from reforms.py +from reforms import ( + get_option1_dict, + get_option2_dict, + get_option3_dict, + get_option4_dict, + get_option5_dict, + get_option6_dict, + get_option7_dict, + get_option8_dict, + # Complete dynamic dicts with CBO elasticities pre-merged + get_option1_dynamic_dict, + get_option2_dynamic_dict, + get_option3_dynamic_dict, + get_option4_dynamic_dict, + get_option5_dynamic_dict, + get_option6_dynamic_dict, + get_option7_dynamic_dict, + get_option8_dynamic_dict, +) + +# Reform functions for static scoring (return Reform classes) +REFORM_FUNCTIONS = { + 'option1': get_option1_reform, + 'option2': get_option2_reform, + 'option3': get_option3_reform, + 'option4': get_option4_reform, + 'option5': get_option5_reform, + 'option6': get_option6_reform, + 'option7': get_option7_reform, + 'option8': get_option8_reform, +} + +# Dict-returning functions for dynamic scoring with CBO elasticities +REFORM_DYNAMIC_DICT_FUNCTIONS = { + 'option1': get_option1_dynamic_dict, + 'option2': get_option2_dynamic_dict, + 'option3': get_option3_dynamic_dict, + 'option4': get_option4_dynamic_dict, + 'option5': get_option5_dynamic_dict, + 'option6': get_option6_dynamic_dict, + 'option7': get_option7_dynamic_dict, + 'option8': get_option8_dynamic_dict, +} + +# CBO labor supply elasticities for dynamic scoring +CBO_LABOR_PARAMS = { + "gov.simulation.labor_supply_responses.elasticities.income.all": { + "2024-01-01.2100-12-31": -0.05 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.1": { + "2024-01-01.2100-12-31": 0.31 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.2": { + "2024-01-01.2100-12-31": 0.28 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.3": { + "2024-01-01.2100-12-31": 0.27 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.4": { + "2024-01-01.2100-12-31": 0.27 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.5": { + "2024-01-01.2100-12-31": 0.25 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.6": { + "2024-01-01.2100-12-31": 0.25 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.7": { + "2024-01-01.2100-12-31": 0.22 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.8": { + "2024-01-01.2100-12-31": 0.19 + }, + 
"gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.9": { + "2024-01-01.2100-12-31": 0.15 + }, + "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.10": { + "2024-01-01.2100-12-31": 0.10 + } +} + + +def get_reform_dict(reform_func): + """Extract reform dictionary from a reform function.""" + reform_obj = reform_func() + if isinstance(reform_obj, dict): + return reform_obj + elif isinstance(reform_obj, type) and issubclass(reform_obj, Reform): + # Reform class - need to get parameter_values + return reform_obj.parameter_values + else: + raise ValueError(f"Unexpected reform type: {type(reform_obj)}") + + +def main(): + print("\n" + "="*80) + print("DIAGNOSTIC LOGGING: Script started") + print(f"System time: {time.strftime('%Y-%m-%d %H:%M:%S')}") + print(f"Python version: {sys.version}") + print("="*80 + "\n") + + if len(sys.argv) < 5: + print("Usage: python compute_year.py YEAR SCORING_TYPE BUCKET_NAME JOB_ID [REFORMS...]") + sys.exit(1) + + year = int(sys.argv[1]) + scoring_type = sys.argv[2] + bucket_name = sys.argv[3] + job_id = sys.argv[4] + reform_ids = sys.argv[5:] if len(sys.argv) > 5 else list(REFORM_FUNCTIONS.keys()) + + print(f"\n{'='*80}") + print(f"YEAR-BASED WORKER: {year} ({scoring_type.upper()} scoring)") + print(f"{'='*80}") + print(f"Reforms to compute: {', '.join(reform_ids)}") + print(f"Total reforms: {len(reform_ids)}") + print(f"Job ID: {job_id}") + print(f"Bucket: {bucket_name}") + print() + + # Step 1: Download dataset ONCE + print(f"[1/{3+len(reform_ids)}] Downloading dataset for {year}...") + print(f" DIAGNOSTIC: About to create dataset reference...") + dataset_start = time.time() + dataset_name = f"hf://policyengine/test/{year}.h5" + print(f" Dataset: {dataset_name}") + dataset_time = time.time() - dataset_start + print(f" ✓ Dataset reference prepared ({dataset_time:.1f}s)") + print(f" DIAGNOSTIC: Dataset reference created successfully") + print() + + # Step 2: Calculate baseline ONCE (with detailed timing) + print(f"[2/{3+len(reform_ids)}] Creating baseline simulation for {year}...") + print(f" DIAGNOSTIC: About to create Microsimulation object...") + baseline_start = time.time() + try: + create_start = time.time() + print(f" DIAGNOSTIC: Calling Microsimulation(dataset='{dataset_name}')...") + baseline_sim = Microsimulation(dataset=dataset_name) + create_time = time.time() - create_start + print(f" - Microsimulation created: {create_time:.1f}s") + print(f" DIAGNOSTIC: Microsimulation object created successfully") + + calc_start = time.time() + print(f" DIAGNOSTIC: About to calculate income_tax...") + baseline_income_tax = baseline_sim.calculate("income_tax", map_to="household", period=year) + calc_time = time.time() - calc_start + print(f" - Income tax calculated: {calc_time:.1f}s") + print(f" DIAGNOSTIC: Income tax calculation complete") + + print(f" DIAGNOSTIC: About to sum income tax...") + baseline_revenue = float(baseline_income_tax.sum()) + baseline_time = time.time() - baseline_start + print(f" ✓ Baseline calculated: ${baseline_revenue/1e9:.2f}B (total: {baseline_time:.1f}s)") + + # Clean up baseline objects immediately after extracting the value + print(f" DIAGNOSTIC: Cleaning up baseline objects...") + del baseline_sim + del baseline_income_tax + gc.collect() + print(f" ✓ Baseline objects cleaned up") + except Exception as e: + print(f" ✗ BASELINE CALCULATION FAILED: {e}") + import traceback + print(" DIAGNOSTIC: Full traceback:") + traceback.print_exc() + sys.exit(1) 
+    print()
+
+    # Step 3: Run ALL reforms for this year (save each result incrementally)
+    results = []
+    client = storage.Client()
+    bucket = client.bucket(bucket_name)
+
+    # If only one reform, include reform name in filename (for parallelized jobs)
+    # If multiple reforms, use year-based filename (for year-based jobs)
+    if len(reform_ids) == 1:
+        csv_path = f"results/{job_id}/{year}_{reform_ids[0]}_{scoring_type}_results.csv"
+    else:
+        csv_path = f"results/{job_id}/{year}_{scoring_type}_results.csv"
+
+    for i, reform_id in enumerate(reform_ids, start=1):
+        print(f"\n[{2+i}/{3+len(reform_ids)}] Computing {reform_id} for {year}...")
+        print(f"  DIAGNOSTIC: Starting reform {reform_id} at {time.strftime('%H:%M:%S')}")
+        reform_start = time.time()
+
+        try:
+            # Get reform function
+            print(f"  DIAGNOSTIC: Looking up reform function for '{reform_id}'...")
+            reform_func = REFORM_FUNCTIONS.get(reform_id)
+            if not reform_func:
+                print(f"  ✗ Unknown reform: {reform_id}")
+                continue
+
+            # Create reform based on scoring type
+            if scoring_type == 'static':
+                print(f"  DIAGNOSTIC: Creating static reform...")
+                reform = reform_func()
+                print(f"  ✓ Static reform created")
+            elif scoring_type == 'dynamic':
+                print(f"  DIAGNOSTIC: Starting dynamic reform creation...")
+                # Get the complete dynamic dict function (with CBO elasticities pre-merged)
+                print(f"  DIAGNOSTIC: Looking up dynamic dict function for '{reform_id}'...")
+                dynamic_dict_func = REFORM_DYNAMIC_DICT_FUNCTIONS.get(reform_id)
+                if not dynamic_dict_func:
+                    print(f"  ✗ No dynamic dict function for {reform_id}")
+                    continue
+
+                print(f"  DIAGNOSTIC: Found dynamic dict function: {dynamic_dict_func.__name__}")
+                # Get the complete parameter dictionary
+                print(f"  DIAGNOSTIC: Calling {dynamic_dict_func.__name__}()...")
+                reform_params = dynamic_dict_func()
+                print(f"  DIAGNOSTIC: Got reform parameters dictionary with {len(reform_params)} keys")
+
+                # Create single reform from complete parameters
+                print(f"  DIAGNOSTIC: Creating Reform.from_dict() with {len(reform_params)} parameters...")
+                reform = Reform.from_dict(reform_params, country_id="us")
+                print(f"  ✓ Dynamic reform with CBO elasticities (pre-merged)")
+                print(f"  DIAGNOSTIC: Reform object created successfully")
+            else:
+                print(f"  ✗ Invalid scoring type: {scoring_type}")
+                continue
+
+            # Run simulation with detailed timing
+            print(f"  Running PolicyEngine simulation...")
+            print(f"  DIAGNOSTIC: About to create reform Microsimulation...")
+            sim_start = time.time()
+
+            create_start = time.time()
+            print(f"  DIAGNOSTIC: Calling Microsimulation(reform=<reform>, dataset='{dataset_name}')...")
+            reform_sim = Microsimulation(reform=reform, dataset=dataset_name)
+            create_time = time.time() - create_start
+            print(f"  - Microsimulation object created: {create_time:.1f}s")
+            print(f"  DIAGNOSTIC: Reform Microsimulation created successfully")
+
+            calc_start = time.time()
+            print(f"  DIAGNOSTIC: About to calculate reform income_tax...")
+            reform_income_tax = reform_sim.calculate("income_tax", map_to="household", period=year)
+            calc_time = time.time() - calc_start
+            print(f"  - Income tax calculated: {calc_time:.1f}s")
+            print(f"  DIAGNOSTIC: Reform income_tax calculated successfully")
+
+            reform_revenue = float(reform_income_tax.sum())
+            sim_time = time.time() - sim_start
+
+            # Calculate impact
+            impact = reform_revenue - baseline_revenue
+
+            reform_time = time.time() - reform_start
+            print(f"  ✓ Reform revenue: ${reform_revenue/1e9:.2f}B")
+            print(f"  ✓ Impact: ${impact/1e9:+.2f}B ({reform_time:.1f}s total, {sim_time:.1f}s simulation)")
+
+            # Store result (include baseline for reference)
+            result = {
+                'reform_name': reform_id,
+                'year': year,
+                'baseline_revenue': baseline_revenue,
+                'reform_revenue': reform_revenue,
+                'revenue_impact': impact,
+                'scoring_type': scoring_type
+            }
+            results.append(result)
+
+            # CRITICAL: Clean up reform objects immediately to prevent memory accumulation
+            del reform_sim
+            del reform_income_tax
+            del reform
+            gc.collect()
+            print(f"  ✓ Memory cleaned up")
+
+            # Save incrementally to Cloud Storage (only for multi-reform jobs)
+            if len(reform_ids) > 1:
+                try:
+                    df = pd.DataFrame(results)
+                    blob = bucket.blob(csv_path)
+                    blob.upload_from_string(df.to_csv(index=False), content_type='text/csv')
+                    print(f"  ✓ Incremental save to gs://{bucket_name}/{csv_path} ({len(results)}/{len(reform_ids)} reforms)")
+                except Exception as save_error:
+                    print(f"  ⚠ Warning: Failed to save intermediate results: {save_error}")
+                    # Don't fail the whole job if intermediate save fails
+                    pass
+            else:
+                print(f"  (Skipping incremental save - single reform job)")
+
+        except Exception as e:
+            print(f"  ✗ Reform calculation failed: {e}")
+            import traceback
+            traceback.print_exc()
+
+        print()
+
+    # Step 4: Final verification
+    print(f"[{3+len(reform_ids)}/{3+len(reform_ids)}] Verifying final results...")
+
+    if not results:
+        print("  ✗ No results computed!")
+        sys.exit(1)
+
+    # Final save to Cloud Storage (already saved incrementally, but do one final write)
+    try:
+        df = pd.DataFrame(results)
+        blob = bucket.blob(csv_path)
+        blob.upload_from_string(df.to_csv(index=False), content_type='text/csv')
+        print(f"  ✓ Final results saved to gs://{bucket_name}/{csv_path}")
+        print(f"  ✓ Total reforms: {len(results)}")
+
+    except Exception as e:
+        print(f"  ✗ Failed to save final results: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)  # Exit with error so Cloud Batch marks task as FAILED
+
+    print()
+    print(f"{'='*80}")
+    print(f"✓ YEAR {year} COMPLETE")
+    print(f"{'='*80}")
+    print(f"Total reforms computed: {len(results)}")
+    print(f"{'='*80}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/batch/monitor_job.sh b/batch/monitor_job.sh
new file mode 100755
index 0000000..b0ddfc9
--- /dev/null
+++ b/batch/monitor_job.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Usage: ./monitor_job.sh JOB_ID RESULTS_DIR
+# Example: ./monitor_job.sh years-20251031-110502-jw6uio results/75years_test
+
+if [ "$#" -lt 2 ]; then
+    echo "Usage: $0 JOB_ID RESULTS_DIR"
+    echo "Example: $0 years-20251031-110502-jw6uio results/75years_test"
+    exit 1
+fi
+
+JOB_ID="$1"
+RESULTS_DIR="$2"
+
+mkdir -p "$RESULTS_DIR"
+
+echo "Monitoring job: $JOB_ID"
+echo "Results will be downloaded to: $RESULTS_DIR"
+echo "Only all_results.csv will be kept locally (individual CSVs auto-deleted)"
+echo ""
+
+for i in {1..30}; do
+    echo "=== CHECK #$i - $(date '+%H:%M:%S') ==="
+
+    # Get job status
+    STATE=$(gcloud batch jobs describe $JOB_ID --location=us-central1 --format="value(status.state)" 2>/dev/null)
+    DURATION=$(gcloud batch jobs describe $JOB_ID --location=us-central1 --format="value(status.runDuration)" 2>/dev/null | sed 's/s$//')
+
+    if [ -n "$DURATION" ]; then
+        # runDuration can be fractional (e.g. "139.671831969"), so use awk
+        # rather than bash integer arithmetic, which chokes on decimals
+        MINUTES=$(echo "$DURATION" | awk '{print int($1/60)}')
+        SECS=$(echo "$DURATION" | awk '{print int($1%60)}')
+        echo "Job State: $STATE (${MINUTES}m ${SECS}s)"
+    else
+        echo "Job State: $STATE"
+    fi
+
+    # Get task counts (grep -c already prints 0 on no match; "|| echo 0" would
+    # append a second 0 when grep exits non-zero, corrupting the counts)
+    RUNNING=$(gcloud batch tasks list --location=us-central1 --job=$JOB_ID --format="value(status.state)" 2>/dev/null | grep -c "RUNNING")
+    SUCCEEDED=$(gcloud batch tasks list --location=us-central1 --job=$JOB_ID --format="value(status.state)" 2>/dev/null | grep -c "SUCCEEDED")
+    FAILED=$(gcloud batch tasks list --location=us-central1 --job=$JOB_ID --format="value(status.state)" 2>/dev/null | grep -c "FAILED")
+
+    echo "Tasks - Running: $RUNNING, Succeeded: $SUCCEEDED, Failed: $FAILED"
+
+    # Download new results to temp directory
+    echo "Downloading results..."
+    TEMP_DIR="$RESULTS_DIR/.temp"
+    mkdir -p "$TEMP_DIR"
+    gsutil -m cp -n "gs://crfb-ss-analysis-results/results/${JOB_ID}/*.csv" "$TEMP_DIR/" 2>/dev/null
+
+    # Count results
+    RESULT_COUNT=$(ls "$TEMP_DIR"/*.csv 2>/dev/null | wc -l | tr -d ' ')
+    echo "Results downloaded: $RESULT_COUNT files"
+
+    # Merge all CSVs into one and format to billions
+    if [ "$RESULT_COUNT" -gt 0 ]; then
+        # Create temporary merged file with raw data
+        echo "reform_name,year,baseline_revenue,reform_revenue,revenue_impact,scoring_type" > "$TEMP_DIR/raw_merged.csv"
+        tail -n +2 -q "$TEMP_DIR"/*_static_results.csv "$TEMP_DIR"/*_dynamic_results.csv 2>/dev/null >> "$TEMP_DIR/raw_merged.csv"
+
+        # Format to billions using Python
+        python3 << PYEOF
+import pandas as pd
+import sys
+try:
+    df = pd.read_csv('$TEMP_DIR/raw_merged.csv')
+    if len(df) > 0:
+        df['baseline_revenue'] = (df['baseline_revenue'] / 1e9).round(2)
+        df['reform_revenue'] = (df['reform_revenue'] / 1e9).round(2)
+        df['revenue_impact'] = (df['revenue_impact'] / 1e9).round(2)
+        df = df.sort_values(['year', 'reform_name', 'scoring_type'])
+        df.to_csv('$RESULTS_DIR/all_results.csv', index=False)
+        print(f"Formatted {len(df)} rows")
+    else:
+        print("No data to format")
+except Exception as e:
+    print(f"Error formatting: {e}", file=sys.stderr)
+PYEOF
+
+        TOTAL_ROWS=$(tail -n +2 "$RESULTS_DIR/all_results.csv" 2>/dev/null | wc -l | tr -d ' ')
+        echo "Total results in all_results.csv: $TOTAL_ROWS rows"
+
+        # Clean up temp directory
+        rm -rf "$TEMP_DIR"
+    fi
+
+    echo ""
+
+    # Check if done
+    if [ "$STATE" = "SUCCEEDED" ] || [ "$STATE" = "FAILED" ]; then
+        echo "✓ Job finished with state: $STATE"
+        break
+    fi
+
+    # Wait 2 minutes
+    sleep 120
+done
+
+echo ""
+echo "Final results saved to: $RESULTS_DIR/all_results.csv"
+echo "Revenue values are in billions (rounded to 2 decimals)"
diff --git a/batch/requirements.txt b/batch/requirements.txt
new file mode 100644
index 0000000..0799a5e
--- /dev/null
+++ b/batch/requirements.txt
@@ -0,0 +1,6 @@
+policyengine-us>=1.0.0
+policyengine-core>=3.0.0
+pandas>=2.0.0
+numpy>=1.24.0
+google-cloud-storage>=2.10.0
+tqdm>=4.65.0
diff --git a/batch/submit_years.py b/batch/submit_years.py
new file mode 100644
index 0000000..4307bd3
--- /dev/null
+++ b/batch/submit_years.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python3
+"""
+Submit year-based parallel jobs to Google Cloud Batch.
+
+This uses the CORRECT architecture:
+- Parallelize by YEAR, not by reform-year combination
+- Each year-worker downloads dataset once, calculates baseline once, runs all reforms
+- Much more efficient and faster!
+ +Usage: + python submit_years.py --years 2026,2027 --scoring static --reforms option1,option2,option3,option4 +""" + +import argparse +import datetime +import os +import random +import string +from google.cloud import batch_v1 + +def generate_job_id(prefix="years"): + """Generate unique job ID with timestamp and random suffix.""" + timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + random_suffix = ''.join(random.choices(string.ascii_lowercase + string.digits, k=6)) + return f"{prefix}-{timestamp}-{random_suffix}" + +def submit_single_job(years, reforms, scoring_type, bucket_name, machine_type, memory_mib, cpu_milli, memory_label, job_id=None): + """Submit a single Cloud Batch job with specified VM configuration.""" + + if job_id is None: + job_id = generate_job_id() + + project_id = "policyengine-api" + region = "us-central1" + + # Job configuration + num_tasks = len(years) + + print("="*80) + print("SUBMITTING YEAR-BASED JOB") + print("="*80) + print(f"Job ID: {job_id}") + print(f"Years: {len(years)} ({min(years)}-{max(years) if len(years) > 1 else min(years)})") + print(f"Machine: {machine_type} ({memory_label} RAM)") + print(f"Reforms per year: {len(reforms)} ({', '.join(reforms)})") + print(f"Scoring: {scoring_type}") + print(f"Total tasks: {num_tasks} (one per year)") + print(f"Total reforms to compute: {num_tasks * len(reforms)}") + print(f"Bucket: gs://{bucket_name}/") + print(f"Container: gcr.io/policyengine-api/ss-calculator:latest") + print("="*80) + print() + + # Create batch client + client = batch_v1.BatchServiceClient() + + # Define the task: run compute_year.py for each year + task_spec = batch_v1.TaskSpec() + + # Build command that maps BATCH_TASK_INDEX to year + years_array = ' '.join(map(str, years)) + reforms_args = ' '.join(reforms) + + script = f""" + set -e # Exit immediately if any command fails + + YEARS=({years_array}) + YEAR=${{YEARS[$BATCH_TASK_INDEX]}} + echo "Task $BATCH_TASK_INDEX processing year $YEAR with {len(reforms)} reforms" + echo "=== Starting computation at $(date) ===" + + python /app/batch/compute_year.py $YEAR {scoring_type} {bucket_name} {job_id} {reforms_args} + + # Only reach here if python succeeded + echo "=== Finished at $(date) ===" + """ + + runnable = batch_v1.Runnable() + runnable.container = batch_v1.Runnable.Container() + runnable.container.image_uri = "gcr.io/policyengine-api/ss-calculator:latest" + runnable.container.entrypoint = "/bin/bash" + runnable.container.commands = ["-c", script] + + task_spec.runnables = [runnable] + task_spec.max_retry_count = 1 # Allow one retry per task + task_spec.max_run_duration = "3600s" # 1 hour timeout per year + + # Resource allocation - adaptive based on years + resources = batch_v1.ComputeResource() + resources.cpu_milli = cpu_milli + resources.memory_mib = memory_mib + task_spec.compute_resource = resources + + # Create task group + task_group = batch_v1.TaskGroup() + task_group.task_count = num_tasks + task_group.parallelism = num_tasks # Run all years in parallel + task_group.task_spec = task_spec + + # Configure allocation policy + allocation_policy = batch_v1.AllocationPolicy() + instance_policy = batch_v1.AllocationPolicy.InstancePolicy() + instance_policy.provisioning_model = batch_v1.AllocationPolicy.ProvisioningModel.STANDARD + instance_policy.machine_type = machine_type + + instance_policy_or_template = batch_v1.AllocationPolicy.InstancePolicyOrTemplate() + instance_policy_or_template.policy = instance_policy + allocation_policy.instances = [instance_policy_or_template] + 
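+    # Note: provisioning is STANDARD (on-demand). batch_v1 also exposes
+    # AllocationPolicy.ProvisioningModel.SPOT, which is cheaper but can be
+    # preempted; with max_retry_count=1 already set per task, SPOT may be a
+    # reasonable cost trade-off (an option, not what this script currently uses).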
+ # Service account + service_account = batch_v1.ServiceAccount() + service_account.email = f"{project_id}@appspot.gserviceaccount.com" + allocation_policy.service_account = service_account + + # Logging policy + logs_policy = batch_v1.LogsPolicy() + logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING + + # Create job + job = batch_v1.Job() + job.task_groups = [task_group] + job.allocation_policy = allocation_policy + job.logs_policy = logs_policy + job.labels = { + "job_type": "year_based", + "scoring": scoring_type + } + + # Submit job + print("Submitting job to Cloud Batch...") + print() + + create_request = batch_v1.CreateJobRequest() + create_request.job = job + create_request.job_id = job_id + create_request.parent = f"projects/{project_id}/locations/{region}" + + response = client.create_job(create_request) + + print("="*80) + print("✓ JOB SUBMITTED SUCCESSFULLY") + print("="*80) + print(f"Job ID: {job_id}") + print(f"Status: {response.status.state.name}") + print() + print("Monitor progress:") + print(f" Command: gcloud batch jobs describe {job_id} --location={region}") + print(f" Console: https://console.cloud.google.com/batch/jobs/{job_id}?project={project_id}") + print() + print(f"Results will be saved to: gs://{bucket_name}/results/{job_id}/") + print() + print("When complete, check results:") + print(f" gsutil ls gs://{bucket_name}/results/{job_id}/") + print("="*80) + + return job_id + +def submit_job(years, reforms, scoring_type, bucket_name): + """ + Submit Cloud Batch jobs with automatic VM sizing. + + Automatically splits into two jobs if 2026/2027 are included: + - Job 1: Years 2026-2027 with e2-highmem-8 (64GB RAM) + - Job 2: Other years with e2-highmem-4 (32GB RAM) + + This saves ~97% of the extra cost vs using 64GB for all years. 
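+
+    Example: submitting years 2026-2100 produces two jobs:
+
+        submit_job(list(range(2026, 2101)), ["option5"], "static",
+                   "crfb-ss-analysis-results")
+        # -> job 1: [2026, 2027] on e2-highmem-8 (64GB)
+        # -> job 2: [2028 ... 2100] on e2-highmem-4 (32GB)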
+ """ + + # Split years by memory requirements + high_memory_years = sorted([y for y in years if y in [2026, 2027]]) + standard_memory_years = sorted([y for y in years if y not in [2026, 2027]]) + + job_ids = [] + + # Submit high-memory job if needed (2026-2027) + if high_memory_years: + print("\n" + "="*80) + print("COST OPTIMIZATION: Submitting separate job for high-memory years (2026-2027)") + print("="*80) + print(f"Years requiring 64GB RAM: {', '.join(map(str, high_memory_years))}") + print("="*80 + "\n") + + job_id = submit_single_job( + years=high_memory_years, + reforms=reforms, + scoring_type=scoring_type, + bucket_name=bucket_name, + machine_type="e2-highmem-8", # 8 vCPU, 64GB RAM + memory_mib=65536, # 64GB + cpu_milli=8000, # 8 CPUs + memory_label="64GB" + ) + job_ids.append((job_id, high_memory_years)) + print() + + # Submit standard-memory job if needed (all other years) + if standard_memory_years: + if high_memory_years: + print("\n" + "="*80) + print("COST OPTIMIZATION: Submitting separate job for standard-memory years") + print("="*80) + print(f"Years using 32GB RAM: {min(standard_memory_years)}-{max(standard_memory_years)}") + print("="*80 + "\n") + + job_id = submit_single_job( + years=standard_memory_years, + reforms=reforms, + scoring_type=scoring_type, + bucket_name=bucket_name, + machine_type="e2-highmem-4", # 4 vCPU, 32GB RAM + memory_mib=32768, # 32GB + cpu_milli=4000, # 4 CPUs + memory_label="32GB" + ) + job_ids.append((job_id, standard_memory_years)) + print() + + # Print summary if multiple jobs + if len(job_ids) > 1: + print("\n" + "="*80) + print("✓ SUBMITTED 2 JOBS (COST-OPTIMIZED)") + print("="*80) + for i, (job_id, job_years) in enumerate(job_ids, 1): + year_range = f"{min(job_years)}-{max(job_years)}" if len(job_years) > 1 else str(job_years[0]) + print(f"Job {i}: {job_id}") + print(f" Years: {year_range} ({len(job_years)} years)") + print() + print("Monitor both jobs:") + for job_id, job_years in job_ids: + print(f" ./monitor_job.sh {job_id} {reforms[0]} {scoring_type}") + print("="*80 + "\n") + + return job_ids[0][0] if len(job_ids) == 1 else [jid for jid, _ in job_ids] + +def main(): + parser = argparse.ArgumentParser(description="Submit year-based parallel jobs") + parser.add_argument("--years", required=True, help="Comma-separated years (e.g., 2026,2027)") + parser.add_argument("--reforms", required=True, help="Comma-separated reform IDs (e.g., option1,option2,option3,option4)") + parser.add_argument("--scoring", required=True, choices=["static", "dynamic"], help="Scoring type") + parser.add_argument("--bucket", default="crfb-ss-analysis-results", help="Cloud Storage bucket") + + args = parser.parse_args() + + years = [int(y.strip()) for y in args.years.split(",")] + reforms = [r.strip() for r in args.reforms.split(",")] + + submit_job(years, reforms, args.scoring, args.bucket) + +if __name__ == "__main__": + main() diff --git a/combine_results.sh b/combine_results.sh new file mode 100755 index 0000000..db632fb --- /dev/null +++ b/combine_results.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# +# Combine all year CSVs into 2 final files (static and dynamic) +# +# Usage: ./combine_results.sh +# Example: ./combine_results.sh option3 years-20251101-010116-uwtkfv years-20251101-010118-sxyrk4 + +set -e + +if [ $# -lt 2 ]; then + echo "Usage: $0 [job_id2] [job_id3] ..." 
+ echo "" + echo "Example:" + echo " $0 option3 years-20251101-010116-uwtkfv years-20251101-010118-sxyrk4" + echo "" + echo "This will download and combine all CSVs from the specified jobs" + echo "into 2 final files: {reform}_static_results.csv and {reform}_dynamic_results.csv" + exit 1 +fi + +REFORM=$1 +shift +JOB_IDS=("$@") + +BUCKET="gs://crfb-ss-analysis-results" +TEMP_DIR="temp_results_$$" + +echo "================================================================================" +echo "COMBINING RESULTS FOR $REFORM" +echo "================================================================================" +echo "Job IDs: ${JOB_IDS[@]}" +echo "Bucket: $BUCKET" +echo "================================================================================" +echo "" + +# Create temp directory +mkdir -p "$TEMP_DIR" + +# Download all CSVs from all job IDs +echo "📥 Downloading CSVs from Cloud Storage..." +for JOB_ID in "${JOB_IDS[@]}"; do + echo " Downloading from $JOB_ID..." + gsutil -m cp -r "$BUCKET/results/$JOB_ID/*.csv" "$TEMP_DIR/" 2>/dev/null || echo " (No files found for $JOB_ID)" +done +echo "" + +# Combine static results +STATIC_FILE="${REFORM}_static_results.csv" +if ls "$TEMP_DIR"/*_${REFORM}_static_results.csv 1> /dev/null 2>&1; then + echo "📊 Combining static scoring results..." + + # Write header + echo "reform_name,year,baseline_revenue,reform_revenue,revenue_impact,scoring_type" > "$STATIC_FILE" + + # Append all data (skip headers) + for file in "$TEMP_DIR"/*_${REFORM}_static_results.csv; do + tail -n +2 "$file" >> "$STATIC_FILE" + done + + # Sort by year + (head -n 1 "$STATIC_FILE" && tail -n +2 "$STATIC_FILE" | sort -t',' -k2 -n) > "${STATIC_FILE}.tmp" + mv "${STATIC_FILE}.tmp" "$STATIC_FILE" + + STATIC_COUNT=$(tail -n +2 "$STATIC_FILE" | wc -l | tr -d ' ') + echo " ✓ Combined $STATIC_COUNT years into $STATIC_FILE" +else + echo " ℹ️ No static results found" +fi +echo "" + +# Combine dynamic results +DYNAMIC_FILE="${REFORM}_dynamic_results.csv" +if ls "$TEMP_DIR"/*_${REFORM}_dynamic_results.csv 1> /dev/null 2>&1; then + echo "📊 Combining dynamic scoring results..." 
+ + # Write header + echo "reform_name,year,baseline_revenue,reform_revenue,revenue_impact,scoring_type" > "$DYNAMIC_FILE" + + # Append all data (skip headers) + for file in "$TEMP_DIR"/*_${REFORM}_dynamic_results.csv; do + tail -n +2 "$file" >> "$DYNAMIC_FILE" + done + + # Sort by year + (head -n 1 "$DYNAMIC_FILE" && tail -n +2 "$DYNAMIC_FILE" | sort -t',' -k2 -n) > "${DYNAMIC_FILE}.tmp" + mv "${DYNAMIC_FILE}.tmp" "$DYNAMIC_FILE" + + DYNAMIC_COUNT=$(tail -n +2 "$DYNAMIC_FILE" | wc -l | tr -d ' ') + echo " ✓ Combined $DYNAMIC_COUNT years into $DYNAMIC_FILE" +else + echo " ℹ️ No dynamic results found" +fi +echo "" + +# Clean up temp directory +rm -rf "$TEMP_DIR" + +echo "================================================================================" +echo "✅ RESULTS COMBINED" +echo "================================================================================" +if [ -f "$STATIC_FILE" ]; then + echo "Static: $STATIC_FILE ($STATIC_COUNT years)" +fi +if [ -f "$DYNAMIC_FILE" ]; then + echo "Dynamic: $DYNAMIC_FILE ($DYNAMIC_COUNT years)" +fi +echo "================================================================================" diff --git a/job_ids_2026_2027.txt b/job_ids_2026_2027.txt new file mode 100644 index 0000000..2a7746d --- /dev/null +++ b/job_ids_2026_2027.txt @@ -0,0 +1,26 @@ +# Job IDs for 2026-2027 runs (all 8 reforms, static and dynamic) + +## Static Jobs +option1_static: years-20251101-043318-mt66r9 +option2_static: years-20251101-043321-tbqxyt +option3_static: years-20251101-043323-83klak +option4_static: years-20251101-043325-9t8njw +option5_static: years-20251101-044502-t46hwy +option6_static: years-20251101-044505-eijff3 +option7_static: years-20251101-044507-wqwanu +option8_static: years-20251101-044509-rewqyd + +## Dynamic Jobs +option1_dynamic: years-20251101-050409-i6i0hv +option2_dynamic: years-20251101-050412-pr24xn +option3_dynamic: years-20251101-050414-4n2cm4 +option4_dynamic: years-20251101-050416-1tpt9b +option5_dynamic: years-20251101-050435-2qheno +option6_dynamic: years-20251101-050438-lvmxxv +option7_dynamic: years-20251101-050440-avlouz +option8_dynamic: years-20251101-050442-crz5za + +## Combine commands when all complete: +# Static: ./combine_results.sh all_reforms_static years-20251101-043318-mt66r9 years-20251101-043321-tbqxyt years-20251101-043323-83klak years-20251101-043325-9t8njw years-20251101-044502-t46hwy years-20251101-044505-eijff3 years-20251101-044507-wqwanu years-20251101-044509-rewqyd + +# Dynamic: ./combine_results.sh all_reforms_dynamic years-20251101-050409-i6i0hv years-20251101-050412-pr24xn years-20251101-050414-4n2cm4 years-20251101-050416-1tpt9b years-20251101-050435-2qheno years-20251101-050438-lvmxxv years-20251101-050440-avlouz years-20251101-050442-crz5za diff --git a/monitor_75years.log b/monitor_75years.log new file mode 100644 index 0000000..c5fbd2d --- /dev/null +++ b/monitor_75years.log @@ -0,0 +1,16 @@ +Monitoring 75-year test job: years-20251031-110502-jw6uio +Results will be downloaded to: results/75years_test + +=== CHECK #1 - 11:06:50 === +Job State: SCHEDULED (0m 0s) +Tasks - Running: 0 +0, Succeeded: 0 +0, Failed: 0 +0 +Downloading results... 
+Results downloaded: 0 files
+
+=== CHECK #2 - 11:08:58 ===
+./monitor_75years.sh: line 19: 139.671831969: syntax error: invalid arithmetic operator (error token is ".671831969")
+
+Final results saved to: results/75years_test/all_results.csv
diff --git a/monitor_job.sh b/monitor_job.sh
new file mode 100755
index 0000000..3eeeb95
--- /dev/null
+++ b/monitor_job.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# General-purpose monitoring script for any Cloud Batch job
+JOB_ID="$1"
+REFORM="$2"
+SCORING="$3"
+REGION="${4:-us-central1}"
+
+if [ -z "$JOB_ID" ] || [ -z "$REFORM" ] || [ -z "$SCORING" ]; then
+    echo "Usage: $0 <job_id> <reform> <scoring> [region]"
+    echo "Example: $0 years-20251031-123456-abc123 option5 dynamic us-central1"
+    exit 1
+fi
+
+RESULTS_DIR="results/${REFORM}_75years_${SCORING}"
+mkdir -p "$RESULTS_DIR"
+
+echo "================================================================================"
+echo "MONITORING CLOUD BATCH JOB"
+echo "================================================================================"
+echo "Job ID: $JOB_ID"
+echo "Region: $REGION"
+echo "Reform: $REFORM"
+echo "Scoring: $SCORING"
+echo "Results: $RESULTS_DIR"
+echo "================================================================================"
+echo ""
+
+for i in {1..120}; do
+    echo "=== CHECK #$i - $(date '+%H:%M:%S') ==="
+
+    STATE=$(gcloud batch jobs describe $JOB_ID --location=$REGION --format="value(status.state)" 2>/dev/null)
+    DURATION=$(gcloud batch jobs describe $JOB_ID --location=$REGION --format="value(status.runDuration)" 2>/dev/null | sed 's/s$//')
+
+    if [ -n "$DURATION" ]; then
+        MINUTES=$(echo "$DURATION" | awk '{print int($1/60)}')
+        SECS=$(echo "$DURATION" | awk '{print int($1%60)}')
+        echo "Job State: $STATE (${MINUTES}m ${SECS}s)"
+    else
+        echo "Job State: $STATE"
+    fi
+
+    # grep -c already prints 0 when nothing matches; "|| echo 0" would append
+    # a second 0 on grep's non-zero exit and corrupt the counts
+    TASK_STATES=$(gcloud batch tasks list --location=$REGION --job=$JOB_ID --format="value(status.state)" 2>/dev/null)
+    PENDING=$(echo "$TASK_STATES" | grep -c "PENDING")
+    RUNNING=$(echo "$TASK_STATES" | grep -c "RUNNING")
+    SUCCEEDED=$(echo "$TASK_STATES" | grep -c "SUCCEEDED")
+    FAILED=$(echo "$TASK_STATES" | grep -c "FAILED")
+
+    echo "Tasks: RUNNING=$RUNNING, SUCCEEDED=$SUCCEEDED, FAILED=$FAILED, PENDING=$PENDING"
+
+    TEMP_DIR="$RESULTS_DIR/.temp"
+    mkdir -p "$TEMP_DIR"
+    gsutil -m cp -n "gs://crfb-ss-analysis-results/results/${JOB_ID}/*.csv" "$TEMP_DIR/" 2>/dev/null
+
+    RESULT_FILES=$(ls "$TEMP_DIR"/*_${REFORM}_${SCORING}_results.csv 2>/dev/null | wc -l | tr -d ' ')
+
+    if [ "$RESULT_FILES" -gt 0 ]; then
+        echo "reform_name,year,baseline_revenue,reform_revenue,revenue_impact,scoring_type" > "$TEMP_DIR/merged.csv"
+        tail -n +2 -q "$TEMP_DIR"/*_${REFORM}_${SCORING}_results.csv 2>/dev/null | sort -t',' -k2 -n >> "$TEMP_DIR/merged.csv"
+
+        python3 << PYEOF
+import pandas as pd
+try:
+    df = pd.read_csv('$TEMP_DIR/merged.csv')
+    if len(df) > 0:
+        df = df.sort_values('year')
+        # Convert to billions
+        df['baseline_revenue'] = (df['baseline_revenue'] / 1e9).round(2)
+        df['reform_revenue'] = (df['reform_revenue'] / 1e9).round(2)
+        df['revenue_impact'] = (df['revenue_impact'] / 1e9).round(2)
+
+        total_impact = df['revenue_impact'].sum()
+        df.to_csv('$RESULTS_DIR/all_results.csv', index=False)
+
+        print(f"Results: {len(df)} years completed")
+        print(f"Years: {df['year'].min()}-{df['year'].max()}")
+        if len(df[df['year'] <= 2035]) > 0:
+            print(f"10-year impact (2026-2035): \${df[df['year'] <= 2035]['revenue_impact'].sum():+.2f}B")
+        print(f"Total impact: \${total_impact:+.2f}B")
+except Exception as e:
+    # $RESULT_FILES is expanded by bash; a bare {RESULT_FILES} here would be an
+    # undefined Python name and raise a second error inside the except block
+    print(f"Results: $RESULT_FILES files downloaded (summary failed: {e})")
+PYEOF
+
+        rm -rf "$TEMP_DIR"
+    else
+        echo "Results: None yet"
+    fi
+
+    if [ "$FAILED" -gt 0 ]; then
+        echo "⚠️ WARNING: $FAILED tasks failed"
+    fi
+
+    echo ""
+
+    if [ "$STATE" = "SUCCEEDED" ]; then
+        echo "✅ JOB COMPLETED! ($SUCCEEDED succeeded, $FAILED failed)"
+        break
+    fi
+
+    if [ "$STATE" = "FAILED" ]; then
+        echo "❌ JOB FAILED ($SUCCEEDED succeeded, $FAILED failed)"
+        break
+    fi
+
+    sleep 60
+done
+
+echo "Final results: $RESULTS_DIR/all_results.csv"
diff --git a/src/reforms.py b/src/reforms.py
index a9ef5d0..363d6b1 100644
--- a/src/reforms.py
+++ b/src/reforms.py
@@ -230,6 +230,229 @@ def enable_employer_payroll_tax(percentage=1.0):
     }
 
+
+# CBO labor supply elasticities (for dynamic scoring)
+CBO_ELASTICITIES = {
+    "gov.simulation.labor_supply_responses.elasticities.income.all": {
+        "2024-01-01.2100-12-31": -0.05
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.1": {
+        "2024-01-01.2100-12-31": 0.31
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.2": {
+        "2024-01-01.2100-12-31": 0.28
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.3": {
+        "2024-01-01.2100-12-31": 0.27
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.4": {
+        "2024-01-01.2100-12-31": 0.27
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.5": {
+        "2024-01-01.2100-12-31": 0.25
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.6": {
+        "2024-01-01.2100-12-31": 0.25
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.7": {
+        "2024-01-01.2100-12-31": 0.22
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.8": {
+        "2024-01-01.2100-12-31": 0.19
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.9": {
+        "2024-01-01.2100-12-31": 0.15
+    },
+    "gov.simulation.labor_supply_responses.elasticities.substitution.by_position_and_decile.primary.10": {
+        "2024-01-01.2100-12-31": 0.10
+    }
+}
+
+
+# Dict-returning functions for each option. The get_optionN_dict functions
+# below return static parameter dictionaries (no elasticities); the
+# get_optionN_dynamic_dict functions further down pre-merge CBO_ELASTICITIES.
+
+def get_option1_dict():
+    """Return parameter dict for Option 1 (static scoring only - no elasticities)."""
+    return eliminate_ss_taxation()
+
+def get_option2_dict():
+    """Return parameter dict for Option 2 (static scoring only - no elasticities)."""
+    return tax_85_percent_ss()
+
+def get_option3_dict():
+    """Return parameter dict for Option 3 (static scoring only - no elasticities)."""
+    result = {}
+    result.update(tax_85_percent_ss())
+    result.update(extend_senior_deduction())
+    return result
+
+def get_option4_dict(credit_amount=500):
+    """Return parameter dict for Option 4 (static scoring only - no elasticities)."""
+    result = {}
+    result.update(tax_85_percent_ss())
+    result.update(add_ss_tax_credit(credit_amount))
+    result.update(eliminate_senior_deduction())
+    return result
+
+def get_option5_dict():
+    """Return parameter dict for Option 5 (static scoring only - no elasticities)."""
+    result = {}
+    result.update(eliminate_ss_taxation())
+    result.update(enable_employer_payroll_tax(1.0))
+    return result
+
+def get_option6_dict():
+    """Return parameter dict for Option 6
(static scoring only - no elasticities).""" + reform_dict = { + "gov.contrib.crfb.tax_employer_payroll_tax.in_effect": { + "2026-01-01.2100-12-31": True + }, + "gov.contrib.crfb.tax_employer_payroll_tax.percentage": { + "2026": 0.1307, + "2027": 0.2614, + "2028": 0.3922, + "2029": 0.5229, + "2030": 0.6536, + "2031": 0.7843, + "2032": 0.9150, + "2033-01-01.2100-12-31": 1.0 + }, + } + + # Phase down base rate parameters + base_years = [2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037] + base_values = [0.45, 0.40, 0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05] + + for param_name in ["benefit_cap", "excess"]: + param_path = f"gov.irs.social_security.taxability.rate.base.{param_name}" + reform_dict[param_path] = {} + for year, value in zip(base_years, base_values): + reform_dict[param_path][str(year)] = value + reform_dict[param_path]["2038-01-01.2100-12-31"] = 0 + + # Phase down additional rate parameters + add_years = list(range(2029, 2045)) + add_values = [0.80, 0.75, 0.70, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40, + 0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05] + + for param_name in ["benefit_cap", "bracket", "excess"]: + param_path = f"gov.irs.social_security.taxability.rate.additional.{param_name}" + reform_dict[param_path] = {} + for year, value in zip(add_years, add_values): + reform_dict[param_path][str(year)] = value + reform_dict[param_path]["2045-01-01.2100-12-31"] = 0 + + return reform_dict + +def get_option7_dict(): + """Return parameter dict for Option 7 (static scoring only - no elasticities).""" + return eliminate_senior_deduction() + +def get_option8_dict(): + """Return parameter dict for Option 8 (static scoring only - no elasticities).""" + return tax_100_percent_ss() + + +# Complete dynamic scoring dictionaries with CBO elasticities pre-merged +def get_option1_dynamic_dict(): + """Return complete parameter dict for Option 1 with CBO elasticities.""" + result = {} + result.update(eliminate_ss_taxation()) + result.update(CBO_ELASTICITIES) + return result + +def get_option2_dynamic_dict(): + """Return complete parameter dict for Option 2 with CBO elasticities.""" + result = {} + result.update(tax_85_percent_ss()) + result.update(CBO_ELASTICITIES) + return result + +def get_option3_dynamic_dict(): + """Return complete parameter dict for Option 3 with CBO elasticities.""" + result = {} + result.update(tax_85_percent_ss()) + result.update(extend_senior_deduction()) + result.update(CBO_ELASTICITIES) + return result + +def get_option4_dynamic_dict(credit_amount=500): + """Return complete parameter dict for Option 4 with CBO elasticities.""" + result = {} + result.update(tax_85_percent_ss()) + result.update(add_ss_tax_credit(credit_amount)) + result.update(eliminate_senior_deduction()) + result.update(CBO_ELASTICITIES) + return result + +def get_option5_dynamic_dict(): + """Return complete parameter dict for Option 5 with CBO elasticities.""" + result = {} + result.update(eliminate_ss_taxation()) + result.update(enable_employer_payroll_tax(1.0)) + result.update(CBO_ELASTICITIES) + return result + +def get_option6_dynamic_dict(): + """Return complete parameter dict for Option 6 with CBO elasticities.""" + reform_dict = { + "gov.contrib.crfb.tax_employer_payroll_tax.in_effect": { + "2026-01-01.2100-12-31": True + }, + "gov.contrib.crfb.tax_employer_payroll_tax.percentage": { + "2026": 0.1307, + "2027": 0.2614, + "2028": 0.3922, + "2029": 0.5229, + "2030": 0.6536, + "2031": 0.7843, + "2032": 0.9150, + "2033-01-01.2100-12-31": 1.0 + }, + } + + # Phase down base rate parameters + base_years = 
+    # Phase down base rate parameters (benefit_cap and excess)
+    base_years = [2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037]
+    base_values = [0.45, 0.40, 0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05]
+
+    for param_name in ["benefit_cap", "excess"]:
+        param_path = f"gov.irs.social_security.taxability.rate.base.{param_name}"
+        reform_dict[param_path] = {}
+        for year, value in zip(base_years, base_values):
+            reform_dict[param_path][str(year)] = value
+        reform_dict[param_path]["2038-01-01.2100-12-31"] = 0
+
+    # Phase down additional rate parameters (benefit_cap, bracket, excess)
+    add_years = list(range(2029, 2045))
+    add_values = [0.80, 0.75, 0.70, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40,
+                  0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05]
+
+    for param_name in ["benefit_cap", "bracket", "excess"]:
+        param_path = f"gov.irs.social_security.taxability.rate.additional.{param_name}"
+        reform_dict[param_path] = {}
+        for year, value in zip(add_years, add_values):
+            reform_dict[param_path][str(year)] = value
+        reform_dict[param_path]["2045-01-01.2100-12-31"] = 0
+
+    # Add CBO elasticities
+    reform_dict.update(CBO_ELASTICITIES)
+    return reform_dict
+
+def get_option7_dynamic_dict():
+    """Return complete parameter dict for Option 7 with CBO elasticities."""
+    result = {}
+    result.update(eliminate_senior_deduction())
+    result.update(CBO_ELASTICITIES)
+    return result
+
+def get_option8_dynamic_dict():
+    """Return complete parameter dict for Option 8 with CBO elasticities."""
+    result = {}
+    result.update(tax_100_percent_ss())
+    result.update(CBO_ELASTICITIES)
+    return result
+
+
 # Policy reform functions using modular components
 
 def get_option1_reform():
@@ -238,7 +461,7 @@ def get_option1_reform():
     Completely eliminates federal income taxation of Social Security benefits,
     returning to the pre-1984 policy where benefits were not subject to income tax.
     """
-    return Reform.from_dict(eliminate_ss_taxation(), country_id="us")
+    return Reform.from_dict(get_option1_dict(), country_id="us")
 
 
 def get_option2_reform():
@@ -247,7 +470,7 @@ def get_option2_reform():
     Taxes 85% of Social Security benefits for all recipients, regardless
     of income level, eliminating the current threshold system.
     """
-    return Reform.from_dict(tax_85_percent_ss(), country_id="us")
+    return Reform.from_dict(get_option2_dict(), country_id="us")
 
 
 def get_option3_reform():
@@ -256,11 +479,7 @@ def get_option3_reform():
     Combines taxation of 85% of benefits with a permanent extension
     of the senior deduction that would otherwise expire in 2028.
     """
-    # Combine parametric SS reform with senior deduction extension
-    return Reform.from_dict({
-        **tax_85_percent_ss(),
-        **extend_senior_deduction()
-    }, country_id="us")
+    return Reform.from_dict(get_option3_dict(), country_id="us")
 
 
 def get_option4_reform(credit_amount=500):
@@ -272,12 +491,7 @@ def get_option4_reform(credit_amount=500):
     Args:
         credit_amount: The credit amount in dollars (default: 500)
     """
-    # Combine parametric SS reform with credit and deduction changes
-    return Reform.from_dict({
-        **tax_85_percent_ss(),
-        **add_ss_tax_credit(credit_amount),
-        **eliminate_senior_deduction()
-    }, country_id="us")
+    return Reform.from_dict(get_option4_dict(credit_amount), country_id="us")
 
 
 def get_option5_reform():
@@ -286,10 +500,7 @@ def get_option5_reform():
 
     Eliminates Social Security benefit taxation while making employer payroll
     contributions taxable income.
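+
+    Example::
+
+        reform = get_option5_reform()
+        # equivalent to: Reform.from_dict(get_option5_dict(), country_id="us")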
""" - return Reform.from_dict({ - **eliminate_ss_taxation(), - **enable_employer_payroll_tax(1.0) - }, country_id="us") + return Reform.from_dict(get_option5_dict(), country_id="us") def get_option6_reform(): @@ -301,49 +512,7 @@ def get_option6_reform(): Note: This reform is complex and may need further refinement for the SS taxation phase-down to work properly with PolicyEngine's parameter structure. """ - reform_dict = { - # Enable employer payroll tax inclusion - "gov.contrib.crfb.tax_employer_payroll_tax.in_effect": { - "2026-01-01.2100-12-31": True - }, - # Phase in employer payroll tax (year by year from 2026 to 2033) - "gov.contrib.crfb.tax_employer_payroll_tax.percentage": { - "2026": 0.1307, # 1/7.65 - "2027": 0.2614, # 2/7.65 - "2028": 0.3922, # 3/7.65 - "2029": 0.5229, # 4/7.65 - "2030": 0.6536, # 5/7.65 - "2031": 0.7843, # 6/7.65 - "2032": 0.9150, # 7/7.65 - "2033-01-01.2100-12-31": 1.0 # Full amount from 2033 onward - }, - } - - # For the SS taxation phase-down, we need to set each leaf parameter - # Phase down base rate parameters (benefit_cap and excess) - base_years = [2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037] - base_values = [0.45, 0.40, 0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05] - - for param_name in ["benefit_cap", "excess"]: - param_path = f"gov.irs.social_security.taxability.rate.base.{param_name}" - reform_dict[param_path] = {} - for year, value in zip(base_years, base_values): - reform_dict[param_path][str(year)] = value - reform_dict[param_path]["2038-01-01.2100-12-31"] = 0 - - # Phase down additional rate parameters (benefit_cap, bracket, excess) - add_years = list(range(2029, 2045)) - add_values = [0.80, 0.75, 0.70, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40, - 0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05] - - for param_name in ["benefit_cap", "bracket", "excess"]: - param_path = f"gov.irs.social_security.taxability.rate.additional.{param_name}" - reform_dict[param_path] = {} - for year, value in zip(add_years, add_values): - reform_dict[param_path][str(year)] = value - reform_dict[param_path]["2045-01-01.2100-12-31"] = 0 - - return Reform.from_dict(reform_dict, country_id="us") + return Reform.from_dict(get_option6_dict(), country_id="us") def get_option7_reform(): @@ -353,7 +522,7 @@ def get_option7_reform(): that has a 6% phase-out beginning at $75k/$150k for single/joint filers. The deduction expires in 2029, so there's only impact from 2026-2028. """ - return Reform.from_dict(eliminate_senior_deduction(), country_id="us") + return Reform.from_dict(get_option7_dict(), country_id="us") def get_option8_reform(): @@ -363,7 +532,7 @@ def get_option8_reform(): regardless of income level. This is more comprehensive than Option 2 which taxes only 85% of benefits. 
""" - return Reform.from_dict(tax_100_percent_ss(), country_id="us") + return Reform.from_dict(get_option8_dict(), country_id="us") # Dictionary mapping reform IDs to configurations diff --git a/submit_option2_dynamic.sh b/submit_option2_dynamic.sh new file mode 100755 index 0000000..35ae240 --- /dev/null +++ b/submit_option2_dynamic.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option2 for 75 years (2026-2100) DYNAMIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option2 \ + --scoring dynamic \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option2_static.sh b/submit_option2_static.sh new file mode 100755 index 0000000..1bcfa9d --- /dev/null +++ b/submit_option2_static.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option2 for 75 years (2026-2100) STATIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option2 \ + --scoring static \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option3_dynamic.sh b/submit_option3_dynamic.sh new file mode 100755 index 0000000..bfef567 --- /dev/null +++ b/submit_option3_dynamic.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option3 for 75 years (2026-2100) DYNAMIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option3 \ + --scoring dynamic \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option4_dynamic.sh b/submit_option4_dynamic.sh new file mode 100755 index 0000000..94c3c8b --- /dev/null +++ b/submit_option4_dynamic.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option4 for 75 years (2026-2100) DYNAMIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option4 \ + --scoring dynamic \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option5_dynamic.sh b/submit_option5_dynamic.sh new file mode 100755 index 0000000..7b2043e --- /dev/null +++ b/submit_option5_dynamic.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option5 for 75 years (2026-2100) DYNAMIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option5 \ + --scoring dynamic \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option6_dynamic.sh b/submit_option6_dynamic.sh new file mode 100755 index 0000000..81fd030 --- /dev/null +++ b/submit_option6_dynamic.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option6 for 75 years (2026-2100) DYNAMIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option6 \ + --scoring dynamic \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option6_static.sh 
b/submit_option6_static.sh new file mode 100755 index 0000000..9018e43 --- /dev/null +++ b/submit_option6_static.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option6 for 75 years (2026-2100) STATIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option6 \ + --scoring static \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option7_dynamic.sh b/submit_option7_dynamic.sh new file mode 100755 index 0000000..4ca01f6 --- /dev/null +++ b/submit_option7_dynamic.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option7 for 75 years (2026-2100) DYNAMIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option7 \ + --scoring dynamic \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option7_static.sh b/submit_option7_static.sh new file mode 100755 index 0000000..c5277b1 --- /dev/null +++ b/submit_option7_static.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option7 for 75 years (2026-2100) STATIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option7 \ + --scoring static \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option8_dynamic.sh b/submit_option8_dynamic.sh new file mode 100755 index 0000000..1b9117a --- /dev/null +++ b/submit_option8_dynamic.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option8 for 75 years (2026-2100) DYNAMIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option8 \ + --scoring dynamic \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions" diff --git a/submit_option8_static.sh b/submit_option8_static.sh new file mode 100755 index 0000000..8808a4e --- /dev/null +++ b/submit_option8_static.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Submit option8 for 75 years (2026-2100) STATIC scoring +YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))") +/usr/bin/python3 batch/submit_years.py \ + --years "$YEARS" \ + --reforms option8 \ + --scoring static \ + --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions"
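
Note: the ten `submit_option*_{static,dynamic}.sh` wrappers above are identical except for the `--reforms` and `--scoring` arguments. For reference, a single parameterized script could express the same submissions; a minimal sketch (hypothetical `submit_reform.sh`, reusing the exact flags and bucket from the scripts above):

```bash
#!/bin/bash
# Hypothetical consolidation of the per-option submit scripts.
# Usage: ./submit_reform.sh <reform> <static|dynamic>
set -eu
REFORM="${1:?usage: $0 <reform> <static|dynamic>}"
SCORING="${2:?usage: $0 <reform> <static|dynamic>}"
# All 75 years (2026-2100), comma-separated
YEARS=$(python3 -c "print(','.join(map(str, range(2026, 2101))))")
/usr/bin/python3 batch/submit_years.py \
    --years "$YEARS" \
    --reforms "$REFORM" \
    --scoring "$SCORING" \
    --bucket crfb-ss-analysis-results 2>&1 | grep -v "FutureWarning\|NotOpenSSLWarning\|urllib3\|warnings\|ssl\|packages_distributions"
```

For example, `./submit_reform.sh option8 dynamic` would reproduce `submit_option8_dynamic.sh`.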