diff --git a/.github/workflows/lint-and-test.yaml b/.github/workflows/lint-and-test.yaml index 36f7360..8aadbc0 100644 --- a/.github/workflows/lint-and-test.yaml +++ b/.github/workflows/lint-and-test.yaml @@ -9,10 +9,10 @@ on: branches: - main - dev - - '!f/pypi_release' jobs: test-integration: + if: github.event.pull_request.head.ref != 'f/pypi_release' runs-on: ubuntu-latest steps: diff --git a/.github/workflows/publish_testpypi.yaml b/.github/workflows/publish_testpypi.yaml index 7f6799a..b3da109 100644 --- a/.github/workflows/publish_testpypi.yaml +++ b/.github/workflows/publish_testpypi.yaml @@ -5,10 +5,11 @@ on: tags: - 'test-v*' branches: - - 'dev' + - 'f/pypi_release' jobs: deploy_testpypi: + if: false runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -32,53 +33,74 @@ jobs: pip install build twine - name: Build and publish env: - TWINE_USERNAME: ${{ secrets.TEST_PYPI_USERNAME }} + #TWINE_USERNAME: ${{ secrets.TEST_PYPI_USERNAME }} + TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.TEST_PYPI_PASSWORD }} + #ACTIONS_STEP_DEBUG: true run: | - echo "TWINE_USERNAME is set to $TWINE_USERNAME" - echo "TWINE_PASSWORD is set to $TWINE_PASSWORD" + # echo "TWINE_PASSWORD first 5 chars: ${TWINE_PASSWORD:0:184}" + # echo "TWINE_PASSWORD length: ${#TWINE_PASSWORD}" python -m build - twine upload --repository-url https://test.pypi.org/legacy/ dist/* + twine upload --verbose --repository-url https://test.pypi.org/legacy/ dist/* - jupyter_test: - needs: deploy_testpypi + test-colab: + #needs: deploy_testpypi runs-on: ubuntu-latest + #a Public "Colab-like" Image + container: + image: jupyter/minimal-notebook:latest + options: --user root # Run as root to avoid permission issues permissions: - contents: write # Required for deleting the tag + contents: write steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11.9 - uses: actions/setup-python@v4 - with: - python-version: '3.11.9' - - name: Install Colab-like environment - run: | - pip install 
--index-url https://test.pypi.org/simple/ \ - --extra-index-url https://pypi.org/simple \ - starfish-core - pip install ipython==8.10.0 \ - ipykernel==6.22.0 \ - jupyter_http_over_ws>=0.0.8 \ - pandas==1.5.3 \ - numpy==1.23.5 \ - jupyter \ - pytest \ - nbval - - name: Configure Colab simulation - run: | - jupyter serverextension enable --py jupyter_http_over_ws - - name: Run Colab-style tests - run: | - pytest --nbval \ - --nbval-kernel-name=python3 \ - --nbval-cell-timeout=120 \ - --nbval-sanitize-with ./tests/sanitize.cfg \ - tests/data_factory/factory/test_resume_index.ipynb + - uses: actions/checkout@v3 + with: + sparse-checkout: | + tests/* + sparse-checkout-cone-mode: false + - name: Update system packages + run: | + apt-get update + apt-get install -y libssl3 # Removed sudo since we're running as root + - name: Print Python and Jupyter versions + run: | + python --version + pip list | grep -E 'jupyter|ipykernel|nbconvert|notebook' + # Authenticate to GCP + # - name: Authenticate to GCP + # uses: google-github-actions/auth@v1 + # with: + # credentials_json: ${{ secrets.GCP_SA_KEY }} - # Add tag deletion step - - name: Delete triggering tag after successful test - if: success() && startsWith(github.ref, 'refs/tags/test-v') - run: | - gh api -X DELETE /repos/$GITHUB_REPOSITORY/git/refs/tags/${GITHUB_REF#refs/tags/} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + # # Configure Docker to use GCR credentials + # - name: Configure Docker for GCR + # uses: google-github-actions/docker-auth@v1 + + # # Now you can pull the image + # - name: Use Colab base image + # run: docker pull gcr.io/colab-images/base:latest + + # --no-prompt --no-input \ suppress the output + - name: Run Colab-style tests + run: | + if ! 
jupyter nbconvert --execute --to notebook --inplace \ + --ExecutePreprocessor.kernel_name=python3 \ + --ExecutePreprocessor.timeout=120 \ + --no-prompt --no-input \ + --stdout \ + tests/data_factory/factory/test_resume_index_1.ipynb; then + echo "::error::Notebook execution failed" + exit 1 + fi + echo "Notebook executed successfully. Summary:" && \ + jupyter nbconvert --to markdown --stdout \ + tests/data_factory/factory/test_resume_index_1.ipynb | \ + grep -E '^#|^##' || true + + # Add tag deletion step + - name: Delete triggering tag after successful test + if: startsWith(github.ref, 'refs/tags/test-v') + run: | + gh api -X DELETE /repos/$GITHUB_REPOSITORY/git/refs/tags/${GITHUB_REF#refs/tags/} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/tests/data_factory/factory/test_resume_index_1.ipynb b/tests/data_factory/factory/test_resume_index_1.ipynb new file mode 100644 index 0000000..a4b9806 --- /dev/null +++ b/tests/data_factory/factory/test_resume_index_1.ipynb @@ -0,0 +1,572 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --index-url https://test.pypi.org/simple/ \\\n", + " --extra-index-url https://pypi.org/simple \\\n", + " starfish-core" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import time\n", + "import signal\n", + "import threading\n", + "from typing import List, Dict, Any, Optional, Set, Tuple\n", + "\n", + "from starfish import data_factory\n", + "from starfish.common.env_loader import load_env_file\n", + "from starfish.data_factory.utils.mock import mock_llm_call\n", + "from starfish.common.logger import get_logger\n", + "logger = get_logger(__name__)\n", + "# Load environment variables\n", + "load_env_file()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Apply nest_asyncio for use in 
Jupyter notebooks\n", + "try:\n", + " import nest_asyncio\n", + " nest_asyncio.apply()\n", + "except ImportError:\n", + " print(\"nest_asyncio not found, skipping. This may cause issues if run in a notebook.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Define the data factory function at module level\n", + "@data_factory(max_concurrency=10)\n", + "async def mock_llm_processor(city_name: str, num_records_per_city: int):\n", + " \"\"\"Mock LLM processor that simulates processing with a delay\"\"\"\n", + " # Added sleep to make the process take longer for demonstration\n", + " await asyncio.sleep(0.5)\n", + " return await mock_llm_call(city_name=city_name, num_records_per_city=num_records_per_city, fail_rate=0)\n", + "\n", + "class TestRunner:\n", + " def __init__(self, total_time_limit=15, checkpoint_interval=3, max_checkpoints=10):\n", + " \"\"\"\n", + " Initialize the test runner\n", + " \n", + " Args:\n", + " total_time_limit: Maximum time allowed for the whole test in seconds\n", + " checkpoint_interval: Time between checkpoints (stop/resume) in seconds\n", + " max_checkpoints: Maximum number of checkpoints before forced termination\n", + " \"\"\"\n", + " self.total_time_limit = total_time_limit\n", + " self.checkpoint_interval = checkpoint_interval\n", + " self.max_checkpoints = max_checkpoints\n", + " self.errors = [] # Each item will be a tuple of (step, error_message)\n", + " self.results = None\n", + " self.stop_events = []\n", + " self.job = None\n", + " self.timeout_triggered = False\n", + " self.all_checkpoint_errors = {} # Dictionary to track errors per checkpoint\n", + " \n", + " def add_error(self, step: str, error_message: str):\n", + " \"\"\"Add an error with the associated step information\"\"\"\n", + " self.errors.append((step, error_message))\n", + " \n", + " # Also track errors by checkpoint\n", + " if step not in self.all_checkpoint_errors:\n", + " 
self.all_checkpoint_errors[step] = []\n", + " self.all_checkpoint_errors[step].append(error_message)\n", + " \n", + " def validate_completion_indices(self, indices, step_name=\"Validation\", is_final=False):\n", + " \"\"\"\n", + " Validate that the completion indices are correct\n", + " \n", + " Args:\n", + " indices: The indices to validate\n", + " step_name: The name of the step for error reporting\n", + " is_final: Whether this is the final validation (expecting all indices to be complete)\n", + " \n", + " Returns:\n", + " List of errors found\n", + " \"\"\"\n", + " errors = []\n", + " \n", + " # Safety check for None\n", + " if indices is None:\n", + " error = \"Indices are None\"\n", + " self.add_error(step_name, error)\n", + " return [error]\n", + " \n", + " # Get the completed count\n", + " completed_values = [idx for idx in indices if idx is not None]\n", + " completed_count = len(completed_values)\n", + " \n", + " # For final validation, check if all indices are completed\n", + " if is_final:\n", + " # Check length\n", + " if len(indices) != 100:\n", + " error = f\"Expected 100 indices total, but found {len(indices)}\"\n", + " self.add_error(step_name, error)\n", + " errors.append(error)\n", + " \n", + " # Check that all are completed (no None values)\n", + " if completed_count != 100:\n", + " error = f\"Expected 100 completed indices, but found {completed_count}\"\n", + " self.add_error(step_name, error)\n", + " errors.append(error)\n", + " \n", + " # Check for uniqueness among completed indices (always important)\n", + " unique_indices = set(completed_values)\n", + " if len(unique_indices) != len(completed_values):\n", + " duplicates = [idx for idx in unique_indices if indices.count(idx) > 1]\n", + " error = f\"Found duplicate values: {duplicates}\"\n", + " self.add_error(step_name, error)\n", + " errors.append(error)\n", + " \n", + " # Check range of indices (0-99)\n", + " expected_range = set(range(100))\n", + " extra = unique_indices - 
expected_range\n", + " \n", + " if extra:\n", + " error = f\"Unexpected indices: {sorted(extra)}\"\n", + " self.add_error(step_name, error)\n", + " errors.append(error)\n", + " \n", + " # For final validation, check if any indices are missing\n", + " if is_final:\n", + " missing = expected_range - unique_indices\n", + " if missing:\n", + " error = f\"Missing indices: {sorted(missing)}\"\n", + " self.add_error(step_name, error)\n", + " errors.append(error)\n", + " \n", + " return errors\n", + "\n", + " def interrupt_execution(self):\n", + " \"\"\"Schedule an interruption after the checkpoint interval\"\"\"\n", + " print(f\"ā±ļø Scheduling interruption in {self.checkpoint_interval} seconds\")\n", + " timer = threading.Timer(self.checkpoint_interval, self.raise_interrupt)\n", + " self.stop_events.append(timer)\n", + " timer.start()\n", + "\n", + " def raise_interrupt(self):\n", + " \"\"\"Raise a KeyboardInterrupt to stop the execution\"\"\"\n", + " print(\"šŸ›‘ Raising interruption signal\")\n", + " signal.raise_signal(signal.SIGINT)\n", + "\n", + " def setup_timeout(self):\n", + " \"\"\"Set up the overall timeout for the test\"\"\"\n", + " print(f\"ā±ļø Setting up timeout limit of {self.total_time_limit} seconds\")\n", + " timeout_timer = threading.Timer(self.total_time_limit, self.handle_timeout)\n", + " self.stop_events.append(timeout_timer)\n", + " timeout_timer.start()\n", + "\n", + " def handle_timeout(self):\n", + " \"\"\"Handle the timeout by setting a flag instead of forcefully exiting\"\"\"\n", + " print(\"ā° Timeout reached! 
Stopping the job gracefully.\")\n", + " self.add_error(\"Timeout\", f\"Test exceeded maximum time limit of {self.total_time_limit} seconds\")\n", + " # Set a flag instead of hard exiting - this is more Jupyter-friendly\n", + " self.timeout_triggered = True\n", + " # Signal the main thread to stop\n", + " signal.raise_signal(signal.SIGINT)\n", + "\n", + " def cleanup_timers(self):\n", + " \"\"\"Clean up all running timers\"\"\"\n", + " for timer in self.stop_events:\n", + " if timer.is_alive():\n", + " timer.cancel()\n", + " self.stop_events = []\n", + " \n", + " def check_progress_and_validate(self, checkpoint_name):\n", + " \"\"\"\n", + " Check the current progress and validate indices for the current checkpoint\n", + " \n", + " Returns:\n", + " Tuple of (progress_info, completed)\n", + " \"\"\"\n", + " progress_info = \"Unknown\"\n", + " completed = False\n", + " \n", + " try:\n", + " # Safely get job status - avoid calling methods directly on potentially None objects\n", + " if hasattr(self.job, 'get_index_completed') and callable(getattr(self.job, 'get_index_completed')):\n", + " indices = self.job.get_index_completed()\n", + " \n", + " # Safety check\n", + " if indices is not None:\n", + " # Determine if this is final validation based on completion status\n", + " completed_count = len([i for i in indices if i is not None])\n", + " is_final = completed_count == 100\n", + " \n", + " # Perform validation for this checkpoint\n", + " validation_errors = self.validate_completion_indices(\n", + " indices, \n", + " checkpoint_name + \" Validation\",\n", + " is_final=is_final\n", + " )\n", + " if validation_errors:\n", + " print(f\"āŒ {checkpoint_name} validation failed:\")\n", + " for err in validation_errors:\n", + " print(f\" - {err}\")\n", + " elif is_final:\n", + " print(f\"āœ… {checkpoint_name} validation passed: All indices are correct\")\n", + " else:\n", + " print(f\"āœ… {checkpoint_name} partial validation passed: {completed_count} indices processed\")\n", + 
" \n", + " progress_info = f\"{completed_count}/100\"\n", + " \n", + " # Check if all tasks are completed\n", + " if completed_count == 100:\n", + " completed = True\n", + " else:\n", + " self.add_error(checkpoint_name, \"Failed to get indices: indices is None\")\n", + " print(f\"āš ļø {checkpoint_name}: Failed to get indices: indices is None\")\n", + " else:\n", + " self.add_error(checkpoint_name, \"Job does not have get_index_completed method\")\n", + " print(f\"āš ļø {checkpoint_name}: Job does not have get_index_completed method\")\n", + " \n", + " except Exception as e:\n", + " self.add_error(checkpoint_name, f\"Error getting indices: {str(e)}\")\n", + " print(f\"āŒ {checkpoint_name}: Error getting indices: {str(e)}\")\n", + " \n", + " return progress_info, completed\n", + "\n", + " def _finish_test(self, start_time):\n", + " \"\"\"Finish the test by cleaning up and returning results\"\"\"\n", + " # Clean up timers\n", + " self.cleanup_timers()\n", + " \n", + " # Final validation if we have a job\n", + " if self.job and hasattr(self.job, 'get_index_completed'):\n", + " try:\n", + " final_indices = self.job.get_index_completed()\n", + " # Always perform full validation in the final step\n", + " validation_errors = self.validate_completion_indices(final_indices, \"Final Validation\", is_final=True)\n", + " if validation_errors:\n", + " print(\"āŒ Final validation failed:\")\n", + " for err in validation_errors:\n", + " print(f\" - {err}\")\n", + " else:\n", + " print(\"āœ… Final validation passed: All indices are correct\")\n", + " except Exception as e:\n", + " self.add_error(\"Final Validation\", f\"Error getting final indices: {str(e)}\")\n", + " print(f\"āŒ Error in final validation: {str(e)}\")\n", + "\n", + " def run_test(self):\n", + " \"\"\"Run the complete test with interruptions and resumptions\"\"\"\n", + " # Create input data\n", + " cities = [\"New York\", \"London\", \"Tokyo\", \"Paris\", \"Sydney\"] * 20 # 100 cities\n", + " \n", + " 
print(\"=== Starting Initial Run ===\")\n", + " start_time = time.time()\n", + " \n", + " try:\n", + " # Setup timers\n", + " self.setup_timeout()\n", + " self.interrupt_execution()\n", + " logger.info(\"start to setup the job\")\n", + " # Start initial run - use the module level decorated function\n", + " self.job = mock_llm_processor # Use the module-level function\n", + " logger.info(\"finish to setup the job\")\n", + " logger.info(self.job.get_index_completed)\n", + " try:\n", + " self.results = self.job.run(city_name=cities, num_records_per_city=1)\n", + " print(\"āœ… Initial run completed without interruption\")\n", + " \n", + " # Check progress and validate after initial run\n", + " progress_info, completed = self.check_progress_and_validate(\"Initial Run\")\n", + " if completed:\n", + " print(\"āœ… All tasks completed in initial run\")\n", + " return self._finish_test(start_time)\n", + " \n", + " except Exception as e:\n", + " self.add_error(\"Initial Run\", f\"Error: {str(e)}\")\n", + " print(f\"āŒ Error in Initial Run: {str(e)}\")\n", + " # Don't return here, continue with checkpoint attempts\n", + " except KeyboardInterrupt:\n", + " print(\"āš ļø Initial run interrupted\")\n", + " \n", + " # Check progress and validate after interruption\n", + " progress_info, completed = self.check_progress_and_validate(\"Initial Run (Interrupted)\")\n", + " if completed:\n", + " print(\"āœ… All tasks completed after initial interruption\")\n", + " return self._finish_test(start_time)\n", + " \n", + " except Exception as e:\n", + " self.add_error(\"Initial Run Setup\", f\"Error: {str(e)}\")\n", + " print(f\"āŒ Error in Initial Run setup: {str(e)}\")\n", + " # Don't return here, continue with checkpoint attempts\n", + " \n", + " # Resume until complete\n", + " checkpoint_count = 1\n", + " \n", + " # Add a safety counter to prevent infinite loops\n", + " while checkpoint_count <= self.max_checkpoints:\n", + " checkpoint_name = f\"Checkpoint {checkpoint_count}\"\n", + " 
\n", + " # Check if timeout was triggered\n", + " if self.timeout_triggered:\n", + " print(\"ā° Test timed out - stopping testing loop\")\n", + " break\n", + " \n", + " # Check if we have reached the total time limit\n", + " if time.time() - start_time >= self.total_time_limit:\n", + " self.add_error(checkpoint_name, f\"Test exceeded maximum time limit of {self.total_time_limit} seconds\")\n", + " print(f\"ā° Test timed out after {self.total_time_limit} seconds\")\n", + " break\n", + " \n", + " # Check if we've hit the max checkpoint count\n", + " if checkpoint_count == self.max_checkpoints:\n", + " self.add_error(checkpoint_name, f\"Test reached maximum checkpoint count of {self.max_checkpoints}\")\n", + " print(f\"āš ļø Test reached maximum checkpoint count of {self.max_checkpoints}\")\n", + " break\n", + " \n", + " # Check if we have a job to resume\n", + " if self.job is None:\n", + " self.add_error(checkpoint_name, \"Cannot continue: job is None\")\n", + " print(\"āŒ Cannot continue: job is None\")\n", + " break\n", + " \n", + " # Check progress before resuming\n", + " progress_info, completed = self.check_progress_and_validate(f\"Before {checkpoint_name}\")\n", + " if completed:\n", + " print(f\"āœ… All tasks completed before {checkpoint_name}\")\n", + " break\n", + " \n", + " print(f\"=== Starting {checkpoint_name} ({progress_info}) ===\")\n", + " \n", + " # Resume the job\n", + " try:\n", + " # Setup interruption for the next checkpoint\n", + " self.interrupt_execution()\n", + " \n", + " # Try to resume if the method exists\n", + " if hasattr(self.job, 'resume') and callable(getattr(self.job, 'resume')):\n", + " try:\n", + " self.results = self.job.resume()\n", + " print(f\"āœ… {checkpoint_name} completed without interruption\")\n", + " \n", + " # Check progress after resumption\n", + " progress_info, completed = self.check_progress_and_validate(f\"After {checkpoint_name}\")\n", + " if completed:\n", + " print(f\"āœ… All tasks completed after 
{checkpoint_name}\")\n", + " break\n", + " \n", + " except Exception as e:\n", + " self.add_error(checkpoint_name, f\"Error: {str(e)}\")\n", + " print(f\"āŒ Error in {checkpoint_name}: {str(e)}\")\n", + " # Continue to the next checkpoint\n", + " else:\n", + " self.add_error(checkpoint_name, \"Job does not have resume method\")\n", + " print(\"āš ļø Job does not have resume method\")\n", + " break # Can't continue without resume method\n", + " \n", + " except KeyboardInterrupt:\n", + " print(f\"āš ļø {checkpoint_name} interrupted\")\n", + " \n", + " # Check progress after interruption\n", + " progress_info, completed = self.check_progress_and_validate(f\"After {checkpoint_name} (Interrupted)\")\n", + " if completed:\n", + " print(f\"āœ… All tasks completed after {checkpoint_name} interruption\")\n", + " break\n", + " \n", + " checkpoint_count += 1\n", + "\n", + " # Finish the test\n", + " return self._finish_test(start_time)\n", + " \n", + " def _finish_test(self, start_time):\n", + " \"\"\"Finish the test by cleaning up and returning results\"\"\"\n", + " # Clean up timers\n", + " self.cleanup_timers()\n", + " \n", + " # Final validation if we have a job\n", + " if self.job and hasattr(self.job, 'get_index_completed'):\n", + " try:\n", + " final_indices = self.job.get_index_completed()\n", + " validation_errors = self.validate_completion_indices(final_indices, \"Final Validation\")\n", + " if validation_errors:\n", + " print(\"āŒ Final validation failed:\")\n", + " for err in validation_errors:\n", + " print(f\" - {err}\")\n", + " else:\n", + " print(\"āœ… Final validation passed: All indices are correct\")\n", + " except Exception as e:\n", + " self.add_error(\"Final Validation\", f\"Error getting final indices: {str(e)}\")\n", + " print(f\"āŒ Error in final validation: {str(e)}\")\n", + " \n", + " # Report final status\n", + " total_time = time.time() - start_time\n", + " print(f\"\\n=== Test Summary ===\")\n", + " print(f\"Total execution time: 
{total_time:.2f} seconds\")\n", + " \n", + " # Group errors by phase type for summary\n", + " validation_phases = [p for p in self.all_checkpoint_errors.keys() if \"Validation\" in p]\n", + " checkpoint_phases = [p for p in self.all_checkpoint_errors.keys() if \"Checkpoint\" in p and \"Validation\" not in p]\n", + " timeout_phases = [p for p in self.all_checkpoint_errors.keys() if \"Timeout\" in p]\n", + " other_phases = [p for p in self.all_checkpoint_errors.keys() \n", + " if p not in validation_phases and p not in checkpoint_phases and p not in timeout_phases]\n", + " \n", + " # Report errors by category\n", + " print(\"\\n=== Errors by Phase ===\")\n", + " \n", + " # Show timeout errors first\n", + " if timeout_phases:\n", + " print(\"\\nTimeout Errors:\")\n", + " for phase in timeout_phases:\n", + " for err in self.all_checkpoint_errors[phase]:\n", + " print(f\" - {err}\")\n", + " \n", + " # Show checkpoint execution errors\n", + " if checkpoint_phases:\n", + " print(\"\\nCheckpoint Execution Errors:\")\n", + " for phase in sorted(checkpoint_phases):\n", + " if phase in self.all_checkpoint_errors and self.all_checkpoint_errors[phase]:\n", + " print(f\" {phase}:\")\n", + " for err in self.all_checkpoint_errors[phase]:\n", + " print(f\" - {err}\")\n", + " \n", + " # Show validation errors for each checkpoint\n", + " if validation_phases:\n", + " print(\"\\nValidation Errors:\")\n", + " for phase in sorted(validation_phases):\n", + " if phase in self.all_checkpoint_errors and self.all_checkpoint_errors[phase]:\n", + " print(f\" {phase}:\")\n", + " for err in self.all_checkpoint_errors[phase]:\n", + " print(f\" - {err}\")\n", + " \n", + " # Show other errors\n", + " if other_phases:\n", + " print(\"\\nOther Errors:\")\n", + " for phase in sorted(other_phases):\n", + " if phase in self.all_checkpoint_errors and self.all_checkpoint_errors[phase]:\n", + " print(f\" {phase}:\")\n", + " for err in self.all_checkpoint_errors[phase]:\n", + " print(f\" - {err}\")\n", + " 
\n", + " if not self.errors:\n", + " print(\"\\nāœ… Test completed successfully with no errors\")\n", + " else:\n", + " validation_error_count = sum(len(self.all_checkpoint_errors[p]) for p in validation_phases if p in self.all_checkpoint_errors)\n", + " checkpoint_error_count = sum(len(self.all_checkpoint_errors[p]) for p in checkpoint_phases if p in self.all_checkpoint_errors)\n", + " timeout_error_count = sum(len(self.all_checkpoint_errors[p]) for p in timeout_phases if p in self.all_checkpoint_errors)\n", + " other_error_count = sum(len(self.all_checkpoint_errors[p]) for p in other_phases if p in self.all_checkpoint_errors)\n", + " \n", + " print(f\"\\nāŒ Test completed with {len(self.errors)} errors:\")\n", + " print(f\" - {timeout_error_count} timeout errors\")\n", + " print(f\" - {checkpoint_error_count} checkpoint execution errors\")\n", + " print(f\" - {validation_error_count} validation errors\")\n", + " print(f\" - {other_error_count} other errors\")\n", + " \n", + " return {\n", + " \"success\": len(self.errors) == 0,\n", + " \"errors\": self.errors,\n", + " \"errors_by_checkpoint\": self.all_checkpoint_errors,\n", + " \"total_time\": total_time,\n", + " \"results\": self.results\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Run the test\n", + "import sys\n", + "print(f\"Python version: {sys.version}\")\n", + "runner = TestRunner(total_time_limit=20, checkpoint_interval=3, max_checkpoints=10)\n", + "result = runner.run_test()\n", + "if not result[\"success\"]:\n", + " # Format error message to include all errors organized by category\n", + " error_parts = []\n", + " \n", + " # Categorize phases\n", + " validation_phases = [p for p in result[\"errors_by_checkpoint\"].keys() if \"Validation\" in p]\n", + " checkpoint_phases = [p for p in result[\"errors_by_checkpoint\"].keys() if \"Checkpoint\" in p and \"Validation\" not in p]\n", + " timeout_phases = [p for p in 
result[\"errors_by_checkpoint\"].keys() if \"Timeout\" in p]\n", + " other_phases = [p for p in result[\"errors_by_checkpoint\"].keys() \n", + " if p not in validation_phases and p not in checkpoint_phases and p not in timeout_phases]\n", + " \n", + " # Add timeout errors first\n", + " if timeout_phases:\n", + " error_parts.append(\"\\n=== TIMEOUT ERRORS ===\")\n", + " for phase in timeout_phases:\n", + " for err in result[\"errors_by_checkpoint\"][phase]:\n", + " error_parts.append(f\"- {err}\")\n", + " \n", + " # Add checkpoint execution errors\n", + " if checkpoint_phases:\n", + " error_parts.append(\"\\n=== CHECKPOINT EXECUTION ERRORS ===\")\n", + " for phase in sorted(checkpoint_phases):\n", + " if phase in result[\"errors_by_checkpoint\"] and result[\"errors_by_checkpoint\"][phase]:\n", + " error_parts.append(f\"\\n-- {phase} --\")\n", + " for err in result[\"errors_by_checkpoint\"][phase]:\n", + " error_parts.append(f\"- {err}\")\n", + " \n", + " # Add validation errors for each checkpoint\n", + " if validation_phases:\n", + " error_parts.append(\"\\n=== VALIDATION ERRORS ===\")\n", + " for phase in sorted(validation_phases):\n", + " if phase in result[\"errors_by_checkpoint\"] and result[\"errors_by_checkpoint\"][phase]:\n", + " error_parts.append(f\"\\n-- {phase} --\")\n", + " for err in result[\"errors_by_checkpoint\"][phase]:\n", + " error_parts.append(f\"- {err}\")\n", + " \n", + " # Add other errors\n", + " if other_phases:\n", + " error_parts.append(\"\\n=== OTHER ERRORS ===\")\n", + " for phase in sorted(other_phases):\n", + " if phase in result[\"errors_by_checkpoint\"] and result[\"errors_by_checkpoint\"][phase]:\n", + " error_parts.append(f\"\\n-- {phase} --\")\n", + " for err in result[\"errors_by_checkpoint\"][phase]:\n", + " error_parts.append(f\"- {err}\")\n", + " \n", + " error_message = \"\\n\".join(error_parts)\n", + " validation_error_count = sum(len(result[\"errors_by_checkpoint\"][p]) for p in validation_phases if p in 
result[\"errors_by_checkpoint\"])\n", + " checkpoint_error_count = sum(len(result[\"errors_by_checkpoint\"][p]) for p in checkpoint_phases if p in result[\"errors_by_checkpoint\"])\n", + " timeout_error_count = sum(len(result[\"errors_by_checkpoint\"][p]) for p in timeout_phases if p in result[\"errors_by_checkpoint\"])\n", + " other_error_count = sum(len(result[\"errors_by_checkpoint\"][p]) for p in other_phases if p in result[\"errors_by_checkpoint\"])\n", + " \n", + " raise RuntimeError(f\"Test failed with {len(result['errors'])} total errors ({timeout_error_count} timeout, {checkpoint_error_count} execution, {validation_error_count} validation, {other_error_count} other):{error_message}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "starfish-core-T7IInzTH-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}