diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..a2aa5f3 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,18 @@ +{ + "name": "FFRD Specs devcontainer", + "image": "mcr.microsoft.com/devcontainers/base:bookworm", + "features": { + "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { + "enableNonRootDocker": "true" + }, + "ghcr.io/devcontainers/features/python:1": {} + }, + "postCreateCommand": ".devcontainer/setup-argo.sh", + "customizations": { + "vscode": { + "extensions": [ + "ms-kubernetes-tools.vscode-kubernetes-tools" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/setup-argo.sh b/.devcontainer/setup-argo.sh new file mode 100755 index 0000000..4a3f3aa --- /dev/null +++ b/.devcontainer/setup-argo.sh @@ -0,0 +1,45 @@ +#!/bin/bash +set -e + +echo "๐Ÿš€ Setting up Argo Workflows development environment..." + +echo "๐Ÿ“ฆ Installing kubectl..." +curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" +chmod +x kubectl +sudo mv kubectl /usr/local/bin/ + +echo "๐Ÿ“ฆ Installing Argo CLI..." +curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.7.0/argo-linux-amd64.gz +gunzip argo-linux-amd64.gz +chmod +x argo-linux-amd64 +sudo mv argo-linux-amd64 /usr/local/bin/argo + +echo "๐Ÿงน Cleaning up any existing k3s-server container..." +docker rm -f k3s-server 2>/dev/null || true + +echo "๐Ÿ”ง Starting k3s Kubernetes cluster..." +docker run -d --name k3s-server --privileged -p 6443:6443 rancher/k3s:latest server --disable=traefik + +echo "โณ Waiting for k3s to start..." +sleep 10 + +echo "๐Ÿ”ง Configuring kubectl..." 
+mkdir -p ~/.kube
+# NOTE: the kubeconfig server address is derived from the container's actual IP (looked up below), rather than assuming Docker's default bridge address 172.17.0.2.
+CONTAINER_IP=$(docker inspect k3s-server | grep '"IPAddress"' | head -1 | cut -d'"' -f4)
+docker exec k3s-server cat /etc/rancher/k3s/k3s.yaml | sed "s/127.0.0.1/$CONTAINER_IP/g" > ~/.kube/config
+sed -i '/certificate-authority-data:/d' ~/.kube/config
+sed -i '/server:/a\ insecure-skip-tls-verify: true' ~/.kube/config
+
+echo "📦 Installing Argo Workflows..."
+kubectl create namespace argo || true
+kubectl apply -n argo -f https://github.com/argoproj/argo-workflows/releases/download/v3.7.0/install.yaml
+
+echo "⏳ Waiting for Argo Workflows to be ready..."
+kubectl wait --for=condition=available --timeout=300s deployment/argo-server -n argo || echo "⚠️ Argo server may still be starting..."
+kubectl wait --for=condition=available --timeout=300s deployment/workflow-controller -n argo || echo "⚠️ Workflow controller may still be starting..."
+
+echo "🔐 Setting up RBAC for workflows..."
+kubectl create rolebinding default-admin --clusterrole=admin --serviceaccount=default:default || true
+
+echo "✅ Setup complete!"
diff --git a/docs/proposals/orchestration/orchestration.md b/docs/proposals/orchestration/orchestration.md new file mode 100644 index 0000000..03ff695 --- /dev/null +++ b/docs/proposals/orchestration/orchestration.md @@ -0,0 +1,9 @@ +{% include "proposals/orchestration/standard.md" %} + +______________________________________________________________________ + +{% include "proposals/orchestration/technical-capabilities.md" %} + +______________________________________________________________________ + +{% include "proposals/orchestration/reference.md" %} diff --git a/docs/proposals/orchestration/reference.md b/docs/proposals/orchestration/reference.md new file mode 100644 index 0000000..db195c0 --- /dev/null +++ b/docs/proposals/orchestration/reference.md @@ -0,0 +1,248 @@ +## ๐Ÿ“š Reference + +### Argo Workflows Implementation + +This reference implementation demonstrates how Argo Workflows could support FFRD orchestration needs. Argo Workflows is presented as one example of an orchestration system that offers relevant capabilities, alongside other potential solutions that could meet similar workflow requirements. 
+ +#### Implementation Overview + +The reference implementation uses Argo Workflows running on Kubernetes to illustrate: + +- DAG-based workflow execution patterns with explicit task dependencies +- Container execution approaches with shared volume access +- Parallel task execution techniques with parameterization +- Shared volume management strategies for data exchange between tasks +- Logging and monitoring capabilities for workflow observability + +#### Example Workflow Structure + +The following example demonstrates a basic FFRD workflow pattern with parallel processing and data collection: + +```yaml +# This is a simplified example showing the orchestration pattern +# Full FFRD workflows would use FFRD-compliant containers and configurations + +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: dag-example- +spec: + entrypoint: main + volumeClaimTemplates: # Create a shared volume for the workflow + - metadata: + name: workdir + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi + templates: + - name: main + dag: + tasks: + - name: generate-number + template: generate-number + - name: process-numbers + dependencies: [generate-number] + template: process-numbers + - name: sum-results + dependencies: [process-numbers] + template: sum-results + + - name: generate-number + container: + image: alpine:3.18 + command: [sh, -c] + args: ["echo 5 > /work/number.txt"] + volumeMounts: + - name: workdir + mountPath: /work + + - name: process-numbers + parallelism: 2 # Run two steps at a time + steps: + - - name: process-number + template: process-number + withItems: # Iterate over this list of numbers + - 1 + - 2 + - 3 + - 4 + arguments: + parameters: + - name: item + value: "{{ '{{item}}' }}" # Pass the item from the list to the process-number template + + - name: process-number + inputs: + parameters: + - name: item + container: + image: alpine:3.18 + command: [sh, -c] + args: + - | + num=$(cat /work/number.txt) + 
result=$((num + {{ '{{inputs.parameters.item}}' }})) + echo $result > /work/result-{{ '{{inputs.parameters.item}}' }}.txt + volumeMounts: + - name: workdir + mountPath: /work + + - name: sum-results + container: + image: alpine:3.18 + command: [sh, -c] + args: + - | + sum=0 + for file in /work/result-*.txt; do + sum=$((sum + $(cat $file))) + done + echo "Total sum: $sum" + volumeMounts: + - name: workdir + mountPath: /work +``` + +#### Key Implementation Features + +##### DAG Structure + +- Uses Argo's DAG template to define explicit task dependencies (`dependencies: [generate-number]`) +- Demonstrates parallel execution through steps with `withItems` parameterization +- Shows sequential workflow phases (generate โ†’ process โ†’ collect) + +##### Container Execution + +- Executes standard containers (Alpine Linux) as a pattern for FFRD containers +- Demonstrates passing command line arguments to containers +- Shows volume mounting for data access across all tasks + +##### Data Sharing + +- Uses persistent volume claims (`volumeClaimTemplates`) for shared storage +- Consistent volume mounting (`/work`) across all workflow tasks +- Demonstrates file-based data exchange between workflow steps + +##### Parameterization + +- Shows parameter passing with `withItems` for parallel task execution +- Demonstrates template parameter usage with `inputs.parameters.item` +- Illustrates how to iterate over lists to create multiple parallel tasks + +#### Deployment Requirements + +##### Infrastructure + +- Kubernetes cluster +- Argo Workflows +- Container runtime (Docker, containerd, or CRI-O) +- Persistent storage provisioner + +##### Configuration + +- Argo Workflows controller installation +- RBAC configuration for workflow execution +- Storage class configuration for volume provisioning +- Container registry access credentials + +#### Usage Examples + +##### Validate Workflow + +```bash +# Validate the workflow definition +argo lint reference.yaml +``` + +##### Submit Workflow 
+ +```bash +# Submit the workflow to Argo +argo submit reference.yaml +``` + +##### Monitor Execution + +```bash +# List all workflows +argo list + +# Watch workflow execution (use actual workflow name from list) +argo get dag-example-abc123 + +# View workflow logs +argo logs dag-example-abc123 +``` + +##### Access Results + +```bash +# View workflow status and results +argo get dag-example-abc123 +``` + +### Dev Container Setup (Optional) + +This section describes how to set up a development environment for running argo workflows locally using Visual Studio Code Dev Containers. This setup includes a lightweight Kubernetes cluster (k3s) with Argo Workflows installed, allowing you to run and test the reference implementation locally. + +1. Open [this](https://github.com/fema-ffrd/specs) repository in VS Code +1. When prompted, click "Reopen in Container" or use the Command Palette (Ctrl+Shift+P) and select "Dev Containers: Reopen in Container" +1. The container will automatically set up the environment and install dependencies + +#### What Gets Installed + +The setup includes: + +- **Base**: Debian 12 (bookworm) container +- **Docker**: Docker-outside-of-Docker for running k3s +- **kubectl**: Kubernetes CLI +- **argo**: Argo Workflows CLI v3.7.0 +- **k3s**: Lightweight Kubernetes cluster +- **Argo Workflows**: v3.7.0 installed in the cluster + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€-โ” +โ”‚ DevContainer โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ argo โ”‚ โ”‚ kubectl โ”‚ โ”‚ +โ”‚ โ”‚ CLI โ”‚ โ”‚ CLI โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ Docker Host โ”‚ โ”‚ +โ”‚ โ”‚ 
โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ k3s Container โ”‚โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”โ”‚โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ โ”‚ Argo Workflows โ”‚โ”‚โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜โ”‚โ”‚ โ”‚ +โ”‚ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€-โ”˜ +``` + +#### Useful Commands + +Once setup is complete, you can use these commands: + +```bash +# Validate workflow files +argo lint reference/orchestration/argo/reference.yaml + +# Submit workflow files +argo submit reference/orchestration/argo/reference.yaml + +# Watch the workflow execution +argo submit --watch reference/orchestration/argo/reference.yaml + +# List all workflows +argo list + +# View logs for a specific workflow +argo logs +``` + +#### Useful Links + +- Read the [Argo Workflows documentation](https://argo-workflows.readthedocs.io/) diff --git a/docs/proposals/orchestration/standard.md b/docs/proposals/orchestration/standard.md new file mode 100644 index 0000000..562e1eb --- /dev/null +++ b/docs/proposals/orchestration/standard.md @@ -0,0 +1,88 @@ +# Workflow Orchestration + +## ๐Ÿ“ Standard + +### Purpose + +To document orchestration capabilities and requirements that can support complex, multi-step flood risk data processing workflows within the FFRD initiative. This standard explores how orchestration systems can provide directed acyclic graphs (DAGs), container execution, resource management, observability, and error handling to support reliable execution of hydrologic and hydraulic modeling workflows. 
+ +### Scope + +This standard explores orchestration capabilities relevant to FFRD initiative workflows, including: + +- Multi-step flood risk analysis workflow patterns +- Coordination approaches for hydrologic and hydraulic model runs (HEC-HMS, HEC-RAS, etc.) +- Data processing pipeline management for stochastic storm transposition +- Conformance testing and validation workflow approaches +- Distributed computing patterns across multiple processing nodes + +### Core Capabilities + +#### 1. Workflow Structure + +- **DAG Support**: Orchestration systems typically provide directed acyclic graph (DAG) workflow definitions with explicit task dependencies +- **Parallel Execution**: Modern systems generally enable parallel execution of independent tasks +- **Conditional Logic**: Advanced orchestration platforms often support conditional task execution based on upstream task results + +#### 2. Container Integration + +- **FFRD Container Compatibility**: Orchestration systems can execute FFRD-compliant containers (base image, HMS, RAS, conformance, plugin containers) +- **Container Registry Support**: Most platforms support pulling containers from public and private container registries +- **Runtime Configuration**: Systems typically support passing configuration files, environment variables, and command-line arguments to containers +- **Exit Code Handling**: Well-designed systems handle container exit codes and propagate failures appropriately + +#### 3. Resource Management + +- **Compute Resources**: Orchestration platforms generally allow specification of CPU cores, memory limits, and GPU resources per task +- **Storage Allocation**: Most systems support dynamic and static volume provisioning with configurable storage sizes +- **Resource Constraints**: Mature platforms enforce resource limits and prevent resource contention between concurrent tasks + +#### 4. 
Data Sharing and Persistence + +- **Volume Sharing**: Orchestration systems typically provide shared storage mechanisms for data exchange between workflow tasks +- **Persistent Volumes**: Most platforms support persistent storage that survives task and workflow completion +- **Data Lifecycle Management**: Advanced systems support cleanup of temporary data when workflows complete + +#### 5. Observability and Monitoring + +- **Execution Logging**: Standard orchestration capabilities include capturing logs from workflow tasks +- **Progress Tracking**: Most systems provide visibility into workflow execution status and task completion + +#### 6. Error Handling and Resilience + +- **Retry Strategies**: Modern orchestration systems support configurable retry policies for failed tasks +- **Failure Isolation**: Well-designed systems prevent individual task failures from stopping independent workflow branches + +#### 7. Workflow Definition and Versioning + +- **Declarative Format**: Standard orchestration systems support workflow definitions in human-readable, declarative formats +- **Version Control**: Most platforms enable workflow definitions to be versioned in source control systems +- **Validation**: Many systems provide validation mechanisms for workflow definitions (e.g., linting) + +#### 8. Security and Access Control + +- **Authentication**: Enterprise orchestration systems typically provide authentication mechanisms for workflow access +- **Authorization**: Most platforms provide authorization controls for workflow execution +- **Secret Management**: Modern systems provide secure mechanisms for handling sensitive data and credentials + +#### 9. Scalability and Performance + +- **Multi-node Execution**: Scalable orchestration systems support executing workflows across multiple compute nodes +- **Concurrent Workflows**: Most platforms support running multiple workflows simultaneously + +#### 10. 
Integration and Interoperability + +- **Workflow Submission**: Standard systems provide various mechanisms for submitting and executing workflows + +### Implementation Considerations + +- Immutable workflow definitions can help ensure reproducible executions +- Comprehensive testing strategies may be valuable for workflow validation before production deployment +- Designing workflows with failure scenarios in mind can improve reliability +- Documenting workflow dependencies, data requirements, and expected outcomes supports operational clarity +- Monitoring and alerting for critical workflow execution paths can improve observability +- Resource quotas and limits may help prevent resource exhaustion +- Security best practices for credential management and access control are generally recommended +- Maintaining workflow execution history can support analysis and troubleshooting +- Workflow approval processes may be appropriate for production environments +- Infrastructure as code practices can support consistent orchestration system deployment and configuration diff --git a/docs/proposals/orchestration/technical-capabilities.md b/docs/proposals/orchestration/technical-capabilities.md new file mode 100644 index 0000000..34e5a72 --- /dev/null +++ b/docs/proposals/orchestration/technical-capabilities.md @@ -0,0 +1,148 @@ +## ๐Ÿ“‹ Technical Capabilities + +### Overview + +This section explores technical capabilities commonly found in modern orchestration systems that could support FFRD initiative flood risk analysis workflows. These capabilities include DAG-based workflow execution, container integration patterns, and operational features suitable for complex computational workflows. This exploration examines various implementation patterns and approaches available in contemporary orchestration platforms. + +### Capabilities Framework + +#### 1. 
Workflow Structure + +##### 1.1 Directed Acyclic Graph (DAG) Support + +- **Graph Definition**: Effective orchestration systems represent workflows as directed acyclic graphs with explicit task dependencies +- **Task Dependencies**: Systems should support expressing dependencies between tasks (e.g., Task B depends on Task A completion) +- **Parallel Execution**: Efficient systems execute independent tasks concurrently when resources allow +- **Conditional Execution**: Advanced systems support conditional task execution based on upstream task results or external conditions + +##### 1.2 Workflow Definition + +- **Declarative Format**: Well-designed systems support workflow definitions in human-readable, version-controllable formats +- **Reproducibility**: Reliable systems ensure identical workflow definitions produce deterministic execution behavior +- **Parameterization**: Flexible systems support parameterized workflows for different study areas, configurations, and datasets + +#### 2. FFRD Container Integration + +##### 2.1 Container Execution + +- **FFRD Base Image**: Compatible systems can execute containers built on FFRD base image specifications +- **HMS Containers**: Suitable systems can execute HEC-HMS containers with appropriate Java runtime requirements +- **RAS Containers**: Capable systems can execute HEC-RAS containers with computational dependencies +- **Conformance Containers**: Supporting systems can execute validation and conformance testing containers +- **Plugin Containers**: Extensible systems can execute custom FFRD-compliant analysis containers + +##### 2.2 Container Configuration + +- **Configuration Files**: Effective systems support passing JSON configuration files to containers as specified in FFRD standards +- **Environment Variables**: Compatible systems support setting required environment variables for FFRD containers +- **Command Line Arguments**: Standard systems support passing command line arguments to containers +- **Exit Code 
Handling**: Reliable systems properly interpret container exit codes and handle success/failure states + +#### 3. Volume Sharing and Data Management + +##### 3.1 Shared Storage + +- **Inter-task Data Sharing**: Effective systems provide mechanisms for tasks to share data through persistent storage +- **Volume Persistence**: Robust systems support volumes that persist beyond individual task execution +- **Storage Size Configuration**: Flexible systems allow specification of storage volume sizes (minimum 1GB, configurable up to hundreds of GB) + +##### 3.2 Data Access Patterns + +- **Read/Write Access**: Well-designed systems support both read-only and read-write volume access modes +- **Multiple Mount Points**: Flexible systems support mounting volumes at different paths within containers +- **Data Isolation**: Secure systems prevent unauthorized access to data between different workflow executions + +#### 4. Resource Allocation + +##### 4.1 Compute Resources + +- **CPU Allocation**: Capable systems support specifying CPU core requirements per task (minimum 0.1 cores, typical 1-8 cores) +- **Memory Allocation**: Standard systems support specifying memory requirements per task (minimum 512MB, typical 1GB-32GB) +- **Resource Enforcement**: Reliable systems enforce specified resource limits to prevent resource contention + +##### 4.2 Resource Constraints + +- **Resource Isolation**: Well-architected systems isolate resources between concurrent tasks +- **Resource Monitoring**: Monitoring-capable systems track actual resource usage against allocated limits +- **Resource Availability**: Intelligent systems queue tasks when insufficient resources are available + +#### 5. 
Logging and Observability + +##### 5.1 Execution Logging + +- **Container Logs**: Comprehensive systems capture and store all container stdout/stderr output +- **Workflow Progress**: Transparent systems provide visibility into workflow execution status and task completion +- **Log Association**: Well-organized systems associate logs with specific workflow runs and individual tasks +- **Log Retention**: Configurable systems retain logs for completed workflows for specified periods + +##### 5.2 Monitoring + +- **Task Status**: Monitoring systems report status of workflow tasks (pending, running, completed, failed) +- **Workflow History**: Historical systems maintain records of workflow executions + +#### 6. Error Handling and Recovery + +##### 6.1 Retry Mechanisms + +- **Configurable Retry**: Resilient systems support configurable retry policies for failed tasks +- **Retry Limits**: Safe systems support maximum retry attempt limits + +##### 6.2 Failure Handling + +- **Failure Isolation**: Robust systems prevent individual task failures from stopping independent workflow branches +- **Partial Completion**: Flexible systems support completing successful workflow branches when other branches fail +- **Failure Reporting**: Clear systems report which tasks failed and provide failure details +- **Manual Recovery**: Recoverable systems support manual intervention to recover from failures + +#### 7. Integration Capabilities + +##### 7.1 Data Sources + +- **S3 Integration**: Compatible systems support integration with S3-compatible object storage for input/output data +- **File System Access**: Flexible systems support mounting external file systems for data access +- **Network Access**: Connected systems support controlled network access for containers requiring external connectivity + +##### 7.2 Operational Integration + +- **Workflow Submission**: Operational systems provide mechanisms for submitting and executing workflows + +#### 8. 
Security and Access Control + +##### 8.1 Access Control + +- **Authentication**: Secure systems provide authentication mechanisms for workflow access +- **Authorization**: Controlled systems provide authorization controls for workflow execution +- **Credential Management**: Protected systems provide secure mechanisms for handling sensitive data and credentials + +### Operational Considerations + +#### Performance Expectations + +- **Concurrent Workflows**: Scalable systems support executing multiple independent workflows simultaneously +- **Multi-node Execution**: Distributed systems support executing workflows across multiple compute nodes + +#### Reliability Considerations + +- **System Availability**: Reliable systems provide high availability for workflow execution +- **Data Durability**: Durable systems ensure persistence of workflow outputs and execution logs +- **Recovery**: Resilient systems support recovery from system failures without losing workflow progress + +#### Compliance Considerations + +- **Audit Trail**: Compliant systems maintain complete audit trails of workflow executions +- **Data Governance**: Governance-aware systems support data governance requirements for FFRD data +- **Documentation**: Well-documented systems provide documentation for operational procedures and troubleshooting + +### Example Workflow Scenario + +A typical FFRD workflow might include: + +1. **Data Preparation**: Validate input configuration and download required datasets from S3 +1. **Model Execution**: Run HEC-HMS hydrologic models with specified parameters +1. **Post-Processing**: Process model outputs and generate analysis results +1. **Validation**: Run conformance tests on outputs +1. 
**Data Upload**: Upload results to designated S3 locations + +Orchestration systems supporting such workflows would execute these tasks in the correct dependency order, share data between tasks through persistent volumes, allocate appropriate compute resources, handle any task failures with retries, and provide complete logging and monitoring throughout the process. + +This exploration of technical capabilities demonstrates the range of features available in modern orchestration systems while highlighting the flexibility organizations have in selecting implementation approaches and technology choices that align with their specific needs and constraints. diff --git a/mkdocs.yml b/mkdocs.yml index 323dff2..51ef4fb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -38,6 +38,7 @@ nav: - Proposals: - proposals/conformance/conformance.md + - proposals/orchestration/orchestration.md - proposals/logging.md - Appendix: diff --git a/reference/orchestration/argo/reference.yaml b/reference/orchestration/argo/reference.yaml new file mode 100644 index 0000000..fd7de7a --- /dev/null +++ b/reference/orchestration/argo/reference.yaml @@ -0,0 +1,81 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: dag-example- +spec: + entrypoint: main + volumeClaimTemplates: # Create a shared volume for the workflow + - metadata: + name: workdir + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi + templates: + - name: main + dag: + tasks: + - name: generate-number + template: generate-number + - name: process-numbers + dependencies: [generate-number] + template: process-numbers + - name: sum-results + dependencies: [process-numbers] + template: sum-results + + - name: generate-number + container: + image: alpine:3.18 + command: [sh, -c] + args: ["echo 5 > /work/number.txt"] + volumeMounts: + - name: workdir + mountPath: /work + + - name: process-numbers + parallelism: 2 # Run two steps at a time + steps: + - - name: process-number + template: 
process-number + withItems: # Iterate over this list of numbers + - 1 + - 2 + - 3 + - 4 + arguments: + parameters: + - name: item + value: "{{item}}" # Pass the item from the list to the process-number template + + - name: process-number + inputs: + parameters: + - name: item + container: + image: alpine:3.18 + command: [sh, -c] + args: + - | + num=$(cat /work/number.txt) + result=$((num + {{inputs.parameters.item}})) + echo $result > /work/result-{{inputs.parameters.item}}.txt + volumeMounts: + - name: workdir + mountPath: /work + + - name: sum-results + container: + image: alpine:3.18 + command: [sh, -c] + args: + - | + sum=0 + for file in /work/result-*.txt; do + sum=$((sum + $(cat $file))) + done + echo "Total sum: $sum" + volumeMounts: + - name: workdir + mountPath: /work \ No newline at end of file