diff --git a/README.md b/README.md index c771847..665d35c 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ Groundhog automatically manages remote environments (powered by [uv](https://doc **Key concepts:** - `@hog.function()` - Configures a function to run on a Globus Compute endpoint. Decorator kwargs (like `endpoint`, `account`) become the default `user_endpoint_config`. -- `@hog.harness()` - Marks a local entry point that orchestrates remote calls via `.remote()` or `.submit()`. -- The desired remote Python environment (version and dependencies) is specified alongside your code via [PEP 723](https://peps.python.org/pep-0723/) metadata. +- `@hog.harness()` - Marks a local entry point that orchestrates remote calls via `.remote()` or `.submit()`. Can also parse CLI arguments ([example](https://groundhog-hpc.readthedocs.io/en/latest/examples/parameterized-harness/)). +- The desired remote Python environment (version and dependencies) is specified alongside your code via [PEP 723](https://peps.python.org/pep-0723/) metadata. **You don't manage any remote state.** ```python # /// script diff --git a/examples/hello_world.py b/examples/hello_world.py index a0bd8b6..b020418 100644 --- a/examples/hello_world.py +++ b/examples/hello_world.py @@ -3,7 +3,7 @@ # dependencies = [] # # [tool.uv] -# exclude-newer = "2025-12-02T19:48:40Z" +# exclude-newer = "2026-02-02T19:48:40Z" # # [tool.hog.anvil] # Anvil Multi-User Globus Compute Endpoint # endpoint = "5aafb4c1-27b2-40d8-a038-a0277611868f" diff --git a/src/groundhog_hpc/templates/shell_command.sh.jinja b/src/groundhog_hpc/templates/shell_command.sh.jinja index 1a4f865..610a858 100644 --- a/src/groundhog_hpc/templates/shell_command.sh.jinja +++ b/src/groundhog_hpc/templates/shell_command.sh.jinja @@ -1,6 +1,6 @@ set -euo pipefail -# Cleanup temporary files on exit +# Cleanup temporary files on exit (env is preserved for reuse) trap 'rm -f {{ user_script_name }}.py {{ runner_name }}.py {{ script_name }}.in {{ script_name }}.out' EXIT if command -v uv &> /dev/null; then @@ -33,6 +33,13 @@ fi mkdir -p "$UV_CACHE_DIR" "$UV_PYTHON_INSTALL_DIR" {% endraw %} +# Environment reuse: compute path from hash + version +ENV_HASH="{{ env_hash }}" +GROUNDHOG_VERSION="{{ groundhog_version }}" +{% raw %} +ENV_DIR="${{GROUNDHOG_CACHE_BASE}}/${{USER:-$(id -un)}}/groundhog-envs/${{ENV_HASH}}-${{GROUNDHOG_VERSION}}" +{% endraw %} + # Propagate log level to remote environment {% if log_level %} # Local override - use value from dispatching environment @@ -56,9 +63,43 @@ cat > {{ script_name }}.in << 'PAYLOAD_EOF' {{ payload }} PAYLOAD_EOF -"$UV_BIN" run --with {{ version_spec }} \ - --exclude-newer-package groundhog-hpc={{ groundhog_timestamp }} \ - {{ runner_name }}.py +# Check if environment exists; create if not +if [ -d "$ENV_DIR" ]; then + # Environment exists - reuse it + {% raw %} + if [ "${{GROUNDHOG_LOG_LEVEL}}" = "INFO" ] || [ "${{GROUNDHOG_LOG_LEVEL}}" = "DEBUG" ]; then + echo "INFO: Using environment $ENV_DIR" >&2 + fi + {% endraw %} +else + # Create new environment + {% raw %} + if [ "${{GROUNDHOG_LOG_LEVEL}}" = "INFO" ] || [ "${{GROUNDHOG_LOG_LEVEL}}" = "DEBUG" ]; then + echo "INFO: Creating environment $ENV_DIR" >&2 + fi + {% endraw %} + + "$UV_BIN" venv "$ENV_DIR"{% if requires_python %} --python "{{ requires_python }}"{% endif %} + + # Install dependencies + "$UV_BIN" pip install --python "$ENV_DIR/bin/python" \ + {% if exclude_newer %}--exclude-newer "{{ exclude_newer }}" {% endif %}\ + --exclude-newer-package groundhog-hpc={{ groundhog_timestamp }} \ + {% for dep in dependencies %}"{{ dep }}" {% endfor %}{{ version_spec }} + + # Write metadata for debugging + cat > "$ENV_DIR/groundhog-meta.json" << 'META_EOF' +{{ '{{' }} + "created_at": "{{ groundhog_timestamp }}", + "requires_python": "{{ requires_python }}", + "dependencies": {{ dependencies | tojson }}, + "groundhog_version": "{{ groundhog_version }}" +{{ '}}' }} +META_EOF +fi + +# Run using the cached environment's Python directly (bypasses uv resolution) +"$ENV_DIR/bin/python" {{ runner_name }}.py echo "__GROUNDHOG_RESULT__" cat {{ script_name }}.out diff --git a/src/groundhog_hpc/templating.py b/src/groundhog_hpc/templating.py index 4303009..f59a4b3 100644 --- a/src/groundhog_hpc/templating.py +++ b/src/groundhog_hpc/templating.py @@ -7,8 +7,10 @@ 3. Execute the runner with uv, which imports the user script, calls the function, and serializes results """ +import json import logging import os +import re import uuid from datetime import datetime, timezone from hashlib import sha1 @@ -16,6 +18,7 @@ from jinja2 import Environment, FileSystemLoader +from groundhog_hpc.configuration.models import Pep723Metadata from groundhog_hpc.configuration.pep723 import read_pep723, write_pep723 from groundhog_hpc.utils import get_groundhog_version_spec, path_to_module_name @@ -31,6 +34,34 @@ def escape_braces(text: str) -> str: return text.replace("{", "{{").replace("}", "}}") +def compute_env_hash(metadata: Pep723Metadata) -> str: + """Compute a deterministic 8-character hash for environment caching. + + The hash covers requires-python, sorted dependencies, and [tool.uv] + settings. Endpoint configs (tool.hog.*) are intentionally excluded — + a script can have many endpoints and worker_init content is not + always environment-affecting. + + Args: + metadata: PEP 723 metadata from the user script + + Returns: + 8-character hex hash string + """ + hash_data: dict = { + "requires_python": metadata.requires_python, + "dependencies": sorted(metadata.dependencies), + } + + if metadata.tool and metadata.tool.uv: + uv_dict = metadata.tool.uv.model_dump(by_alias=True, exclude_none=True) + if uv_dict: + hash_data["tool_uv"] = uv_dict + + canonical = json.dumps(hash_data, sort_keys=True, separators=(",", ":")) + return sha1(canonical.encode("utf-8")).hexdigest()[:8] + + def template_shell_command(script_path: str, function_name: str, payload: str) -> str: """Generate a shell command to execute a user function on a remote endpoint. @@ -60,6 +91,15 @@ def template_shell_command(script_path: str, function_name: str, payload: str) - metadata = read_pep723(user_script) pep723_metadata = write_pep723(metadata) if metadata else "" + if metadata: + env_hash = compute_env_hash(metadata) + else: + logger.warning( + "Script has no PEP 723 metadata. Environment hash based on script content; " + "environment may change unexpectedly between runs." + ) + env_hash = _script_hash_prefix(user_script) + script_hash = _script_hash_prefix(user_script) script_basename = _extract_script_basename(script_path) random_suffix = uuid.uuid4().hex[:8] @@ -74,6 +114,14 @@ def template_shell_command(script_path: str, function_name: str, payload: str) - version_spec = get_groundhog_version_spec() logger.debug(f"Using groundhog version spec: {version_spec}") + semver_match = re.search(r"==([0-9][^\s]*)", version_spec) + git_hash_match = re.search(r"@([a-f0-9]+)$", version_spec) + if semver_match: + groundhog_version = semver_match.group(1) + elif git_hash_match: + groundhog_version = git_hash_match.group(1) + else: + groundhog_version = _script_hash_prefix(version_spec) # Generate timestamp for groundhog-hpc exclude-newer override # This allows groundhog to bypass user's exclude-newer restrictions @@ -98,6 +146,7 @@ def template_shell_command(script_path: str, function_name: str, payload: str) - # Read local log level (None if not set) local_log_level = os.getenv("GROUNDHOG_LOG_LEVEL") if local_log_level: + local_log_level = local_log_level.upper() logger.debug(f"Propagating log level to remote: {local_log_level}") # Render shell command @@ -112,6 +161,13 @@ def template_shell_command(script_path: str, function_name: str, payload: str) - payload=payload, log_level=local_log_level, groundhog_timestamp=groundhog_timestamp, + env_hash=env_hash, + groundhog_version=groundhog_version, + requires_python=metadata.requires_python if metadata else "", + dependencies=metadata.dependencies if metadata else [], + exclude_newer=metadata.tool.uv.exclude_newer + if metadata and metadata.tool and metadata.tool.uv + else None, ) logger.debug(f"Generated shell command ({len(shell_command_string)} chars)") diff --git a/tests/test_templating.py b/tests/test_templating.py index 61154b5..9e825a6 100644 --- a/tests/test_templating.py +++ b/tests/test_templating.py @@ -134,8 +134,9 @@ def foo(): # Should NOT contain --managed-python (it's now in [tool.uv]) assert "--managed-python" not in shell_command - # Should still have --with for version matching - assert "--with" in shell_command + # version_spec is passed to uv pip install (not via --with since we no longer use uv run) + assert "--with" not in shell_command + assert '"$UV_BIN" pip install' in shell_command def test_generates_valid_shell_command(self, tmp_path): """Test that a valid shell command string is generated.""" @@ -223,8 +224,8 @@ def func(): # Payload should be rendered directly in the command (via Jinja2) assert test_payload in shell_command - def test_includes_uv_run_command(self, tmp_path): - """Test that the shell command uses uv run.""" + def test_includes_uv_commands(self, tmp_path): + """Test that the shell command uses uv for env creation.""" script_path = tmp_path / "script.py" script_content = """# /// script # requires-python = ">=3.12" @@ -240,9 +241,11 @@ def func(): shell_command = template_shell_command(str(script_path), "func", "test_payload") - # Check for uv installation and run command + # Check for uv installation assert "uv.find_uv_bin()" in shell_command - assert '"$UV_BIN" run' in shell_command + # Check for uv venv and pip install (for env creation) + assert '"$UV_BIN" venv' in shell_command + assert '"$UV_BIN" pip install' in shell_command def test_escapes_user_code_curly_braces(self, tmp_path): """Test that curly braces in user code are escaped in final shell command.""" @@ -378,6 +381,300 @@ def func(): assert match, "exclude-newer-package timestamp should be in ISO 8601 format" +class TestComputeEnvHash: + """Test environment hash computation.""" + + def test_hash_is_deterministic(self, tmp_path): + """Same metadata produces same hash.""" + from groundhog_hpc.configuration.models import ( + Pep723Metadata, + ToolMetadata, + UvMetadata, + ) + from groundhog_hpc.templating import compute_env_hash + + metadata = Pep723Metadata( + requires_python=">=3.11,<3.12", + dependencies=["numpy", "pandas"], + tool=ToolMetadata(uv=UvMetadata(exclude_newer="2025-01-01T00:00:00Z")), + ) + + hash1 = compute_env_hash(metadata) + hash2 = compute_env_hash(metadata) + + assert hash1 == hash2 + assert len(hash1) == 8 + + def test_hash_changes_with_different_dependencies(self, tmp_path): + """Different dependencies produce different hashes.""" + from groundhog_hpc.configuration.models import ( + Pep723Metadata, + ToolMetadata, + UvMetadata, + ) + from groundhog_hpc.templating import compute_env_hash + + metadata1 = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy"], + tool=ToolMetadata(uv=UvMetadata(exclude_newer="2025-01-01T00:00:00Z")), + ) + metadata2 = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy", "pandas"], + tool=ToolMetadata(uv=UvMetadata(exclude_newer="2025-01-01T00:00:00Z")), + ) + + hash1 = compute_env_hash(metadata1) + hash2 = compute_env_hash(metadata2) + + assert hash1 != hash2 + + def test_hash_independent_of_dependency_order(self, tmp_path): + """Dependencies in different order produce same hash (sorted internally).""" + from groundhog_hpc.configuration.models import ( + Pep723Metadata, + ToolMetadata, + UvMetadata, + ) + from groundhog_hpc.templating import compute_env_hash + + metadata1 = Pep723Metadata( + requires_python=">=3.11", + dependencies=["pandas", "numpy", "scipy"], + tool=ToolMetadata(uv=UvMetadata(exclude_newer="2025-01-01T00:00:00Z")), + ) + metadata2 = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy", "scipy", "pandas"], + tool=ToolMetadata(uv=UvMetadata(exclude_newer="2025-01-01T00:00:00Z")), + ) + + hash1 = compute_env_hash(metadata1) + hash2 = compute_env_hash(metadata2) + + assert hash1 == hash2 + + def test_hash_changes_with_different_uv_settings(self, tmp_path): + """Different [tool.uv] settings produce different hashes.""" + from groundhog_hpc.configuration.models import ( + Pep723Metadata, + ToolMetadata, + UvMetadata, + ) + from groundhog_hpc.templating import compute_env_hash + + metadata1 = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy"], + tool=ToolMetadata(uv=UvMetadata(exclude_newer="2025-01-01T00:00:00Z")), + ) + metadata2 = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy"], + tool=ToolMetadata(uv=UvMetadata(exclude_newer="2025-06-01T00:00:00Z")), + ) + + hash1 = compute_env_hash(metadata1) + hash2 = compute_env_hash(metadata2) + + assert hash1 != hash2 + + def test_hash_works_without_tool_uv(self, tmp_path): + """Hash works when tool is None.""" + from groundhog_hpc.configuration.models import Pep723Metadata + from groundhog_hpc.templating import compute_env_hash + + metadata = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy"], + tool=None, + ) + + env_hash = compute_env_hash(metadata) + + assert len(env_hash) == 8 + assert env_hash.isalnum() + + def test_hash_unchanged_by_tool_hog_config(self, tmp_path): + """tool.hog.* endpoint configs do not affect the environment hash. + + The hash is based only on Python version, dependencies, and [tool.uv] + settings. Endpoint-specific config (worker_init, endpoint UUIDs, etc.) + is excluded because a single script can have many endpoints, and + worker_init content (e.g., 'module load cuda') is not always + env-affecting. + """ + from groundhog_hpc.configuration.models import ( + EndpointConfig, + Pep723Metadata, + ToolMetadata, + UvMetadata, + ) + from groundhog_hpc.templating import compute_env_hash + + shared_uv = UvMetadata(exclude_newer="2025-01-01T00:00:00Z") + + metadata_no_hog = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy"], + tool=ToolMetadata(uv=shared_uv), + ) + metadata_with_hog = Pep723Metadata( + requires_python=">=3.11", + dependencies=["numpy"], + tool=ToolMetadata( + uv=shared_uv, + hog={ + "my_cluster": EndpointConfig( + endpoint="aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + worker_init="export UV_EXTRA_INDEX_URL=https://private.pypi/simple", + ) + }, + ), + ) + + hash1 = compute_env_hash(metadata_no_hog) + hash2 = compute_env_hash(metadata_with_hog) + + assert hash1 == hash2 + + +class TestEnvReuseTemplating: + """Test environment reuse in shell command templating.""" + + def test_shell_command_includes_env_hash(self, tmp_path): + """Shell command includes the environment hash for caching.""" + script_path = tmp_path / "script.py" + script_content = """# /// script +# requires-python = ">=3.11" +# dependencies = ["numpy"] +# /// + +import groundhog_hpc as hog + +@hog.function() +def func(): + return 1 +""" + script_path.write_text(script_content) + + shell_command = template_shell_command(str(script_path), "func", "payload") + + assert "ENV_HASH=" in shell_command + + def test_shell_command_includes_env_dir_construction(self, tmp_path): + """Shell command constructs ENV_DIR from hash and version.""" + script_path = tmp_path / "script.py" + script_content = """# /// script +# requires-python = ">=3.11" +# dependencies = ["numpy"] +# /// + +import groundhog_hpc as hog + +@hog.function() +def func(): + return 1 +""" + script_path.write_text(script_content) + + shell_command = template_shell_command(str(script_path), "func", "payload") + + assert "groundhog-envs" in shell_command + assert "ENV_DIR=" in shell_command + + def test_shell_command_checks_env_existence(self, tmp_path): + """Shell command checks if environment directory exists.""" + script_path = tmp_path / "script.py" + script_content = """# /// script +# requires-python = ">=3.11" +# dependencies = [] +# /// + +import groundhog_hpc as hog + +@hog.function() +def func(): + return 1 +""" + script_path.write_text(script_content) + + shell_command = template_shell_command(str(script_path), "func", "payload") + + assert 'if [ -d "$ENV_DIR" ]' in shell_command + assert '"$UV_BIN" venv' in shell_command + assert '"$UV_BIN" pip install' in shell_command + + def test_shell_command_runs_python_directly(self, tmp_path): + """Shell command runs Python directly instead of uv run.""" + script_path = tmp_path / "script.py" + script_content = """# /// script +# requires-python = ">=3.11" +# dependencies = [] +# /// + +import groundhog_hpc as hog + +@hog.function() +def func(): + return 1 +""" + script_path.write_text(script_content) + + shell_command = template_shell_command(str(script_path), "func", "payload") + + assert '"$ENV_DIR/bin/python"' in shell_command + assert '"$UV_BIN" run' not in shell_command + + def test_shell_command_writes_metadata_file(self, tmp_path): + """Shell command writes groundhog-meta.json when creating env.""" + script_path = tmp_path / "script.py" + script_content = """# /// script +# requires-python = ">=3.11" +# dependencies = ["numpy", "pandas"] +# /// + +import groundhog_hpc as hog + +@hog.function() +def func(): + return 1 +""" + script_path.write_text(script_content) + + shell_command = template_shell_command(str(script_path), "func", "payload") + + assert "groundhog-meta.json" in shell_command + assert '"requires_python":' in shell_command + assert '"dependencies":' in shell_command + assert '"groundhog_version":' in shell_command + + def test_no_pep723_metadata_uses_script_hash_with_warning(self, tmp_path, caplog): + """Scripts without PEP 723 metadata fall back to script hash with warning.""" + import logging + + script_path = tmp_path / "no_metadata.py" + script_content = """ +import groundhog_hpc as hog + +@hog.function() +def func(): + return 1 +""" + script_path.write_text(script_content) + + with caplog.at_level(logging.WARNING): + shell_command = template_shell_command(str(script_path), "func", "payload") + + assert "ENV_HASH=" in shell_command + assert any( + "no pep 723 metadata" in record.message.lower() + or "environment may change" in record.message.lower() + for record in caplog.records + ) + + class TestDottedQualnames: """Test that templating handles dotted qualnames (class methods)."""