Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ Groundhog automatically manages remote environments (powered by [uv](https://doc

**Key concepts:**
- `@hog.function()` - Configures a function to run on a Globus Compute endpoint. Decorator kwargs (like `endpoint`, `account`) become the default `user_endpoint_config`.
- `@hog.harness()` - Marks a local entry point that orchestrates remote calls via `.remote()` or `.submit()`.
- The desired remote Python environment (version and dependencies) is specified alongside your code via [PEP 723](https://peps.python.org/pep-0723/) metadata.
- `@hog.harness()` - Marks a local entry point that orchestrates remote calls via `.remote()` or `.submit()`. Can also parse CLI arguments ([example](https://groundhog-hpc.readthedocs.io/en/latest/examples/parameterized-harness/)).
- The desired remote Python environment (version and dependencies) is specified alongside your code via [PEP 723](https://peps.python.org/pep-0723/) metadata. **You don't manage any remote state.**

```python
# /// script
Expand Down
2 changes: 1 addition & 1 deletion examples/hello_world.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# dependencies = []
#
# [tool.uv]
# exclude-newer = "2025-12-02T19:48:40Z"
# exclude-newer = "2026-02-02T19:48:40Z"
#
# [tool.hog.anvil] # Anvil Multi-User Globus Compute Endpoint
# endpoint = "5aafb4c1-27b2-40d8-a038-a0277611868f"
Expand Down
49 changes: 45 additions & 4 deletions src/groundhog_hpc/templates/shell_command.sh.jinja
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
set -euo pipefail

# Cleanup temporary files on exit
# Cleanup temporary files on exit (env is preserved for reuse)
trap 'rm -f {{ user_script_name }}.py {{ runner_name }}.py {{ script_name }}.in {{ script_name }}.out' EXIT

if command -v uv &> /dev/null; then
Expand Down Expand Up @@ -33,6 +33,13 @@ fi
mkdir -p "$UV_CACHE_DIR" "$UV_PYTHON_INSTALL_DIR"
{% endraw %}

# Environment reuse: compute path from hash + version
ENV_HASH="{{ env_hash }}"
GROUNDHOG_VERSION="{{ groundhog_version }}"
{% raw %}
ENV_DIR="${{GROUNDHOG_CACHE_BASE}}/${{USER:-$(id -un)}}/groundhog-envs/${{ENV_HASH}}-${{GROUNDHOG_VERSION}}"
{% endraw %}

# Propagate log level to remote environment
{% if log_level %}
# Local override - use value from dispatching environment
Expand All @@ -56,9 +63,43 @@ cat > {{ script_name }}.in << 'PAYLOAD_EOF'
{{ payload }}
PAYLOAD_EOF

"$UV_BIN" run --with {{ version_spec }} \
--exclude-newer-package groundhog-hpc={{ groundhog_timestamp }} \
{{ runner_name }}.py
# Check if environment exists; create if not
if [ -d "$ENV_DIR" ]; then
# Environment exists - reuse it
{% raw %}
if [ "${{GROUNDHOG_LOG_LEVEL}}" = "INFO" ] || [ "${{GROUNDHOG_LOG_LEVEL}}" = "DEBUG" ]; then
echo "INFO: Using environment $ENV_DIR" >&2
fi
{% endraw %}
else
# Create new environment
{% raw %}
if [ "${{GROUNDHOG_LOG_LEVEL}}" = "INFO" ] || [ "${{GROUNDHOG_LOG_LEVEL}}" = "DEBUG" ]; then
echo "INFO: Creating environment $ENV_DIR" >&2
fi
{% endraw %}

"$UV_BIN" venv "$ENV_DIR"{% if requires_python %} --python "{{ requires_python }}"{% endif %}

# Install dependencies
"$UV_BIN" pip install --python "$ENV_DIR/bin/python" \
{% if exclude_newer %}--exclude-newer "{{ exclude_newer }}" {% endif %}\
--exclude-newer-package groundhog-hpc={{ groundhog_timestamp }} \
{% for dep in dependencies %}"{{ dep }}" {% endfor %}{{ version_spec }}

# Write metadata for debugging
cat > "$ENV_DIR/groundhog-meta.json" << 'META_EOF'
{{ '{{' }}
"created_at": "{{ groundhog_timestamp }}",
"requires_python": "{{ requires_python }}",
"dependencies": {{ dependencies | tojson }},
"groundhog_version": "{{ groundhog_version }}"
{{ '}}' }}
META_EOF
fi

# Run using the cached environment's Python directly (bypasses uv resolution)
"$ENV_DIR/bin/python" {{ runner_name }}.py

echo "__GROUNDHOG_RESULT__"
cat {{ script_name }}.out
56 changes: 56 additions & 0 deletions src/groundhog_hpc/templating.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@
3. Execute the runner with the Python interpreter of a uv-created environment (cached and reused across runs), which imports the user script, calls the function, and serializes results
"""

import json
import logging
import os
import re
import uuid
from datetime import datetime, timezone
from hashlib import sha1
from pathlib import Path

from jinja2 import Environment, FileSystemLoader

from groundhog_hpc.configuration.models import Pep723Metadata
from groundhog_hpc.configuration.pep723 import read_pep723, write_pep723
from groundhog_hpc.utils import get_groundhog_version_spec, path_to_module_name

Expand All @@ -31,6 +34,34 @@ def escape_braces(text: str) -> str:
return text.replace("{", "{{").replace("}", "}}")


def compute_env_hash(metadata: Pep723Metadata) -> str:
    """Compute a deterministic 8-character hash for environment caching.

    The hash covers requires-python, sorted dependencies, and [tool.uv]
    settings. Endpoint configs (tool.hog.*) are intentionally excluded —
    a script can have many endpoints and worker_init content is not
    always environment-affecting.

    Dependencies are sorted before hashing, so two scripts that list the
    same requirements in a different order share one cached environment.
    A missing (``None``) dependency list is hashed the same as an empty one.

    Args:
        metadata: PEP 723 metadata from the user script

    Returns:
        8-character hex hash string
    """
    # PEP 723 makes ``dependencies`` optional; normalise a missing value to
    # an empty list instead of letting ``sorted(None)`` raise TypeError.
    dependencies = sorted(metadata.dependencies or [])

    hash_data: dict = {
        "requires_python": metadata.requires_python,
        "dependencies": dependencies,
    }

    # Only include the [tool.uv] section when it carries at least one
    # explicitly set value, so an empty section does not change the hash.
    uv_settings = metadata.tool.uv if metadata.tool else None
    if uv_settings:
        uv_dict = uv_settings.model_dump(by_alias=True, exclude_none=True)
        if uv_dict:
            hash_data["tool_uv"] = uv_dict

    # Canonical form: sorted keys and no insignificant whitespace, so the
    # digest depends only on the content.
    canonical = json.dumps(hash_data, sort_keys=True, separators=(",", ":"))
    return sha1(canonical.encode("utf-8")).hexdigest()[:8]


def template_shell_command(script_path: str, function_name: str, payload: str) -> str:
"""Generate a shell command to execute a user function on a remote endpoint.

Expand Down Expand Up @@ -60,6 +91,15 @@ def template_shell_command(script_path: str, function_name: str, payload: str) -
metadata = read_pep723(user_script)
pep723_metadata = write_pep723(metadata) if metadata else ""

if metadata:
env_hash = compute_env_hash(metadata)
else:
logger.warning(
"Script has no PEP 723 metadata. Environment hash based on script content; "
"environment may change unexpectedly between runs."
)
env_hash = _script_hash_prefix(user_script)

script_hash = _script_hash_prefix(user_script)
script_basename = _extract_script_basename(script_path)
random_suffix = uuid.uuid4().hex[:8]
Expand All @@ -74,6 +114,14 @@ def template_shell_command(script_path: str, function_name: str, payload: str) -

version_spec = get_groundhog_version_spec()
logger.debug(f"Using groundhog version spec: {version_spec}")
semver_match = re.search(r"==([0-9][^\s]*)", version_spec)
git_hash_match = re.search(r"@([a-f0-9]+)$", version_spec)
if semver_match:
groundhog_version = semver_match.group(1)
elif git_hash_match:
groundhog_version = git_hash_match.group(1)
else:
groundhog_version = _script_hash_prefix(version_spec)

# Generate timestamp for groundhog-hpc exclude-newer override
# This allows groundhog to bypass user's exclude-newer restrictions
Expand All @@ -98,6 +146,7 @@ def template_shell_command(script_path: str, function_name: str, payload: str) -
# Read local log level (None if not set)
local_log_level = os.getenv("GROUNDHOG_LOG_LEVEL")
if local_log_level:
local_log_level = local_log_level.upper()
logger.debug(f"Propagating log level to remote: {local_log_level}")

# Render shell command
Expand All @@ -112,6 +161,13 @@ def template_shell_command(script_path: str, function_name: str, payload: str) -
payload=payload,
log_level=local_log_level,
groundhog_timestamp=groundhog_timestamp,
env_hash=env_hash,
groundhog_version=groundhog_version,
requires_python=metadata.requires_python if metadata else "",
dependencies=metadata.dependencies if metadata else [],
exclude_newer=metadata.tool.uv.exclude_newer
if metadata and metadata.tool and metadata.tool.uv
else None,
)

logger.debug(f"Generated shell command ({len(shell_command_string)} chars)")
Expand Down
Loading
Loading