Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions isabl_cli/batch_systems/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def submit_slurm_array(
f"-o '{root}/log.%a' -e '{root}/err.%a' "
f'-J "ISABL: {jobname}" --parsable {root}/in.sh'
)
click.secho(f"DEBUG Slurm command:\n {cmd}", fg="yellow")
jobid = subprocess.check_output(cmd, shell=True).decode("utf-8").strip()

# Job to clean job array rundir
Expand Down
4 changes: 4 additions & 0 deletions isabl_cli/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import six

from isabl_cli import exceptions
from isabl_cli import utils

_DEFAULTS = {
"SUBMIT_ANALYSES": "isabl_cli.batch_systems.submit_local",
Expand Down Expand Up @@ -257,6 +258,9 @@ def _validate(default, setting, attr):
elif attr in import_strings: # coerce import strings into object
setting = perform_import(setting, attr)

if isinstance(setting, str) and setting.startswith("s3://"):
setting = utils.stage_s3_file(setting)

if setting == NotImplemented:
errors.append(f"Missing required setting: '{attr}'")

Expand Down
47 changes: 47 additions & 0 deletions isabl_cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,50 @@ def first_matching_file(directory, pattern, exclude=None):
return str(next(matching_files))
except StopIteration: # pragma: no cover
raise FileNotFoundError(f"No file matching pattern '{pattern}' found in '{directory}'")

def stage_s3_file(s3_url, local_dir="/scratch"):
    """
    Download an S3 file to a local scratch directory, if it doesn't already exist.

    The object is stored as ``<local_dir>/<basename of the object key>``. A
    non-empty regular file already present at that path is reused and S3 is
    not contacted.

    NOTE(review): only the key's basename is used for the local name, so two
    objects sharing a file name (different buckets or prefixes) map to the
    same local path and the first one staged wins -- confirm callers are
    fine with that.

    Args:
        s3_url (str): The S3 URL of the file (e.g., "s3://bucket-name/path/to/file").
        local_dir (str): The local directory where the file will be downloaded.

    Returns:
        str: The local path to the downloaded file.

    Raises:
        ValueError: If the S3 URL is invalid (wrong scheme, missing bucket,
            or a key that does not name a file).
        FileNotFoundError: If the file could not be downloaded.
    """
    scheme = "s3://"
    if not s3_url.startswith(scheme):
        raise ValueError(f"Invalid S3 URL: {s3_url}")

    bucket_name, separator, object_key = s3_url[len(scheme):].partition("/")
    file_name = Path(object_key).name

    # An empty bucket or a key without a usable basename (e.g. "s3://bucket/")
    # would make the local path resolve to the scratch directory itself,
    # which always exists and would be returned as if it were the file.
    if not separator or not bucket_name or file_name in ("", ".."):
        raise ValueError(f"Invalid S3 URL format: {s3_url}")

    # Create the scratch directory if it doesn't exist
    scratch_path = Path(local_dir)
    scratch_path.mkdir(parents=True, exist_ok=True)
    local_file_path = scratch_path / file_name

    # Reuse only a non-empty regular file: a directory or a zero-byte
    # leftover fails the same completeness check applied after a download.
    if local_file_path.is_file() and local_file_path.stat().st_size > 0:
        print(f"File already exists in scratch: {local_file_path}")
        return str(local_file_path)

    # Download the file from S3
    # NOTE(review): relies on a module-level `boto3` import that is not
    # visible in this chunk -- confirm it exists in the file.
    s3 = boto3.client("s3")
    try:
        print(f"Downloading {s3_url} to {local_file_path}...")
        s3.download_file(bucket_name, object_key, str(local_file_path))
    except Exception as e:
        # Keep the documented exception type but preserve the original cause.
        raise FileNotFoundError(f"Failed to download {s3_url}: {e}") from e

    if not local_file_path.is_file() or local_file_path.stat().st_size == 0:
        raise FileNotFoundError(f"File {local_file_path} was not fully downloaded.")

    print(f"File successfully downloaded to {local_file_path}")
    return str(local_file_path)