From 081aaf98fb258816711e670eadfc985f72802a09 Mon Sep 17 00:00:00 2001 From: "Juan E. Arango Ossa" Date: Tue, 8 Jul 2025 15:15:45 -0400 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20stage=20settings=20fil?= =?UTF-8?q?es=20that=20are=20in=20s3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- isabl_cli/batch_systems/slurm.py | 1 + isabl_cli/settings.py | 4 +++ isabl_cli/utils.py | 50 ++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/isabl_cli/batch_systems/slurm.py b/isabl_cli/batch_systems/slurm.py index b674e5e..69db810 100644 --- a/isabl_cli/batch_systems/slurm.py +++ b/isabl_cli/batch_systems/slurm.py @@ -165,6 +165,7 @@ def submit_slurm_array( f"-o '{root}/log.%a' -e '{root}/err.%a' " f'-J "ISABL: {jobname}" --parsable {root}/in.sh' ) + click.secho(f"DEBUG Slurm command:\n {cmd}", fg="yellow") jobid = subprocess.check_output(cmd, shell=True).decode("utf-8").strip() # Job to clean job array rundir diff --git a/isabl_cli/settings.py b/isabl_cli/settings.py index c8622b2..2de13a9 100644 --- a/isabl_cli/settings.py +++ b/isabl_cli/settings.py @@ -18,6 +18,7 @@ import six from isabl_cli import exceptions +from isabl_cli import utils _DEFAULTS = { "SUBMIT_ANALYSES": "isabl_cli.batch_systems.submit_local", @@ -257,6 +258,9 @@ def _validate(default, setting, attr): elif attr in import_strings: # coerce import strings into object setting = perform_import(setting, attr) + if isinstance(setting, str) and setting.startswith("s3://"): + setting = utils.stage_s3_file(setting) + if setting == NotImplemented: errors.append(f"Missing required setting: '{attr}'") diff --git a/isabl_cli/utils.py b/isabl_cli/utils.py index b1d9869..d32d3e6 100644 --- a/isabl_cli/utils.py +++ b/isabl_cli/utils.py @@ -344,3 +344,53 @@ def first_matching_file(directory, pattern, exclude=None): return str(next(matching_files)) except StopIteration: # pragma: no cover raise FileNotFoundError(f"No file matching pattern 
def stage_s3_file(s3_url, local_dir="/scratch"):
    """
    Download an S3 file to a local scratch directory, if it doesn't already exist.

    The local copy is named after the last path component of the object key
    only, so two URLs that share a base name resolve to the same local file
    and whichever was staged first is the one returned.

    Args:
        s3_url (str): The S3 URL of the file (e.g., "s3://bucket-name/path/to/file").
        local_dir (str): The local directory where the file will be downloaded.

    Returns:
        str: The local path to the downloaded file.

    Raises:
        ValueError: If the S3 URL is invalid (wrong scheme, missing bucket, or
            an object key that does not name a file).
        FileNotFoundError: If the file could not be downloaded.
    """
    scheme = "s3://"

    if not s3_url.startswith(scheme):
        raise ValueError(f"Invalid S3 URL: {s3_url}")

    bucket_name, _, object_key = s3_url[len(scheme):].partition("/")
    file_name = Path(object_key).name

    # Reject URLs that cannot map to a single file inside the scratch dir:
    # - no bucket ("s3:///key")
    # - no key ("s3://bucket", "s3://bucket/"); an empty name would make the
    #   local path equal to the scratch directory itself
    # - "folder" keys ending in "/"
    # - a ".." base name, which would escape the scratch directory
    if not bucket_name or file_name in ("", "..") or object_key.endswith("/"):
        raise ValueError(f"Invalid S3 URL format: {s3_url}")

    # Create the scratch directory if it doesn't exist
    scratch_path = Path(local_dir)
    scratch_path.mkdir(parents=True, exist_ok=True)
    local_file_path = scratch_path / file_name

    # Only a non-empty regular file counts as a cache hit. Empty files are
    # rejected after a download below, so a zero-byte leftover must not be
    # served from the cache either.
    if local_file_path.is_file() and local_file_path.stat().st_size > 0:
        print(f"File already exists in scratch: {local_file_path}")
        return str(local_file_path)

    # Imported here so the validation and cache-hit paths work without boto3
    # and the name is guaranteed to be in scope when a download is needed.
    import boto3

    # Download the file from S3
    s3 = boto3.client("s3")
    try:
        print(f"Downloading {s3_url} to {local_file_path}...")
        s3.download_file(bucket_name, object_key, str(local_file_path))
    except Exception as error:
        # Any client/transfer failure is reported through the documented
        # FileNotFoundError; the original error is kept as the cause.
        raise FileNotFoundError(f"Failed to download {s3_url}: {error}") from error

    if not local_file_path.is_file() or local_file_path.stat().st_size == 0:
        raise FileNotFoundError(f"File {local_file_path} was not fully downloaded.")

    print(f"File successfully downloaded to {local_file_path}")
    return str(local_file_path)
Arango Ossa" Date: Tue, 8 Jul 2025 15:16:35 -0400 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=8E=A8=20fix=20whitespace?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- isabl_cli/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/isabl_cli/utils.py b/isabl_cli/utils.py index d32d3e6..b077333 100644 --- a/isabl_cli/utils.py +++ b/isabl_cli/utils.py @@ -345,9 +345,6 @@ def first_matching_file(directory, pattern, exclude=None): except StopIteration: # pragma: no cover raise FileNotFoundError(f"No file matching pattern '{pattern}' found in '{directory}'") - - - def stage_s3_file(s3_url, local_dir="/scratch"): """ Download an S3 file to a local scratch directory, if it doesn't already exist.