From 7f6ce4bbbd5a350bbbd3bfc2651759c715073e52 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Wed, 21 Jan 2026 13:52:39 +1000 Subject: [PATCH 1/7] fix(workflow): update for new cpg-flow --- cpg_flow_test/workflow.py | 2 +- pyproject.toml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpg_flow_test/workflow.py b/cpg_flow_test/workflow.py index 5e05594..6932a9a 100644 --- a/cpg_flow_test/workflow.py +++ b/cpg_flow_test/workflow.py @@ -22,7 +22,7 @@ def run_cpg_flow(dry_run=False): # Inserting after the "defaults" config, but before user configs: # set_config_paths(config_paths[:1] + [CONFIG_FILE] + config_paths[1:]) set_config_paths(config_paths) - run_workflow(stages=workflow, dry_run=dry_run) + run_workflow(name='test_workflows_shared', stages=workflow, dry_run=dry_run) def validate_batch_workflow(): diff --git a/pyproject.toml b/pyproject.toml index 3dc727e..0922d17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,9 +6,9 @@ readme = "README.md" requires-python = ">=3.10,<3.11" dependencies = [ "analysis-runner>=3.2.2", - "cpg-flow>=0.2.1", - "cpg-utils>=5.1.1", - "hail>=0.2.133", + "cpg-flow~=1.2", + "cpg-utils>=5.4", + "hail>=0.2.137", "loguru>=0.7.3", ] From bfd5a387bb9d6b0fcaa55c2b7362cfebcb2535cd Mon Sep 17 00:00:00 2001 From: MattWellie Date: Wed, 21 Jan 2026 14:16:15 +1000 Subject: [PATCH 2/7] rewrite workflows --- cpg_flow_test/jobs/__init__.py | 6 - cpg_flow_test/jobs/build_pyramid.py | 8 +- cpg_flow_test/jobs/cumulative_calc.py | 7 +- cpg_flow_test/jobs/filter_evens.py | 9 +- cpg_flow_test/jobs/first_n_primes.py | 7 +- cpg_flow_test/jobs/iterative_digit_sum.py | 7 +- cpg_flow_test/jobs/say_hi.py | 7 +- cpg_flow_test/stages.py | 39 ++--- pyproject.toml | 7 +- requirements-dev.txt | 190 +++++++++++++++------ requirements.txt | 193 +++++++++++++++++----- 11 files changed, 338 insertions(+), 142 deletions(-) diff --git a/cpg_flow_test/jobs/__init__.py b/cpg_flow_test/jobs/__init__.py index d9966b5..e69de29 100644 --- a/cpg_flow_test/jobs/__init__.py +++ b/cpg_flow_test/jobs/__init__.py @@ -1,6 +0,0 @@ -from jobs.build_pyramid import build_pyramid -from jobs.cumulative_calc import cumulative_calc -from jobs.filter_evens import filter_evens -from jobs.first_n_primes import first_n_primes -from jobs.iterative_digit_sum import iterative_digit_sum -from jobs.say_hi import say_hi diff --git a/cpg_flow_test/jobs/build_pyramid.py b/cpg_flow_test/jobs/build_pyramid.py index 43fafe2..b23138e 100644 --- a/cpg_flow_test/jobs/build_pyramid.py +++ b/cpg_flow_test/jobs/build_pyramid.py @@ -1,24 +1,27 @@ from typing import Any from cpg_flow.targets.sequencing_group import SequencingGroup +from cpg_utils.config import config_retrieve +from cpg_utils.hail_batch import get_batch from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger -def build_pyramid( - b: Batch, +def build_pyramid_job( sequencing_groups: list[SequencingGroup], input_files: dict[str, Any], job_attrs: dict[str, str], output_file_path: str, ) -> list[Job]: + b = get_batch() title = 'Build A Pyramid' # Compute the no evens list for each sequencing group sg_jobs = [] sg_output_files = [] for sg in sequencing_groups: # type: ignore job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs | {'sequencing_group': sg.id}) + job.image(config_retrieve['workflow', 'driver_image']) no_evens_input_file_path = input_files[sg.id]['no_evens'] no_evens_input_file = b.read_input(no_evens_input_file_path) @@ -52,6 +55,7 @@ def build_pyramid( # Merge the no evens lists for all sequencing groups into a single file job = b.new_job(name=title, attributes=job_attrs | {'tool': 'cat'}) + job.image(config_retrieve['workflow', 'driver_image']) job.depends_on(*sg_jobs) inputs = ' '.join([b.read_input(f) for f in sg_output_files]) job.command(f'cat {inputs} >> {job.pyramid}') diff --git a/cpg_flow_test/jobs/cumulative_calc.py b/cpg_flow_test/jobs/cumulative_calc.py index 6b71fac..6a79321 100644 --- a/cpg_flow_test/jobs/cumulative_calc.py +++ b/cpg_flow_test/jobs/cumulative_calc.py @@ -1,18 +1,21 @@ from cpg_flow.targets.sequencing_group import SequencingGroup +from cpg_utils.config import config_retrieve +from cpg_utils.hail_batch import get_batch from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger -def cumulative_calc( - b: Batch, +def cumulative_calc_job( sequencing_group: SequencingGroup, input_file_path: str, job_attrs: dict[str, str], output_file_path: str, ) -> list[Job]: + b = get_batch() title = f'Cumulative Calc: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) + job.image(config_retrieve['workflow', 'driver_image']) primes_path = b.read_input(input_file_path) cmd = f""" diff --git a/cpg_flow_test/jobs/filter_evens.py b/cpg_flow_test/jobs/filter_evens.py index 1199e32..22cc3a4 100644 --- a/cpg_flow_test/jobs/filter_evens.py +++ b/cpg_flow_test/jobs/filter_evens.py @@ -2,19 +2,22 @@ from cpg_flow.stage import Stage, StageInput from cpg_flow.targets.sequencing_group import SequencingGroup +from cpg_utils.config import config_retrieve +from cpg_utils.hail_batch import get_batch from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger -def filter_evens( - b: Batch, +def filter_evens_job( sequencing_groups: list[SequencingGroup], input_files: dict[str, dict[str, Any]], job_attrs: dict[str, str], sg_outputs: dict[str, dict[str, Any]], output_file_path: str, ) -> list[Job]: + + b = get_batch() title = 'Filter Evens' # Compute the no evens list for each sequencing group @@ -22,6 +25,7 @@ def filter_evens( sg_output_files = [] for sg in sequencing_groups: # type: ignore job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs) + job.image(config_retrieve['workflow', 'driver_image']) input_file_path = input_files[sg.id]['cumulative'] no_evens_input_file = b.read_input(input_file_path) no_evens_output_file_path = str(sg_outputs[sg.id]) @@ -44,6 +48,7 @@ def filter_evens( # Merge the no evens lists for all sequencing groups into a single file job = b.new_job(name=title, attributes=job_attrs) + job.image(config_retrieve['workflow', 'driver_image']) job.depends_on(*sg_jobs) inputs = ' '.join([b.read_input(f) for f in sg_output_files]) job.command(f'cat {inputs} >> {job.no_evens_file}') diff --git a/cpg_flow_test/jobs/first_n_primes.py b/cpg_flow_test/jobs/first_n_primes.py index c6b85fd..71d6766 100644 --- a/cpg_flow_test/jobs/first_n_primes.py +++ b/cpg_flow_test/jobs/first_n_primes.py @@ -1,19 +1,22 @@ from cpg_flow.targets.sequencing_group import SequencingGroup +from cpg_utils.config import config_retrieve +from cpg_utils.hail_batch import get_batch from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger -def first_n_primes( - b: Batch, +def first_n_primes_job( sequencing_group: SequencingGroup, input_file_path: str, job_attrs: dict[str, str], output_file_path: str, depends_on: Job, ) -> list[Job]: + b = get_batch() title = f'First N Primes: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) + job.image(config_retrieve['workflow', 'driver_image']) id_sum_path = b.read_input(input_file_path) if depends_on: diff --git a/cpg_flow_test/jobs/iterative_digit_sum.py b/cpg_flow_test/jobs/iterative_digit_sum.py index 3b33632..2ff643c 100644 --- a/cpg_flow_test/jobs/iterative_digit_sum.py +++ b/cpg_flow_test/jobs/iterative_digit_sum.py @@ -1,17 +1,20 @@ from cpg_flow.targets.sequencing_group import SequencingGroup +from cpg_utils.config import config_retrieve +from cpg_utils.hail_batch import get_batch from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger -def iterative_digit_sum( - b: Batch, +def iterative_digit_sum_job( sequencing_group: SequencingGroup, job_attrs: dict[str, str], output_file_path: str, ) -> list[Job]: + b = get_batch() title = f'Iterative Digit Sum: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) + job.image(config_retrieve['workflow', 'driver_image']) cmd = f"""\ #!/bin/bash diff --git a/cpg_flow_test/jobs/say_hi.py b/cpg_flow_test/jobs/say_hi.py index c58fe0b..2d07620 100644 --- a/cpg_flow_test/jobs/say_hi.py +++ b/cpg_flow_test/jobs/say_hi.py @@ -1,17 +1,20 @@ from cpg_flow.targets.sequencing_group import SequencingGroup +from cpg_utils.config import config_retrieve +from cpg_utils.hail_batch import get_batch from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger -def say_hi( - b: Batch, +def say_hi_job( sequencing_group: SequencingGroup, job_attrs: dict[str, str], output_file_path: str, ) -> list[Job]: + b = get_batch() title = f'Say Hi: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) + job.image(config_retrieve['workflow', 'driver_image']) cmd = f""" echo "This is a hello from sequencing_group {sequencing_group.id}" > {job.sayhi} diff --git a/cpg_flow_test/stages.py b/cpg_flow_test/stages.py index fb4bc66..147f2ec 100644 --- a/cpg_flow_test/stages.py +++ b/cpg_flow_test/stages.py @@ -59,22 +59,18 @@ def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path 'primes': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_primes.txt', } - def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput | None: - # Get batch - b = get_batch() - + def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput: # Print out alignment input for this sequencing group logger.info('-----ALIGNMENT INPUT-----') logger.info(sequencing_group.alignment_input) # Write id_sum to output file id_sum_output_path = str(self.expected_outputs(sequencing_group).get('id_sum', '')) - job_id_sum = iterative_digit_sum(b, sequencing_group, self.get_job_attrs(sequencing_group), id_sum_output_path) + job_id_sum = iterative_digit_sum.iterative_digit_sum_job(sequencing_group, self.get_job_attrs(sequencing_group), id_sum_output_path) # Generate first N primes primes_output_path = str(self.expected_outputs(sequencing_group).get('primes', '')) - job_primes = first_n_primes( - b, + job_primes = first_n_primes.first_n_primes_job( sequencing_group, id_sum_output_path, self.get_job_attrs(sequencing_group), @@ -94,13 +90,11 @@ def expected_outputs(self, sequencing_group: SequencingGroup): 'cumulative': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_cumulative.txt', } - def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput | None: + def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput: input_txt = inputs.as_path(sequencing_group, GeneratePrimes, 'primes') - b = get_batch() cumulative_calc_output_path = str(self.expected_outputs(sequencing_group).get('cumulative', '')) - job_cumulative_calc = cumulative_calc( - b, + job_cumulative_calc = cumulative_calc.cumulative_calc_job( sequencing_group, input_txt, self.get_job_attrs(sequencing_group), @@ -123,11 +117,13 @@ def expected_outputs(self, sequencing_group: SequencingGroup): 'hello': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_cumulative.txt', } - def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput | None: - b = get_batch() - + def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput: hello_output_path = str(self.expected_outputs(sequencing_group).get('hello', '')) - job_say_hi = say_hi(b, sequencing_group, self.get_job_attrs(sequencing_group), hello_output_path) + job_say_hi = say_hi.say_hi_job( + sequencing_group, + self.get_job_attrs(sequencing_group), + hello_output_path, + ) jobs = [job_say_hi] @@ -148,14 +144,12 @@ def expected_outputs(self, cohort: Cohort): sg_outputs['no_evens'] = cohort.dataset.prefix() / WORKFLOW_FOLDER / f'{cohort.name}_no_evens.txt' return sg_outputs - def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput | None: + def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput: input_files = inputs.as_dict_by_target(CumulativeCalc) - b = get_batch() sg_outputs = self.expected_outputs(cohort) no_evens_output_path = str(sg_outputs['no_evens']) - job_no_evens = filter_evens( - b, + job_no_evens = filter_evens.filter_evens_job( cohort.get_sequencing_groups(), input_files, self.get_job_attrs(cohort), @@ -177,7 +171,7 @@ def expected_outputs(self, multicohort: MultiCohort): 'pyramid': multicohort.analysis_dataset.prefix() / WORKFLOW_FOLDER / f'{multicohort.name}_pyramid.txt', } - def queue_jobs(self, multicohort: MultiCohort, inputs: StageInput) -> StageOutput | None: + def queue_jobs(self, multicohort: MultiCohort, inputs: StageInput) -> StageOutput: input_files_filter_evens = inputs.as_dict_by_target(FilterEvens) logger.info('----INPUT FILES FILTER EVENS----') logger.info(input_files_filter_evens) @@ -197,11 +191,8 @@ def queue_jobs(self, multicohort: MultiCohort, inputs: StageInput) -> StageOutpu logger.info('----INPUT FILES----') logger.info(input_files) - b = get_batch() - pyramid_output_path = str(self.expected_outputs(multicohort).get('pyramid', '')) - job_pyramid = build_pyramid( - b, + job_pyramid = build_pyramid.build_pyramid_job( multicohort.get_sequencing_groups(), input_files, self.get_job_attrs(multicohort), diff --git a/pyproject.toml b/pyproject.toml index 0922d17..8fe23a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "test-workflows-shared" version = "0.1.0" description = "Add your description here" readme = "README.md" -requires-python = ">=3.10,<3.11" +requires-python = ">=3.10,<3.12" dependencies = [ "analysis-runner>=3.2.2", "cpg-flow~=1.2", @@ -89,11 +89,6 @@ ignore = [ "E731", # Do not assign a lambda expression, use a def "E741", # Ambiguous variable name "G004", # Logging statement uses f-string - "PLR0911", # Too many return statements - "PLR0912", # Too many branches - "PLR0913", # Too many arguments to function call - "PLR0915", # Too many statements - "PLW0603", # Using the global statement to update `` is discouraged "PT018", # Assertion should be broken down into multiple parts "Q000", # Single quotes found but double quotes preferred "S101", # Use of assert detected diff --git a/requirements-dev.txt b/requirements-dev.txt index f5eb7f3..b990ef5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,21 +1,21 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --extra=dev --output-file=requirements-dev.txt pyproject.toml -# +# This file was autogenerated by uv via the following command: +# uv pip compile --extra=dev --output-file=requirements-dev.txt pyproject.toml aiodns==2.0.0 # via hail aiohappyeyeballs==2.6.0 # via aiohttp -aiohttp==3.11.13 - # via hail -aiosignal==1.3.2 +aiohttp==3.13.3 + # via + # gql + # hail +aiosignal==1.4.0 # via aiohttp analysis-runner==3.2.3 # via test-workflows-shared (pyproject.toml) -async-timeout==5.0.1 - # via aiohttp +anyio==4.12.1 + # via gql +asttokens==3.0.1 + # via stack-data attrs==25.1.0 # via aiohttp avro==1.11.3 @@ -29,7 +29,7 @@ azure-core==1.32.0 # azure-storage-blob # azure-storage-file-datalake # msrest -azure-identity==1.21.0 +azure-identity==1.25.1 # via hail azure-mgmt-core==1.5.0 # via azure-mgmt-storage @@ -42,6 +42,10 @@ azure-storage-blob==12.25.0 # hail azure-storage-file-datalake==12.19.0 # via cloudpathlib +backoff==2.2.1 + # via + # gql + # metamist black==24.10.0 # via test-workflows-shared (pyproject.toml) bokeh==3.4.3 @@ -61,10 +65,8 @@ botocore==1.37.11 # s3transfer build==1.2.2.post1 # via pip-tools -cachecontrol[filecache]==0.14.1 - # via - # cachecontrol - # pip-audit +cachecontrol==0.14.1 + # via pip-audit cachetools==5.5.2 # via google-auth certifi==2024.12.14 @@ -82,20 +84,27 @@ charset-normalizer==3.4.0 click==8.1.7 # via # black + # metamist # pip-tools # typer -cloudpathlib[all,azure,gs,s3]==0.21.0 +cloudpathlib==0.21.0 # via # analysis-runner # cpg-utils +comm==0.2.3 + # via ipywidgets commonmark==0.9.1 # via rich contourpy==1.3.1 # via bokeh -cpg-utils==5.2.0 +cpg-flow==1.3.0 + # via test-workflows-shared (pyproject.toml) +cpg-utils==5.4.3 # via - # analysis-runner # test-workflows-shared (pyproject.toml) + # analysis-runner + # cpg-flow + # metamist cryptography==44.0.2 # via # azure-identity @@ -105,17 +114,21 @@ cryptography==44.0.2 cyclonedx-python-lib==7.6.2 # via pip-audit decorator==4.4.2 - # via hail + # via + # hail + # ipython defusedxml==0.7.1 # via py-serializable deprecated==1.2.18 # via # cpg-utils # hail -dill==0.3.9 +dill==0.4.1 # via hail distlib==0.3.9 # via virtualenv +executing==2.2.1 + # via stack-data filelock==3.16.1 # via # cachecontrol @@ -127,27 +140,33 @@ frozenlist==1.5.0 # aiohttp # aiosignal # hail -google-api-core[grpc]==2.24.2 +google-api-core==1.34.1 # via + # google-cloud-artifact-registry # google-cloud-core # google-cloud-secret-manager # google-cloud-storage + # metamist google-auth==2.38.0 # via # cpg-utils # google-api-core # google-auth-oauthlib + # google-cloud-artifact-registry # google-cloud-core # google-cloud-secret-manager # google-cloud-storage # hail + # metamist google-auth-oauthlib==0.8.0 # via hail +google-cloud-artifact-registry==1.19.0 + # via cpg-utils google-cloud-core==2.4.3 # via google-cloud-storage google-cloud-secret-manager==2.23.1 # via cpg-utils -google-cloud-storage==3.1.0 +google-cloud-storage==2.14.0 # via cloudpathlib google-crc32c==1.6.0 # via @@ -155,27 +174,37 @@ google-crc32c==1.6.0 # google-resumable-media google-resumable-media==2.7.2 # via google-cloud-storage -googleapis-common-protos[grpc]==1.69.1 +googleapis-common-protos==1.69.1 # via # google-api-core # grpc-google-iam-v1 # grpcio-status +gql==3.5.3 + # via metamist +graphql-core==3.2.6 + # via gql grpc-google-iam-v1==0.14.1 - # via google-cloud-secret-manager + # via + # google-cloud-artifact-registry + # google-cloud-secret-manager grpcio==1.71.0 # via + # cpg-flow # google-api-core + # google-cloud-artifact-registry # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status grpcio-status==1.48.2 # via # analysis-runner + # cpg-flow # google-api-core -hail==0.2.134 +hail==0.2.137 # via - # analysis-runner # test-workflows-shared (pyproject.toml) + # analysis-runner + # cpg-flow html5lib==1.1 # via pip-audit humanize==4.12.1 @@ -184,8 +213,15 @@ identify==2.6.3 # via pre-commit idna==3.10 # via + # anyio # requests # yarl +ipython==9.9.0 + # via ipywidgets +ipython-pygments-lexers==1.1.1 + # via ipython +ipywidgets==8.1.8 + # via cpg-flow isodate==0.7.2 # via # azure-storage-blob @@ -193,6 +229,8 @@ isodate==0.7.2 # msrest janus==1.0.0 # via hail +jedi==0.19.2 + # via ipython jinja2==3.1.6 # via bokeh jmespath==1.0.1 @@ -201,10 +239,20 @@ jmespath==1.0.1 # botocore jproperties==2.1.2 # via hail +jupyterlab-widgets==3.0.16 + # via ipywidgets license-expression==30.4.0 # via cyclonedx-python-lib +loguru==0.7.3 + # via + # test-workflows-shared (pyproject.toml) + # cpg-flow markupsafe==3.0.2 # via jinja2 +matplotlib-inline==0.2.1 + # via ipython +metamist==7.13.4 + # via cpg-flow msal==1.31.1 # via # azure-identity @@ -223,9 +271,11 @@ mypy-extensions==1.0.0 # via black nest-asyncio==1.6.0 # via hail +networkx==3.6.1 + # via cpg-flow nodeenv==1.9.1 # via pre-commit -numpy==1.26.4 +numpy==2.4.1 # via # bokeh # contourpy @@ -252,10 +302,18 @@ pandas==2.2.3 # hail parsimonious==0.10.0 # via hail +parso==0.8.5 + # via jedi pathspec==0.12.1 # via black +pexpect==4.9.0 + # via ipython pillow==11.1.0 # via bokeh +pip==25.3 + # via + # pip-api + # pip-tools pip-api==0.0.34 # via pip-audit pip-audit==2.7.3 @@ -269,28 +327,38 @@ platformdirs==4.3.6 # black # virtualenv plotly==5.24.1 - # via hail + # via + # cpg-flow + # hail portalocker==2.10.1 # via msal-extensions pre-commit==4.0.1 - # via test-workflows-shared (pyproject.toml) + # via + # test-workflows-shared (pyproject.toml) + # cpg-flow +prompt-toolkit==3.0.52 + # via ipython propcache==0.3.0 # via # aiohttp # yarl proto-plus==1.26.1 # via - # google-api-core + # google-cloud-artifact-registry # google-cloud-secret-manager protobuf==3.20.2 # via # google-api-core + # google-cloud-artifact-registry # google-cloud-secret-manager # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status - # hail # proto-plus +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.3 + # via stack-data py-serializable==1.1.2 # via cyclonedx-python-lib py4j==0.10.9.7 @@ -306,11 +374,12 @@ pycares==4.5.0 pycparser==2.22 # via cffi pygments==2.18.0 - # via rich -pyjwt[crypto]==2.10.1 # via - # msal - # pyjwt + # ipython + # ipython-pygments-lexers + # rich +pyjwt==2.10.1 + # via msal pyparsing==3.2.0 # via pip-requirements-parser pyproject-hooks==1.2.0 @@ -322,6 +391,7 @@ pyspark==3.5.5 python-dateutil==2.9.0.post0 # via # botocore + # metamist # pandas python-json-logger==2.0.7 # via hail @@ -330,11 +400,12 @@ pytz==2025.1 pyyaml==6.0.2 # via # bokeh + # cpg-flow # hail # pre-commit regex==2024.11.6 # via parsimonious -requests==2.32.3 +requests==2.32.5 # via # analysis-runner # azure-core @@ -342,15 +413,20 @@ requests==2.32.3 # cpg-utils # google-api-core # google-cloud-storage + # gql # hail + # metamist # msal # msrest # pip-audit # requests-oauthlib + # requests-toolbelt requests-oauthlib==2.0.0 # via # google-auth-oauthlib # msrest +requests-toolbelt==1.0.0 + # via gql rich==12.6.0 # via # hail @@ -360,8 +436,10 @@ rsa==4.9 # via google-auth s3transfer==0.11.4 # via boto3 -scipy==1.11.4 +scipy==1.17.0 # via hail +setuptools==80.9.0 + # via pip-tools shellingham==1.5.4 # via typer six==1.17.0 @@ -374,58 +452,66 @@ sortedcontainers==2.4.0 # via # cyclonedx-python-lib # hail +stack-data==0.6.3 + # via ipython tabulate==0.9.0 # via # analysis-runner # cpg-utils # hail + # metamist tenacity==9.0.0 # via plotly toml==0.10.2 # via # cpg-utils # pip-audit -tomli==2.2.1 +tornado==6.5.4 # via - # black - # build - # pip-tools -tornado==6.4.2 - # via bokeh + # bokeh + # cpg-flow +traitlets==5.14.3 + # via + # ipython + # ipywidgets + # matplotlib-inline typer==0.15.2 # via hail typing-extensions==4.12.2 # via + # aiosignal + # anyio # azure-core # azure-identity # azure-storage-blob # azure-storage-file-datalake - # black - # cloudpathlib + # ipython # janus - # multidict # typer tzdata==2025.1 # via pandas urllib3==2.2.3 # via # botocore + # metamist # requests uvloop==0.21.0 # via hail virtualenv==20.28.0 # via pre-commit +wcwidth==0.2.14 + # via prompt-toolkit webencodings==0.5.1 # via html5lib wheel==0.45.1 # via pip-tools +widgetsnbextension==4.0.15 + # via ipywidgets wrapt==1.17.2 # via deprecated xyzservices==2025.1.0 # via bokeh yarl==1.18.3 - # via aiohttp - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools + # via + # aiohttp + # gql diff --git a/requirements.txt b/requirements.txt index 6253df2..3cf2f5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,21 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --output-file=requirements.txt pyproject.toml -# +# This file was autogenerated by uv via the following command: +# uv pip compile --output-file=requirements.txt pyproject.toml aiodns==2.0.0 # via hail -aiohappyeyeballs==2.4.4 +aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.11.10 - # via hail -aiosignal==1.3.2 +aiohttp==3.13.3 + # via + # gql + # hail +aiosignal==1.4.0 # via aiohttp analysis-runner==3.2.2 # via test-workflows-shared (pyproject.toml) -async-timeout==5.0.1 - # via aiohttp +anyio==4.12.1 + # via gql +asttokens==3.0.1 + # via stack-data attrs==24.2.0 # via aiohttp avro==1.11.3 @@ -29,7 +29,7 @@ azure-core==1.32.0 # azure-storage-blob # azure-storage-file-datalake # msrest -azure-identity==1.19.0 +azure-identity==1.25.1 # via hail azure-mgmt-core==1.5.0 # via azure-mgmt-storage @@ -42,6 +42,10 @@ azure-storage-blob==12.24.0 # hail azure-storage-file-datalake==12.18.0 # via cloudpathlib +backoff==2.2.1 + # via + # gql + # metamist bokeh==3.3.4 # via hail boto3==1.35.81 @@ -65,22 +69,32 @@ cffi==1.17.1 # via # cryptography # pycares +cfgv==3.5.0 + # via pre-commit charset-normalizer==3.4.0 # via requests click==8.1.7 - # via typer -cloudpathlib[all,azure,gs,s3]==0.20.0 + # via + # metamist + # typer +cloudpathlib==0.20.0 # via # analysis-runner # cpg-utils +comm==0.2.3 + # via ipywidgets commonmark==0.9.1 # via rich contourpy==1.3.1 # via bokeh -cpg-utils==5.2.0 +cpg-flow==1.3.0 + # via test-workflows-shared (pyproject.toml) +cpg-utils==5.4.3 # via - # analysis-runner # test-workflows-shared (pyproject.toml) + # analysis-runner + # cpg-flow + # metamist cryptography==44.0.0 # via # azure-identity @@ -88,13 +102,21 @@ cryptography==44.0.0 # msal # pyjwt decorator==4.4.2 - # via hail + # via + # hail + # ipython deprecated==1.2.15 # via # cpg-utils # hail -dill==0.3.9 +dill==0.4.1 # via hail +distlib==0.4.0 + # via virtualenv +executing==2.2.1 + # via stack-data +filelock==3.20.3 + # via virtualenv frozendict==2.4.6 # via cpg-utils frozenlist==1.5.0 @@ -102,27 +124,33 @@ frozenlist==1.5.0 # aiohttp # aiosignal # hail -google-api-core[grpc]==2.24.0 +google-api-core==1.34.1 # via + # google-cloud-artifact-registry # google-cloud-core # google-cloud-secret-manager # google-cloud-storage + # metamist google-auth==2.37.0 # via # cpg-utils # google-api-core # google-auth-oauthlib + # google-cloud-artifact-registry # google-cloud-core # google-cloud-secret-manager # google-cloud-storage # hail + # metamist google-auth-oauthlib==0.8.0 # via hail +google-cloud-artifact-registry==1.19.0 + # via cpg-utils google-cloud-core==2.4.1 # via google-cloud-storage google-cloud-secret-manager==2.22.0 # via cpg-utils -google-cloud-storage==2.19.0 +google-cloud-storage==2.14.0 # via cloudpathlib google-crc32c==1.6.0 # via @@ -130,31 +158,51 @@ google-crc32c==1.6.0 # google-resumable-media google-resumable-media==2.7.2 # via google-cloud-storage -googleapis-common-protos[grpc]==1.66.0 +googleapis-common-protos==1.66.0 # via # google-api-core # grpc-google-iam-v1 # grpcio-status -grpc-google-iam-v1==0.13.1 - # via google-cloud-secret-manager +gql==3.5.3 + # via metamist +graphql-core==3.2.6 + # via gql +grpc-google-iam-v1==0.14.3 + # via + # google-cloud-artifact-registry + # google-cloud-secret-manager grpcio==1.68.1 # via + # cpg-flow # google-api-core + # google-cloud-artifact-registry # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status grpcio-status==1.48.2 - # via google-api-core -hail==0.2.133 # via - # analysis-runner + # cpg-flow + # google-api-core +hail==0.2.137 + # via # test-workflows-shared (pyproject.toml) + # analysis-runner + # cpg-flow humanize==4.11.0 # via hail +identify==2.6.16 + # via pre-commit idna==3.10 # via + # anyio # requests # yarl +ipython==9.9.0 + # via ipywidgets +ipython-pygments-lexers==1.1.1 + # via ipython +ipywidgets==8.1.8 + # via cpg-flow isodate==0.7.2 # via # azure-storage-blob @@ -162,6 +210,8 @@ isodate==0.7.2 # msrest janus==1.0.0 # via hail +jedi==0.19.2 + # via ipython jinja2==3.1.5 # via bokeh jmespath==1.0.1 @@ -170,8 +220,18 @@ jmespath==1.0.1 # botocore jproperties==2.1.2 # via hail +jupyterlab-widgets==3.0.16 + # via ipywidgets +loguru==0.7.3 + # via + # test-workflows-shared (pyproject.toml) + # cpg-flow markupsafe==3.0.2 # via jinja2 +matplotlib-inline==0.2.1 + # via ipython +metamist==7.13.4 + # via cpg-flow msal==1.31.1 # via # azure-identity @@ -186,7 +246,11 @@ multidict==6.1.0 # yarl nest-asyncio==1.6.0 # via hail -numpy==1.26.4 +networkx==3.6.1 + # via cpg-flow +nodeenv==1.10.0 + # via pre-commit +numpy==2.4.1 # via # bokeh # contourpy @@ -207,29 +271,45 @@ pandas==2.2.3 # hail parsimonious==0.10.0 # via hail +parso==0.8.5 + # via jedi +pexpect==4.9.0 + # via ipython pillow==11.0.0 # via bokeh +platformdirs==4.5.1 + # via virtualenv plotly==5.24.1 - # via hail + # via + # cpg-flow + # hail portalocker==2.10.1 # via msal-extensions +pre-commit==4.5.1 + # via cpg-flow +prompt-toolkit==3.0.52 + # via ipython propcache==0.2.1 # via # aiohttp # yarl proto-plus==1.25.0 # via - # google-api-core + # google-cloud-artifact-registry # google-cloud-secret-manager protobuf==3.20.2 # via # google-api-core + # google-cloud-artifact-registry # google-cloud-secret-manager # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status - # hail # proto-plus +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.3 + # via stack-data py4j==0.10.9.7 # via pyspark pyasn1==0.6.1 @@ -243,16 +323,18 @@ pycares==4.5.0 pycparser==2.22 # via cffi pygments==2.18.0 - # via rich -pyjwt[crypto]==2.10.1 # via - # msal - # pyjwt + # ipython + # ipython-pygments-lexers + # rich +pyjwt==2.10.1 + # via msal pyspark==3.5.3 # via hail python-dateutil==2.9.0.post0 # via # botocore + # metamist # pandas python-json-logger==2.0.7 # via hail @@ -261,24 +343,31 @@ pytz==2024.2 pyyaml==6.0.2 # via # bokeh + # cpg-flow # hail + # pre-commit regex==2024.11.6 # via parsimonious -requests==2.32.3 +requests==2.32.5 # via # analysis-runner # azure-core # cpg-utils # google-api-core # google-cloud-storage + # gql # hail + # metamist # msal # msrest # requests-oauthlib + # requests-toolbelt requests-oauthlib==2.0.0 # via # google-auth-oauthlib # msrest +requests-toolbelt==1.0.0 + # via gql rich==12.6.0 # via # hail @@ -287,7 +376,7 @@ rsa==4.9 # via google-auth s3transfer==0.10.4 # via boto3 -scipy==1.11.4 +scipy==1.17.0 # via hail shellingham==1.5.4 # via typer @@ -298,40 +387,60 @@ six==1.17.0 # python-dateutil sortedcontainers==2.4.0 # via hail +stack-data==0.6.3 + # via ipython tabulate==0.9.0 # via # analysis-runner # cpg-utils # hail + # metamist tenacity==9.0.0 # via plotly toml==0.10.2 # via cpg-utils -tornado==6.4.2 - # via bokeh +tornado==6.5.4 + # via + # bokeh + # cpg-flow +traitlets==5.14.3 + # via + # ipython + # ipywidgets + # matplotlib-inline typer==0.15.1 # via hail typing-extensions==4.12.2 # via + # aiosignal + # anyio # azure-core # azure-identity # azure-storage-blob # azure-storage-file-datalake - # cloudpathlib + # ipython # janus - # multidict # typer tzdata==2024.2 # via pandas urllib3==2.2.3 # via # botocore + # metamist # requests uvloop==0.21.0 # via hail +virtualenv==20.36.1 + # via pre-commit +wcwidth==0.2.14 + # via prompt-toolkit +widgetsnbextension==4.0.15 + # via ipywidgets wrapt==1.17.0 # via deprecated xyzservices==2024.9.0 # via bokeh yarl==1.18.3 - # via aiohttp + # via + # aiohttp + # gql From da2622678a328ad49f085a513163ae45400685fa Mon Sep 17 00:00:00 2001 From: MattWellie Date: Wed, 21 Jan 2026 14:22:32 +1000 Subject: [PATCH 3/7] correction to config call --- cpg_flow_test/jobs/build_pyramid.py | 4 ++-- cpg_flow_test/jobs/cumulative_calc.py | 2 +- cpg_flow_test/jobs/filter_evens.py | 4 ++-- cpg_flow_test/jobs/first_n_primes.py | 2 +- cpg_flow_test/jobs/iterative_digit_sum.py | 2 +- cpg_flow_test/jobs/say_hi.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpg_flow_test/jobs/build_pyramid.py b/cpg_flow_test/jobs/build_pyramid.py index b23138e..bff1a40 100644 --- a/cpg_flow_test/jobs/build_pyramid.py +++ b/cpg_flow_test/jobs/build_pyramid.py @@ -21,7 +21,7 @@ def build_pyramid_job( sg_output_files = [] for sg in sequencing_groups: # type: ignore job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs | {'sequencing_group': sg.id}) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) no_evens_input_file_path = input_files[sg.id]['no_evens'] no_evens_input_file = b.read_input(no_evens_input_file_path) @@ -55,7 +55,7 @@ def build_pyramid_job( # Merge the no evens lists for all sequencing groups into a single file job = b.new_job(name=title, attributes=job_attrs | {'tool': 'cat'}) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) job.depends_on(*sg_jobs) inputs = ' '.join([b.read_input(f) for f in sg_output_files]) job.command(f'cat {inputs} >> {job.pyramid}') diff --git a/cpg_flow_test/jobs/cumulative_calc.py b/cpg_flow_test/jobs/cumulative_calc.py index 6a79321..09562a7 100644 --- a/cpg_flow_test/jobs/cumulative_calc.py +++ b/cpg_flow_test/jobs/cumulative_calc.py @@ -15,7 +15,7 @@ def cumulative_calc_job( b = get_batch() title = f'Cumulative Calc: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) primes_path = b.read_input(input_file_path) cmd = f""" diff --git a/cpg_flow_test/jobs/filter_evens.py b/cpg_flow_test/jobs/filter_evens.py index 22cc3a4..e61a436 100644 --- a/cpg_flow_test/jobs/filter_evens.py +++ b/cpg_flow_test/jobs/filter_evens.py @@ -25,7 +25,7 @@ def filter_evens_job( sg_output_files = [] for sg in sequencing_groups: # type: ignore job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) input_file_path = input_files[sg.id]['cumulative'] no_evens_input_file = b.read_input(input_file_path) no_evens_output_file_path = str(sg_outputs[sg.id]) @@ -48,7 +48,7 @@ def filter_evens_job( # Merge the no evens lists for all sequencing groups into a single file job = b.new_job(name=title, attributes=job_attrs) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) job.depends_on(*sg_jobs) inputs = ' '.join([b.read_input(f) for f in sg_output_files]) job.command(f'cat {inputs} >> {job.no_evens_file}') diff --git a/cpg_flow_test/jobs/first_n_primes.py b/cpg_flow_test/jobs/first_n_primes.py index 71d6766..e7180cb 100644 --- a/cpg_flow_test/jobs/first_n_primes.py +++ b/cpg_flow_test/jobs/first_n_primes.py @@ -16,7 +16,7 @@ def first_n_primes_job( b = get_batch() title = f'First N Primes: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) id_sum_path = b.read_input(input_file_path) if depends_on: diff --git a/cpg_flow_test/jobs/iterative_digit_sum.py b/cpg_flow_test/jobs/iterative_digit_sum.py index 2ff643c..8d74fd4 100644 --- a/cpg_flow_test/jobs/iterative_digit_sum.py +++ b/cpg_flow_test/jobs/iterative_digit_sum.py @@ -14,7 +14,7 @@ def iterative_digit_sum_job( b = get_batch() title = f'Iterative Digit Sum: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) cmd = f"""\ #!/bin/bash diff --git a/cpg_flow_test/jobs/say_hi.py b/cpg_flow_test/jobs/say_hi.py index 2d07620..b1aaa41 100644 --- a/cpg_flow_test/jobs/say_hi.py +++ b/cpg_flow_test/jobs/say_hi.py @@ -14,7 +14,7 @@ def say_hi_job( b = get_batch() title = f'Say Hi: {sequencing_group.id}' job = b.new_job(name=title, attributes=job_attrs) - job.image(config_retrieve['workflow', 'driver_image']) + job.image(config_retrieve(['workflow', 'driver_image'])) cmd = f""" echo "This is a hello from sequencing_group {sequencing_group.id}" > {job.sayhi} From f3f207b7872037ba94f90261db61efe0171cceb0 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Wed, 21 Jan 2026 14:31:51 +1000 Subject: [PATCH 4/7] more changes --- .github/workflows/lint.yaml | 2 +- .github/workflows/security.yaml | 2 +- cpg_flow_test/jobs/build_pyramid.py | 7 +- cpg_flow_test/jobs/cumulative_calc.py | 1 - cpg_flow_test/jobs/filter_evens.py | 8 +- cpg_flow_test/jobs/first_n_primes.py | 1 - cpg_flow_test/jobs/iterative_digit_sum.py | 1 - cpg_flow_test/jobs/say_hi.py | 1 - cpg_flow_test/stages.py | 5 +- cpg_flow_test/workflow.py | 8 +- pyproject.toml | 115 ---------------------- 11 files changed, 10 insertions(+), 141 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 2295876..7151a83 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | diff --git a/.github/workflows/security.yaml b/.github/workflows/security.yaml index d88375d..e01fcaf 100644 --- a/.github/workflows/security.yaml +++ b/.github/workflows/security.yaml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | diff --git a/cpg_flow_test/jobs/build_pyramid.py b/cpg_flow_test/jobs/build_pyramid.py index bff1a40..6c9bc5a 100644 --- a/cpg_flow_test/jobs/build_pyramid.py +++ b/cpg_flow_test/jobs/build_pyramid.py @@ -3,7 +3,6 @@ from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger @@ -19,7 +18,7 @@ def build_pyramid_job( # Compute the no evens list for each sequencing group sg_jobs = [] sg_output_files = [] - for sg in sequencing_groups: # type: ignore + for sg in sequencing_groups: job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs | {'sequencing_group': sg.id}) job.image(config_retrieve(['workflow', 'driver_image'])) no_evens_input_file_path = input_files[sg.id]['no_evens'] @@ -64,6 +63,4 @@ def build_pyramid_job( logger.info('-----PRINT PYRAMID-----') logger.info(output_file_path) - all_jobs = [job, *sg_jobs] - - return all_jobs + return [job, *sg_jobs] diff --git a/cpg_flow_test/jobs/cumulative_calc.py b/cpg_flow_test/jobs/cumulative_calc.py index 09562a7..98ad1ba 100644 --- a/cpg_flow_test/jobs/cumulative_calc.py +++ b/cpg_flow_test/jobs/cumulative_calc.py @@ -1,7 +1,6 @@ from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger diff --git a/cpg_flow_test/jobs/filter_evens.py b/cpg_flow_test/jobs/filter_evens.py index e61a436..c90410a 100644 --- a/cpg_flow_test/jobs/filter_evens.py +++ b/cpg_flow_test/jobs/filter_evens.py @@ -1,10 +1,8 @@ from typing import Any -from cpg_flow.stage import Stage, StageInput from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger @@ -23,7 +21,7 @@ def filter_evens_job( # Compute the no evens list for each sequencing group sg_jobs = [] sg_output_files = [] - for sg in sequencing_groups: # type: ignore + for sg in sequencing_groups: job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs) job.image(config_retrieve(['workflow', 'driver_image'])) input_file_path = input_files[sg.id]['cumulative'] @@ -57,6 +55,4 @@ def filter_evens_job( logger.info('-----PRINT NO EVENS-----') logger.info(output_file_path) - all_jobs = [job, *sg_jobs] - - return all_jobs + return [job, *sg_jobs] diff --git a/cpg_flow_test/jobs/first_n_primes.py b/cpg_flow_test/jobs/first_n_primes.py index e7180cb..dcc3404 100644 --- a/cpg_flow_test/jobs/first_n_primes.py +++ b/cpg_flow_test/jobs/first_n_primes.py @@ -1,7 +1,6 @@ from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger diff --git a/cpg_flow_test/jobs/iterative_digit_sum.py b/cpg_flow_test/jobs/iterative_digit_sum.py index 8d74fd4..189f73e 100644 --- a/cpg_flow_test/jobs/iterative_digit_sum.py +++ b/cpg_flow_test/jobs/iterative_digit_sum.py @@ -1,7 +1,6 @@ from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger diff --git a/cpg_flow_test/jobs/say_hi.py b/cpg_flow_test/jobs/say_hi.py index b1aaa41..4e4d12e 100644 --- a/cpg_flow_test/jobs/say_hi.py +++ b/cpg_flow_test/jobs/say_hi.py @@ -1,7 +1,6 @@ from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch import Batch from hailtop.batch.job import Job from loguru import logger diff --git a/cpg_flow_test/stages.py b/cpg_flow_test/stages.py index 147f2ec..d91d550 100644 --- a/cpg_flow_test/stages.py +++ b/cpg_flow_test/stages.py @@ -7,10 +7,9 @@ from cpg_flow.targets.multicohort import MultiCohort from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils import Path -from cpg_utils.hail_batch import get_batch from jobs import build_pyramid, cumulative_calc, filter_evens, first_n_primes, iterative_digit_sum, say_hi -""" +r""" Here's a fun programming task with four interdependent steps, using the concept of **prime numbers** and their relationships: --- @@ -80,7 +79,7 @@ def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> S jobs = [job_id_sum, job_primes] - return self.make_outputs(sequencing_group, data=self.expected_outputs(sequencing_group), jobs=jobs) # type: ignore + return self.make_outputs(sequencing_group, data=self.expected_outputs(sequencing_group), jobs=jobs) @stage(required_stages=[GeneratePrimes], analysis_keys=['cumulative'], analysis_type='custom') diff --git a/cpg_flow_test/workflow.py b/cpg_flow_test/workflow.py index 6932a9a..942f6fd 100644 --- a/cpg_flow_test/workflow.py +++ b/cpg_flow_test/workflow.py @@ -1,26 +1,22 @@ -#!/usr/bin/env python3 import os import sys -from pathlib import Path from cpg_flow.workflow import run_workflow from cpg_utils.config import set_config_paths from stages import BuildAPrimePyramid, CumulativeCalc, FilterEvens, GeneratePrimes, SayHi TMP_DIR = os.getenv('TMP_DIR') -# CONFIG_FILE = str(Path(__file__).parent / 'config.toml') message = "Hello, Hail Batch! I'm CPG flow, nice to meet you." -def run_cpg_flow(dry_run=False): +def run_cpg_flow(dry_run: bool = False): workflow = [GeneratePrimes, CumulativeCalc, FilterEvens, BuildAPrimePyramid, SayHi] config_paths = os.environ['CPG_CONFIG_PATH'].split(',') print(f'CPG_CONFIG_PATHS: {config_paths}') # Inserting after the "defaults" config, but before user configs: - # set_config_paths(config_paths[:1] + [CONFIG_FILE] + config_paths[1:]) set_config_paths(config_paths) run_workflow(name='test_workflows_shared', stages=workflow, dry_run=dry_run) @@ -31,7 +27,7 @@ def validate_batch_workflow(): sys.exit(1) success = False - with open(f'{TMP_DIR}/out.txt', 'r') as f: + with open(f'{TMP_DIR}/out.txt') as f: success = f.read().strip() == message print(f'Batch workflow {"succeeded" if success else "failed"}') diff --git a/pyproject.toml b/pyproject.toml index 8fe23a9..04d390d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,119 +99,4 @@ ignore = [ "PLR2004", # Magic value used - "ANN001", - "ANN202", - "C408", - "TID252", - "RET504", - "ERA001", - "UP032", - "RUF100", - "ISC001", - "PIE804", - "F401", - "C901", - "W605", - "RET505", - "ANN003", - "RUF013", - "UP031", - "RUF010", - "B006", - "ANN002", - "B023", - "EXE001", - "G001", - "SIM108", - "RUF005", - "G002", - "PD901", - "N999", - "SIM118", - "SIM102", - "PLW2901", - "S603", - "ARG005", - "PGH003", - "B904", - "N802", - "ISC003", - "ANN205", - "S607", - "RUF015", - "E701", - "N818", - "PIE790", - "N803", - "A002", - "RUF012", - "W291", - "S113", - "S311", - "N806", - "PLR5501", - "F403", - "SIM115", - "B007", - "F841", - "C405", - "C419", - "SIM300", - "PD011", - "UP015", - "S602", - "Q002", - "ISC002", - "COM819", - "C416", - "DTZ005", - "G003", - "S608", - "PIE808", - "B008", - "S108", - "E402", - "S605", - "F821", - "RET507", - "RET503", - "UP030", - "UP026", - "PLR1714", - "C403", - "PLR1711", - "PIE810", - "DTZ011", - "S105", - "BLE001", - "C401", - "C400", - "PLR0402", - "SIM201", - "RET506", - "C417", - "PD010", - "PLW1510", - "A001", - "W292", - "PYI024", - "Q003", - "S301", - "RET501", - "PD003", - "SIM117", - "RUF002", - "SIM105", - "E713", - "S324", - "S310", - "Q001", - "UP020", - "S506", - "N805", - "E712", - "E401", - "SIM212", - "DTZ002", - "UP007", ] From 6d36f01dee66448f649b0cfce1249008302ced23 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Wed, 21 Jan 2026 14:39:42 +1000 Subject: [PATCH 5/7] punch up pre-commit --- .pre-commit-config.yaml | 15 ++++++++------- cpg_flow_test/jobs/filter_evens.py | 1 - cpg_flow_test/stages.py | 18 ++++++++++-------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a99173..19bb34d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,8 +1,8 @@ default_language_version: - python: python3.10 + python: python3.11 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-yaml exclude: '\.*conda/.*' @@ -16,21 +16,22 @@ repos: - id: check-added-large-files - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.43.0 + rev: v0.47.0 hooks: - id: markdownlint args: [-s, .markdownlint.json] - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.8.3 + rev: v0.14.13 hooks: - - id: ruff + - id: ruff-format name: ruff (format) - args: [--fix] + - id: ruff + name: ruff (lint) - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.13.0 + rev: v1.19.1 hooks: - id: mypy args: diff --git a/cpg_flow_test/jobs/filter_evens.py b/cpg_flow_test/jobs/filter_evens.py index c90410a..ef51f7f 100644 --- a/cpg_flow_test/jobs/filter_evens.py +++ b/cpg_flow_test/jobs/filter_evens.py @@ -14,7 +14,6 @@ def filter_evens_job( sg_outputs: dict[str, dict[str, Any]], output_file_path: str, ) -> list[Job]: - b = get_batch() title = 'Filter Evens' diff --git a/cpg_flow_test/stages.py b/cpg_flow_test/stages.py index d91d550..f86d988 100644 --- a/cpg_flow_test/stages.py +++ b/cpg_flow_test/stages.py @@ -52,7 +52,7 @@ @stage(analysis_keys=['id_sum', 'primes'], analysis_type='custom') class GeneratePrimes(SequencingGroupStage): - def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path | str]: + def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path]: return { 'id_sum': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_id_sum.txt', 'primes': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_primes.txt', @@ -65,7 +65,11 @@ def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> S # Write id_sum to output file id_sum_output_path = str(self.expected_outputs(sequencing_group).get('id_sum', '')) - job_id_sum = iterative_digit_sum.iterative_digit_sum_job(sequencing_group, self.get_job_attrs(sequencing_group), id_sum_output_path) + job_id_sum = iterative_digit_sum.iterative_digit_sum_job( + sequencing_group, + self.get_job_attrs(sequencing_group), + id_sum_output_path, + ) # Generate first N primes primes_output_path = str(self.expected_outputs(sequencing_group).get('primes', '')) @@ -84,7 +88,7 @@ def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> S @stage(required_stages=[GeneratePrimes], analysis_keys=['cumulative'], analysis_type='custom') class CumulativeCalc(SequencingGroupStage): - def expected_outputs(self, sequencing_group: SequencingGroup): + def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path]: return { 'cumulative': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_cumulative.txt', } @@ -124,18 +128,16 @@ def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> S hello_output_path, ) - jobs = [job_say_hi] - return self.make_outputs( sequencing_group, data=self.expected_outputs(sequencing_group), - jobs=jobs, + jobs=job_say_hi, ) @stage(required_stages=[CumulativeCalc], analysis_keys=['no_evens'], analysis_type='custom') class FilterEvens(CohortStage): - def expected_outputs(self, cohort: Cohort): + def expected_outputs(self, cohort: Cohort) -> dict[str, Path]: sg_outputs = { sg.id: str(sg.dataset.prefix() / WORKFLOW_FOLDER / f'{sg.id}_no_evens.txt') for sg in cohort.get_sequencing_groups() @@ -165,7 +167,7 @@ def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput: @stage(required_stages=[GeneratePrimes, FilterEvens], analysis_keys=['pyramid'], analysis_type='custom') class BuildAPrimePyramid(MultiCohortStage): - def expected_outputs(self, multicohort: MultiCohort): + def expected_outputs(self, multicohort: MultiCohort) -> dict[str, Path]: return { 'pyramid': multicohort.analysis_dataset.prefix() / WORKFLOW_FOLDER / f'{multicohort.name}_pyramid.txt', } From 5f7550c8c0fba37922e0c80113308d6b2c06875c Mon Sep 17 00:00:00 2001 From: MattWellie Date: Wed, 21 Jan 2026 14:45:41 +1000 Subject: [PATCH 6/7] remove even more rules --- cpg_flow_test/stages.py | 25 +++++++++++++++++-------- pyproject.toml | 12 ------------ 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/cpg_flow_test/stages.py b/cpg_flow_test/stages.py index f86d988..79cc13c 100644 --- a/cpg_flow_test/stages.py +++ b/cpg_flow_test/stages.py @@ -7,21 +7,29 @@ from cpg_flow.targets.multicohort import MultiCohort from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils import Path -from jobs import build_pyramid, cumulative_calc, filter_evens, first_n_primes, iterative_digit_sum, say_hi +from test_workflows_shared.jobs import ( + build_pyramid, + cumulative_calc, + filter_evens, + first_n_primes, + iterative_digit_sum, + say_hi, +) r""" -Here's a fun programming task with four interdependent steps, using the concept of **prime numbers** and their relationships: +Here's a fun programming task with four interdependent steps, using **prime numbers** and their relationships: --- ### Task: Prime Pyramid -Write a program that builds a "Prime Pyramid" based on a given input number \( N \). The pyramid is built in four steps: +Write a program that builds a "Prime Pyramid" based on a given input number. The pyramid is built in four steps: #### Step 1: **Generate Prime Numbers** -Write a function to generate the first \( N \) prime numbers. For example, if \( N = 5 \), the output would be `[2, 3, 5, 7, 11]`. +Write a function to generate the first `N` prime numbers, i.e. if `N = 5`, the output would be `[2, 3, 5, 7, 11]`. #### Step 2: **Calculate Cumulative Sums** -Using the prime numbers generated in Step 1, calculate a list of cumulative sums. Each cumulative sum is the sum of the primes up to that index. +Using the prime numbers generated in Step 1, calculate a list of cumulative sums. +Each cumulative sum is the sum of the primes up to that index. Example: For `[2, 3, 5, 7, 11]`, the cumulative sums are `[2, 5, 10, 17, 28]`. #### Step 3: **Filter Even Numbers** @@ -29,7 +37,8 @@ Example: For `[2, 5, 10, 17, 28]`, the result is `[5, 17]`. #### Step 4: **Build the Prime Pyramid** -Using the filtered numbers from Step 3, construct a pyramid. Each level of the pyramid corresponds to a filtered number, and the number determines how many stars `*` appear on that level. +Using the filtered numbers from Step 3, construct a pyramid. +Each pyramid level corresponds to a filtered number, and the number determines how many stars `*` appear on that level. Example: For `[5, 17]`, the pyramid is: ``` ***** @@ -58,7 +67,7 @@ def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path] 'primes': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_primes.txt', } - def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput: + def queue_jobs(self, sequencing_group: SequencingGroup, _inputs: StageInput) -> StageOutput: # Print out alignment input for this sequencing group logger.info('-----ALIGNMENT INPUT-----') logger.info(sequencing_group.alignment_input) @@ -120,7 +129,7 @@ def expected_outputs(self, sequencing_group: SequencingGroup): 'hello': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_cumulative.txt', } - def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput: + def queue_jobs(self, sequencing_group: SequencingGroup, _inputs: StageInput) -> StageOutput: hello_output_path = str(self.expected_outputs(sequencing_group).get('hello', '')) job_say_hi = say_hi.say_hi_job( sequencing_group, diff --git a/pyproject.toml b/pyproject.toml index 04d390d..37f8b23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,19 +84,7 @@ fixable = ["ALL"] ignore = [ "ANN201", # Missing return type annotation for public function "ANN204", # Missing type annotation for special method `__init__` - "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in `**kwargs` - "E501", # Line length too long - "E731", # Do not assign a lambda expression, use a def - "E741", # Ambiguous variable name "G004", # Logging statement uses f-string - "PT018", # Assertion should be broken down into multiple parts "Q000", # Single quotes found but double quotes preferred - "S101", # Use of assert detected - "SLF001", # Private member accessed: `_preemptible` - - "ARG001", # Unused function argument - "ARG002", # Unused method argument - "PLR2004", # Magic value used - ] From f58ca7e2c4cff294ac7597ec185f250f8eb55060 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Wed, 21 Jan 2026 14:46:44 +1000 Subject: [PATCH 7/7] apply isort linting rules --- cpg_flow_test/jobs/build_pyramid.py | 6 ++++-- cpg_flow_test/jobs/cumulative_calc.py | 6 ++++-- cpg_flow_test/jobs/filter_evens.py | 6 ++++-- cpg_flow_test/jobs/first_n_primes.py | 6 ++++-- cpg_flow_test/jobs/iterative_digit_sum.py | 6 ++++-- cpg_flow_test/jobs/say_hi.py | 6 ++++-- cpg_flow_test/workflow.py | 3 ++- pyproject.toml | 7 +++++++ 8 files changed, 33 insertions(+), 13 deletions(-) diff --git a/cpg_flow_test/jobs/build_pyramid.py b/cpg_flow_test/jobs/build_pyramid.py index 6c9bc5a..066bde5 100644 --- a/cpg_flow_test/jobs/build_pyramid.py +++ b/cpg_flow_test/jobs/build_pyramid.py @@ -1,10 +1,12 @@ from typing import Any +from loguru import logger + +from hailtop.batch.job import Job + from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch.job import Job -from loguru import logger def build_pyramid_job( diff --git a/cpg_flow_test/jobs/cumulative_calc.py b/cpg_flow_test/jobs/cumulative_calc.py index 98ad1ba..f958ecd 100644 --- a/cpg_flow_test/jobs/cumulative_calc.py +++ b/cpg_flow_test/jobs/cumulative_calc.py @@ -1,8 +1,10 @@ +from loguru import logger + +from hailtop.batch.job import Job + from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch.job import Job -from loguru import logger def cumulative_calc_job( diff --git a/cpg_flow_test/jobs/filter_evens.py b/cpg_flow_test/jobs/filter_evens.py index ef51f7f..4bc444a 100644 --- a/cpg_flow_test/jobs/filter_evens.py +++ b/cpg_flow_test/jobs/filter_evens.py @@ -1,10 +1,12 @@ from typing import Any +from loguru import logger + +from hailtop.batch.job import Job + from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch.job import Job -from loguru import logger def filter_evens_job( diff --git a/cpg_flow_test/jobs/first_n_primes.py b/cpg_flow_test/jobs/first_n_primes.py index dcc3404..7f49462 100644 --- a/cpg_flow_test/jobs/first_n_primes.py +++ b/cpg_flow_test/jobs/first_n_primes.py @@ -1,8 +1,10 @@ +from loguru import logger + +from hailtop.batch.job import Job + from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch.job import Job -from loguru import logger def first_n_primes_job( diff --git a/cpg_flow_test/jobs/iterative_digit_sum.py b/cpg_flow_test/jobs/iterative_digit_sum.py index 189f73e..2cfbbbb 100644 --- a/cpg_flow_test/jobs/iterative_digit_sum.py +++ b/cpg_flow_test/jobs/iterative_digit_sum.py @@ -1,8 +1,10 @@ +from loguru import logger + +from hailtop.batch.job import Job + from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch.job import Job -from loguru import logger def iterative_digit_sum_job( diff --git a/cpg_flow_test/jobs/say_hi.py b/cpg_flow_test/jobs/say_hi.py index 4e4d12e..f1b1b16 100644 --- a/cpg_flow_test/jobs/say_hi.py +++ b/cpg_flow_test/jobs/say_hi.py @@ -1,8 +1,10 @@ +from loguru import logger + +from hailtop.batch.job import Job + from cpg_flow.targets.sequencing_group import SequencingGroup from cpg_utils.config import config_retrieve from cpg_utils.hail_batch import get_batch -from hailtop.batch.job import Job -from loguru import logger def say_hi_job( diff --git a/cpg_flow_test/workflow.py b/cpg_flow_test/workflow.py index 942f6fd..683c14a 100644 --- a/cpg_flow_test/workflow.py +++ b/cpg_flow_test/workflow.py @@ -1,9 +1,10 @@ import os import sys +from stages import BuildAPrimePyramid, CumulativeCalc, FilterEvens, GeneratePrimes, SayHi + from cpg_flow.workflow import run_workflow from cpg_utils.config import set_config_paths -from stages import BuildAPrimePyramid, CumulativeCalc, FilterEvens, GeneratePrimes, SayHi TMP_DIR = os.getenv('TMP_DIR') diff --git a/pyproject.toml b/pyproject.toml index 37f8b23..30015fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,3 +88,10 @@ ignore = [ "Q000", # Single quotes found but double quotes preferred "PLR2004", # Magic value used ] + +[tool.ruff.lint.isort] +section-order = ["future", "standard-library", "third-party", "hail", "cpg", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +cpg = ["metamist", "cpg_flow", "cpg_utils"] +hail = ["hail", "hailtop"]