diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 2295876..7151a83 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -18,7 +18,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.10'
+          python-version: '3.11'

       - name: Install dependencies
         run: |
diff --git a/.github/workflows/security.yaml b/.github/workflows/security.yaml
index d88375d..e01fcaf 100644
--- a/.github/workflows/security.yaml
+++ b/.github/workflows/security.yaml
@@ -14,7 +14,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.10'
+          python-version: '3.11'

       - name: Install dependencies
         run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7a99173..19bb34d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,8 +1,8 @@
 default_language_version:
-  python: python3.10
+  python: python3.11
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v6.0.0
     hooks:
       - id: check-yaml
         exclude: '\.*conda/.*'
@@ -16,21 +16,22 @@ repos:
       - id: check-added-large-files

   - repo: https://github.com/igorshubovych/markdownlint-cli
-    rev: v0.43.0
+    rev: v0.47.0
     hooks:
       - id: markdownlint
         args: [-s, .markdownlint.json]

   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.8.3
+    rev: v0.14.13
     hooks:
-      - id: ruff
+      - id: ruff-format
         name: ruff (format)
-        args: [--fix]
+      - id: ruff
+        name: ruff (lint)

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.13.0
+    rev: v1.19.1
     hooks:
       - id: mypy
         args:
diff --git a/cpg_flow_test/jobs/__init__.py b/cpg_flow_test/jobs/__init__.py
index d9966b5..e69de29 100644
--- a/cpg_flow_test/jobs/__init__.py
+++ b/cpg_flow_test/jobs/__init__.py
@@ -1,6 +0,0 @@
-from jobs.build_pyramid import build_pyramid
-from jobs.cumulative_calc import cumulative_calc
-from jobs.filter_evens import filter_evens
-from jobs.first_n_primes import first_n_primes
-from jobs.iterative_digit_sum import iterative_digit_sum
-from jobs.say_hi import say_hi
diff --git a/cpg_flow_test/jobs/build_pyramid.py b/cpg_flow_test/jobs/build_pyramid.py
index 43fafe2..066bde5 100644
--- a/cpg_flow_test/jobs/build_pyramid.py
+++ b/cpg_flow_test/jobs/build_pyramid.py
@@ -1,24 +1,28 @@
 from typing import Any

-from cpg_flow.targets.sequencing_group import SequencingGroup
-from hailtop.batch import Batch
-from hailtop.batch.job import Job
 from loguru import logger

+from hailtop.batch.job import Job

-def build_pyramid(
-    b: Batch,
+from cpg_flow.targets.sequencing_group import SequencingGroup
+from cpg_utils.config import config_retrieve
+from cpg_utils.hail_batch import get_batch
+
+
+def build_pyramid_job(
     sequencing_groups: list[SequencingGroup],
     input_files: dict[str, Any],
     job_attrs: dict[str, str],
     output_file_path: str,
 ) -> list[Job]:
+    b = get_batch()
     title = 'Build A Pyramid'

     # Compute the no evens list for each sequencing group
     sg_jobs = []
     sg_output_files = []
-    for sg in sequencing_groups:  # type: ignore
+    for sg in sequencing_groups:
         job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs | {'sequencing_group': sg.id})
+        job.image(config_retrieve(['workflow', 'driver_image']))

         no_evens_input_file_path = input_files[sg.id]['no_evens']
         no_evens_input_file = b.read_input(no_evens_input_file_path)
@@ -52,6 +56,7 @@ def build_pyramid(

     # Merge the no evens lists for all sequencing groups into a single file
     job = b.new_job(name=title, attributes=job_attrs | {'tool': 'cat'})
+    job.image(config_retrieve(['workflow', 'driver_image']))
     job.depends_on(*sg_jobs)
     inputs = ' '.join([b.read_input(f) for f in sg_output_files])
     job.command(f'cat {inputs} >> {job.pyramid}')
@@ -60,6 +65,4 @@ def build_pyramid(
     logger.info('-----PRINT PYRAMID-----')
     logger.info(output_file_path)

-    all_jobs = [job, *sg_jobs]
-
-    return all_jobs
+    return [job, *sg_jobs]
diff --git a/cpg_flow_test/jobs/cumulative_calc.py b/cpg_flow_test/jobs/cumulative_calc.py
index 6b71fac..f958ecd 100644
--- a/cpg_flow_test/jobs/cumulative_calc.py
+++ b/cpg_flow_test/jobs/cumulative_calc.py
@@ -1,18 +1,22 @@
-from cpg_flow.targets.sequencing_group import SequencingGroup
-from hailtop.batch import Batch
-from hailtop.batch.job import Job
 from loguru import logger

+from hailtop.batch.job import Job
+
+from cpg_flow.targets.sequencing_group import SequencingGroup
+from cpg_utils.config import config_retrieve
+from cpg_utils.hail_batch import get_batch
+

-def cumulative_calc(
-    b: Batch,
+def cumulative_calc_job(
     sequencing_group: SequencingGroup,
     input_file_path: str,
     job_attrs: dict[str, str],
     output_file_path: str,
 ) -> list[Job]:
+    b = get_batch()
     title = f'Cumulative Calc: {sequencing_group.id}'

     job = b.new_job(name=title, attributes=job_attrs)
+    job.image(config_retrieve(['workflow', 'driver_image']))
     primes_path = b.read_input(input_file_path)

     cmd = f"""
diff --git a/cpg_flow_test/jobs/filter_evens.py b/cpg_flow_test/jobs/filter_evens.py
index 1199e32..4bc444a 100644
--- a/cpg_flow_test/jobs/filter_evens.py
+++ b/cpg_flow_test/jobs/filter_evens.py
@@ -1,27 +1,30 @@
 from typing import Any

-from cpg_flow.stage import Stage, StageInput
-from cpg_flow.targets.sequencing_group import SequencingGroup
-from hailtop.batch import Batch
-from hailtop.batch.job import Job
 from loguru import logger

+from hailtop.batch.job import Job

-def filter_evens(
-    b: Batch,
+from cpg_flow.targets.sequencing_group import SequencingGroup
+from cpg_utils.config import config_retrieve
+from cpg_utils.hail_batch import get_batch
+
+
+def filter_evens_job(
     sequencing_groups: list[SequencingGroup],
     input_files: dict[str, dict[str, Any]],
     job_attrs: dict[str, str],
     sg_outputs: dict[str, dict[str, Any]],
     output_file_path: str,
 ) -> list[Job]:
+    b = get_batch()
     title = 'Filter Evens'

     # Compute the no evens list for each sequencing group
     sg_jobs = []
     sg_output_files = []
-    for sg in sequencing_groups:  # type: ignore
+    for sg in sequencing_groups:
         job = b.new_job(name=title + ': ' + sg.id, attributes=job_attrs)
+        job.image(config_retrieve(['workflow', 'driver_image']))

         input_file_path = input_files[sg.id]['cumulative']
         no_evens_input_file = b.read_input(input_file_path)
         no_evens_output_file_path = str(sg_outputs[sg.id])
@@ -44,6 +47,7 @@ def filter_evens(

     # Merge the no evens lists for all sequencing groups into a single file
     job = b.new_job(name=title, attributes=job_attrs)
+    job.image(config_retrieve(['workflow', 'driver_image']))
     job.depends_on(*sg_jobs)
     inputs = ' '.join([b.read_input(f) for f in sg_output_files])
     job.command(f'cat {inputs} >> {job.no_evens_file}')
@@ -52,6 +56,4 @@ def filter_evens(
     logger.info('-----PRINT NO EVENS-----')
     logger.info(output_file_path)

-    all_jobs = [job, *sg_jobs]
-
-    return all_jobs
+    return [job, *sg_jobs]
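All six job modules are refactored along the same lines: the `Batch` parameter disappears in favour of a `get_batch()` call inside the function, and every job is pinned to the workflow's driver image. A minimal sketch of the shared shape, assuming a `[workflow] driver_image` entry in the loaded config (`example_job` and its echo command are illustrative, not part of this change):

```python
from hailtop.batch.job import Job

from cpg_flow.targets.sequencing_group import SequencingGroup
from cpg_utils.config import config_retrieve
from cpg_utils.hail_batch import get_batch


def example_job(
    sequencing_group: SequencingGroup,
    job_attrs: dict[str, str],
    output_file_path: str,
) -> list[Job]:
    # The batch is resolved from shared state instead of being threaded
    # through every call site as a parameter.
    b = get_batch()

    job = b.new_job(name=f'Example: {sequencing_group.id}', attributes=job_attrs)
    # Pin the job to the driver image configured under [workflow].
    job.image(config_retrieve(['workflow', 'driver_image']))

    # Referencing an undeclared attribute like `job.out` in a command
    # declares a resource file that Hail Batch stages for us.
    job.command(f'echo "{sequencing_group.id}" > {job.out}')
    b.write_output(job.out, output_file_path)

    return [job]
```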
diff --git a/cpg_flow_test/jobs/first_n_primes.py b/cpg_flow_test/jobs/first_n_primes.py
index c6b85fd..7f49462 100644
--- a/cpg_flow_test/jobs/first_n_primes.py
+++ b/cpg_flow_test/jobs/first_n_primes.py
@@ -1,19 +1,23 @@
-from cpg_flow.targets.sequencing_group import SequencingGroup
-from hailtop.batch import Batch
-from hailtop.batch.job import Job
 from loguru import logger

+from hailtop.batch.job import Job
+
+from cpg_flow.targets.sequencing_group import SequencingGroup
+from cpg_utils.config import config_retrieve
+from cpg_utils.hail_batch import get_batch
+

-def first_n_primes(
-    b: Batch,
+def first_n_primes_job(
     sequencing_group: SequencingGroup,
     input_file_path: str,
     job_attrs: dict[str, str],
     output_file_path: str,
     depends_on: Job,
 ) -> list[Job]:
+    b = get_batch()
     title = f'First N Primes: {sequencing_group.id}'

     job = b.new_job(name=title, attributes=job_attrs)
+    job.image(config_retrieve(['workflow', 'driver_image']))
     id_sum_path = b.read_input(input_file_path)

     if depends_on:
diff --git a/cpg_flow_test/jobs/iterative_digit_sum.py b/cpg_flow_test/jobs/iterative_digit_sum.py
index 3b33632..2cfbbbb 100644
--- a/cpg_flow_test/jobs/iterative_digit_sum.py
+++ b/cpg_flow_test/jobs/iterative_digit_sum.py
@@ -1,17 +1,21 @@
-from cpg_flow.targets.sequencing_group import SequencingGroup
-from hailtop.batch import Batch
-from hailtop.batch.job import Job
 from loguru import logger

+from hailtop.batch.job import Job
+
+from cpg_flow.targets.sequencing_group import SequencingGroup
+from cpg_utils.config import config_retrieve
+from cpg_utils.hail_batch import get_batch
+

-def iterative_digit_sum(
-    b: Batch,
+def iterative_digit_sum_job(
     sequencing_group: SequencingGroup,
     job_attrs: dict[str, str],
     output_file_path: str,
 ) -> list[Job]:
+    b = get_batch()
     title = f'Iterative Digit Sum: {sequencing_group.id}'

     job = b.new_job(name=title, attributes=job_attrs)
+    job.image(config_retrieve(['workflow', 'driver_image']))

     cmd = f"""\
     #!/bin/bash
diff --git a/cpg_flow_test/jobs/say_hi.py b/cpg_flow_test/jobs/say_hi.py
index c58fe0b..f1b1b16 100644
--- a/cpg_flow_test/jobs/say_hi.py
+++ b/cpg_flow_test/jobs/say_hi.py
@@ -1,17 +1,21 @@
-from cpg_flow.targets.sequencing_group import SequencingGroup
-from hailtop.batch import Batch
-from hailtop.batch.job import Job
 from loguru import logger

+from hailtop.batch.job import Job
+
+from cpg_flow.targets.sequencing_group import SequencingGroup
+from cpg_utils.config import config_retrieve
+from cpg_utils.hail_batch import get_batch
+

-def say_hi(
-    b: Batch,
+def say_hi_job(
     sequencing_group: SequencingGroup,
     job_attrs: dict[str, str],
     output_file_path: str,
 ) -> list[Job]:
+    b = get_batch()
     title = f'Say Hi: {sequencing_group.id}'

     job = b.new_job(name=title, attributes=job_attrs)
+    job.image(config_retrieve(['workflow', 'driver_image']))

     cmd = f"""
     echo "This is a hello from sequencing_group {sequencing_group.id}" > {job.sayhi}
diff --git a/cpg_flow_test/stages.py b/cpg_flow_test/stages.py
index fb4bc66..79cc13c 100644
--- a/cpg_flow_test/stages.py
+++ b/cpg_flow_test/stages.py
@@ -7,22 +7,29 @@ from cpg_flow.targets.multicohort import MultiCohort
 from cpg_flow.targets.sequencing_group import SequencingGroup
 from cpg_utils import Path
-from cpg_utils.hail_batch import get_batch
-from jobs import build_pyramid, cumulative_calc, filter_evens, first_n_primes, iterative_digit_sum, say_hi
-
-"""
-Here's a fun programming task with four interdependent steps, using the concept of **prime numbers** and their relationships:
+from test_workflows_shared.jobs import (
+    build_pyramid,
+    cumulative_calc,
+    filter_evens,
+    first_n_primes,
+    iterative_digit_sum,
+    say_hi,
+)
+
+r"""
+Here's a fun programming task with four interdependent steps, using **prime numbers** and their relationships:

 ---

 ### Task: Prime Pyramid

-Write a program that builds a "Prime Pyramid" based on a given input number \( N \). The pyramid is built in four steps:
+Write a program that builds a "Prime Pyramid" based on a given input number. The pyramid is built in four steps:

 #### Step 1: **Generate Prime Numbers**
-Write a function to generate the first \( N \) prime numbers. For example, if \( N = 5 \), the output would be `[2, 3, 5, 7, 11]`.
+Write a function to generate the first `N` prime numbers, i.e. if `N = 5`, the output would be `[2, 3, 5, 7, 11]`.

 #### Step 2: **Calculate Cumulative Sums**
-Using the prime numbers generated in Step 1, calculate a list of cumulative sums. Each cumulative sum is the sum of the primes up to that index.
+Using the prime numbers generated in Step 1, calculate a list of cumulative sums.
+Each cumulative sum is the sum of the primes up to that index.
 Example: For `[2, 3, 5, 7, 11]`, the cumulative sums are `[2, 5, 10, 17, 28]`.

 #### Step 3: **Filter Even Numbers**
@@ -30,7 +37,8 @@
 Example: For `[2, 5, 10, 17, 28]`, the result is `[5, 17]`.

 #### Step 4: **Build the Prime Pyramid**
-Using the filtered numbers from Step 3, construct a pyramid. Each level of the pyramid corresponds to a filtered number, and the number determines how many stars `*` appear on that level.
+Using the filtered numbers from Step 3, construct a pyramid.
+Each pyramid level corresponds to a filtered number, and the number determines how many stars `*` appear on that level.
 Example: For `[5, 17]`, the pyramid is:
 ```
 *****
@@ -53,28 +61,28 @@
 @stage(analysis_keys=['id_sum', 'primes'], analysis_type='custom')
 class GeneratePrimes(SequencingGroupStage):
-    def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path | str]:
+    def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path]:
         return {
             'id_sum': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_id_sum.txt',
             'primes': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_primes.txt',
         }

-    def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput | None:
-        # Get batch
-        b = get_batch()
-
+    def queue_jobs(self, sequencing_group: SequencingGroup, _inputs: StageInput) -> StageOutput:
         # Print out alignment input for this sequencing group
         logger.info('-----ALIGNMENT INPUT-----')
         logger.info(sequencing_group.alignment_input)

         # Write id_sum to output file
         id_sum_output_path = str(self.expected_outputs(sequencing_group).get('id_sum', ''))
-        job_id_sum = iterative_digit_sum(b, sequencing_group, self.get_job_attrs(sequencing_group), id_sum_output_path)
+        job_id_sum = iterative_digit_sum.iterative_digit_sum_job(
+            sequencing_group,
+            self.get_job_attrs(sequencing_group),
+            id_sum_output_path,
+        )

         # Generate first N primes
         primes_output_path = str(self.expected_outputs(sequencing_group).get('primes', ''))
-        job_primes = first_n_primes(
-            b,
+        job_primes = first_n_primes.first_n_primes_job(
             sequencing_group,
             id_sum_output_path,
             self.get_job_attrs(sequencing_group),
@@ -84,23 +92,21 @@ def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> S

         jobs = [job_id_sum, job_primes]

-        return self.make_outputs(sequencing_group, data=self.expected_outputs(sequencing_group), jobs=jobs)  # type: ignore
+        return self.make_outputs(sequencing_group, data=self.expected_outputs(sequencing_group), jobs=jobs)


 @stage(required_stages=[GeneratePrimes], analysis_keys=['cumulative'], analysis_type='custom')
 class CumulativeCalc(SequencingGroupStage):
-    def expected_outputs(self, sequencing_group: SequencingGroup):
+    def expected_outputs(self, sequencing_group: SequencingGroup) -> dict[str, Path]:
         return {
             'cumulative': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_cumulative.txt',
         }

-    def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput | None:
+    def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput:
         input_txt = inputs.as_path(sequencing_group, GeneratePrimes, 'primes')
-        b = get_batch()

         cumulative_calc_output_path = str(self.expected_outputs(sequencing_group).get('cumulative', ''))
-        job_cumulative_calc = cumulative_calc(
-            b,
+        job_cumulative_calc = cumulative_calc.cumulative_calc_job(
             sequencing_group,
             input_txt,
             self.get_job_attrs(sequencing_group),
@@ -123,24 +129,24 @@ def expected_outputs(self, sequencing_group: SequencingGroup):
             'hello': sequencing_group.dataset.prefix() / WORKFLOW_FOLDER / f'{sequencing_group.id}_cumulative.txt',
         }

-    def queue_jobs(self, sequencing_group: SequencingGroup, inputs: StageInput) -> StageOutput | None:
-        b = get_batch()
-
+    def queue_jobs(self, sequencing_group: SequencingGroup, _inputs: StageInput) -> StageOutput:
         hello_output_path = str(self.expected_outputs(sequencing_group).get('hello', ''))
-        job_say_hi = say_hi(b, sequencing_group, self.get_job_attrs(sequencing_group), hello_output_path)
-
-        jobs = [job_say_hi]
+        job_say_hi = say_hi.say_hi_job(
+            sequencing_group,
+            self.get_job_attrs(sequencing_group),
+            hello_output_path,
+        )

         return self.make_outputs(
             sequencing_group,
             data=self.expected_outputs(sequencing_group),
-            jobs=jobs,
+            jobs=job_say_hi,
         )


 @stage(required_stages=[CumulativeCalc], analysis_keys=['no_evens'], analysis_type='custom')
 class FilterEvens(CohortStage):
-    def expected_outputs(self, cohort: Cohort):
+    def expected_outputs(self, cohort: Cohort) -> dict[str, Path]:
         sg_outputs = {
             sg.id: str(sg.dataset.prefix() / WORKFLOW_FOLDER / f'{sg.id}_no_evens.txt')
             for sg in cohort.get_sequencing_groups()
@@ -148,14 +154,12 @@ def expected_outputs(self, cohort: Cohort):
         sg_outputs['no_evens'] = cohort.dataset.prefix() / WORKFLOW_FOLDER / f'{cohort.name}_no_evens.txt'
         return sg_outputs

-    def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput | None:
+    def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput:
         input_files = inputs.as_dict_by_target(CumulativeCalc)
-        b = get_batch()

         sg_outputs = self.expected_outputs(cohort)
         no_evens_output_path = str(sg_outputs['no_evens'])
-        job_no_evens = filter_evens(
-            b,
+        job_no_evens = filter_evens.filter_evens_job(
             cohort.get_sequencing_groups(),
             input_files,
             self.get_job_attrs(cohort),
@@ -172,12 +176,12 @@ def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput | None:

 @stage(required_stages=[GeneratePrimes, FilterEvens], analysis_keys=['pyramid'], analysis_type='custom')
 class BuildAPrimePyramid(MultiCohortStage):
-    def expected_outputs(self, multicohort: MultiCohort):
+    def expected_outputs(self, multicohort: MultiCohort) -> dict[str, Path]:
         return {
             'pyramid': multicohort.analysis_dataset.prefix() / WORKFLOW_FOLDER / f'{multicohort.name}_pyramid.txt',
         }

-    def queue_jobs(self, multicohort: MultiCohort, inputs: StageInput) -> StageOutput | None:
+    def queue_jobs(self, multicohort: MultiCohort, inputs: StageInput) -> StageOutput:
         input_files_filter_evens = inputs.as_dict_by_target(FilterEvens)
         logger.info('----INPUT FILES FILTER EVENS----')
         logger.info(input_files_filter_evens)
@@ -197,11 +201,8 @@ def queue_jobs(self, multicohort: MultiCohort, inputs: StageInput) -> StageOutpu
         logger.info('----INPUT FILES----')
         logger.info(input_files)

-        b = get_batch()
-
         pyramid_output_path = str(self.expected_outputs(multicohort).get('pyramid', ''))
-        job_pyramid = build_pyramid(
-            b,
+        job_pyramid = build_pyramid.build_pyramid_job(
             multicohort.get_sequencing_groups(),
             input_files,
             self.get_job_attrs(multicohort),
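The five stages in this file implement the docstring's algorithm as Hail Batch jobs. For reference, a plain-Python sketch of the four steps (these helper names are illustrative, not the job functions above):

```python
def first_n_primes(n: int) -> list[int]:
    # Step 1: trial division against the primes found so far.
    primes: list[int] = []
    candidate = 2
    while len(primes) < n:
        if all(candidate % p for p in primes):
            primes.append(candidate)
        candidate += 1
    return primes


def cumulative_sums(nums: list[int]) -> list[int]:
    # Step 2: running totals, e.g. [2, 3, 5, 7, 11] -> [2, 5, 10, 17, 28].
    totals, running = [], 0
    for x in nums:
        running += x
        totals.append(running)
    return totals


def drop_evens(nums: list[int]) -> list[int]:
    # Step 3: keep only the odd sums, e.g. [2, 5, 10, 17, 28] -> [5, 17].
    return [x for x in nums if x % 2]


def build_pyramid(levels: list[int]) -> str:
    # Step 4: one centred row of stars per filtered number.
    width = max(levels)
    return '\n'.join(('*' * n).center(width) for n in levels)


print(build_pyramid(drop_evens(cumulative_sums(first_n_primes(5)))))
```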
diff --git a/cpg_flow_test/workflow.py b/cpg_flow_test/workflow.py
index 5e05594..683c14a 100644
--- a/cpg_flow_test/workflow.py
+++ b/cpg_flow_test/workflow.py
@@ -1,28 +1,25 @@
-#!/usr/bin/env python3
 import os
 import sys
-from pathlib import Path
+
+from stages import BuildAPrimePyramid, CumulativeCalc, FilterEvens, GeneratePrimes, SayHi

 from cpg_flow.workflow import run_workflow
 from cpg_utils.config import set_config_paths
-from stages import BuildAPrimePyramid, CumulativeCalc, FilterEvens, GeneratePrimes, SayHi

 TMP_DIR = os.getenv('TMP_DIR')
-# CONFIG_FILE = str(Path(__file__).parent / 'config.toml')

 message = "Hello, Hail Batch! I'm CPG flow, nice to meet you."


-def run_cpg_flow(dry_run=False):
+def run_cpg_flow(dry_run: bool = False):
     workflow = [GeneratePrimes, CumulativeCalc, FilterEvens, BuildAPrimePyramid, SayHi]
     config_paths = os.environ['CPG_CONFIG_PATH'].split(',')
     print(f'CPG_CONFIG_PATHS: {config_paths}')

     # Inserting after the "defaults" config, but before user configs:
-    # set_config_paths(config_paths[:1] + [CONFIG_FILE] + config_paths[1:])
     set_config_paths(config_paths)
-    run_workflow(stages=workflow, dry_run=dry_run)
+    run_workflow(name='test_workflows_shared', stages=workflow, dry_run=dry_run)


 def validate_batch_workflow():
@@ -31,7 +28,7 @@ def validate_batch_workflow():
         sys.exit(1)

     success = False
-    with open(f'{TMP_DIR}/out.txt', 'r') as f:
+    with open(f'{TMP_DIR}/out.txt') as f:
         success = f.read().strip() == message

     print(f'Batch workflow {"succeeded" if success else "failed"}')
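With the commented-out config-file shim removed, the entrypoint is driven entirely by `CPG_CONFIG_PATH`. A hedged sketch of exercising it from a local harness, assuming the placeholder TOML below supplies the `[workflow]` section (including `driver_image`) that the jobs read:

```python
import os

# Placeholder path: in CI the analysis-runner supplies the real
# comma-separated list of config files.
os.environ['CPG_CONFIG_PATH'] = '/tmp/defaults.toml'

from workflow import run_cpg_flow

# dry_run is forwarded straight to run_workflow, so the stage graph is
# assembled and validated without submitting jobs to Hail Batch.
run_cpg_flow(dry_run=True)
```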
"RUF010", - "B006", - "ANN002", - "B023", - "EXE001", - "G001", - "SIM108", - "RUF005", - "G002", - "PD901", - "N999", - "SIM118", - "SIM102", - "PLW2901", - "S603", - "ARG005", - "PGH003", - "B904", - "N802", - "ISC003", - "ANN205", - "S607", - "RUF015", - "E701", - "N818", - "PIE790", - "N803", - "A002", - "RUF012", - "W291", - "S113", - "S311", - "N806", - "PLR5501", - "F403", - "SIM115", - "B007", - "F841", - "C405", - "C419", - "SIM300", - "PD011", - "UP015", - "S602", - "Q002", - "ISC002", - "COM819", - "C416", - "DTZ005", - "G003", - "S608", - "PIE808", - "B008", - "S108", - "E402", - "S605", - "F821", - "RET507", - "RET503", - "UP030", - "UP026", - "PLR1714", - "C403", - "PLR1711", - "PIE810", - "DTZ011", - "S105", - "BLE001", - "C401", - "C400", - "PLR0402", - "SIM201", - "RET506", - "C417", - "PD010", - "PLW1510", - "A001", - "W292", - "PYI024", - "Q003", - "S301", - "RET501", - "PD003", - "SIM117", - "RUF002", - "SIM105", - "E713", - "S324", - "S310", - "Q001", - "UP020", - "S506", - "N805", - "E712", - "E401", - "SIM212", - "DTZ002", - "UP007", ] + +[tool.ruff.lint.isort] +section-order = ["future", "standard-library", "third-party", "hail", "cpg", "first-party", "local-folder"] + +[tool.ruff.lint.isort.sections] +cpg = ["metamist", "cpg_flow", "cpg_utils"] +hail = ["hail", "hailtop"] diff --git a/requirements-dev.txt b/requirements-dev.txt index f5eb7f3..b990ef5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,21 +1,21 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --extra=dev --output-file=requirements-dev.txt pyproject.toml -# +# This file was autogenerated by uv via the following command: +# uv pip compile --extra=dev --output-file=requirements-dev.txt pyproject.toml aiodns==2.0.0 # via hail aiohappyeyeballs==2.6.0 # via aiohttp -aiohttp==3.11.13 - # via hail -aiosignal==1.3.2 +aiohttp==3.13.3 + # via + # gql + # hail +aiosignal==1.4.0 # via aiohttp analysis-runner==3.2.3 # via test-workflows-shared (pyproject.toml) -async-timeout==5.0.1 - # via aiohttp +anyio==4.12.1 + # via gql +asttokens==3.0.1 + # via stack-data attrs==25.1.0 # via aiohttp avro==1.11.3 @@ -29,7 +29,7 @@ azure-core==1.32.0 # azure-storage-blob # azure-storage-file-datalake # msrest -azure-identity==1.21.0 +azure-identity==1.25.1 # via hail azure-mgmt-core==1.5.0 # via azure-mgmt-storage @@ -42,6 +42,10 @@ azure-storage-blob==12.25.0 # hail azure-storage-file-datalake==12.19.0 # via cloudpathlib +backoff==2.2.1 + # via + # gql + # metamist black==24.10.0 # via test-workflows-shared (pyproject.toml) bokeh==3.4.3 @@ -61,10 +65,8 @@ botocore==1.37.11 # s3transfer build==1.2.2.post1 # via pip-tools -cachecontrol[filecache]==0.14.1 - # via - # cachecontrol - # pip-audit +cachecontrol==0.14.1 + # via pip-audit cachetools==5.5.2 # via google-auth certifi==2024.12.14 @@ -82,20 +84,27 @@ charset-normalizer==3.4.0 click==8.1.7 # via # black + # metamist # pip-tools # typer -cloudpathlib[all,azure,gs,s3]==0.21.0 +cloudpathlib==0.21.0 # via # analysis-runner # cpg-utils +comm==0.2.3 + # via ipywidgets commonmark==0.9.1 # via rich contourpy==1.3.1 # via bokeh -cpg-utils==5.2.0 +cpg-flow==1.3.0 + # via test-workflows-shared (pyproject.toml) +cpg-utils==5.4.3 # via - # analysis-runner # test-workflows-shared (pyproject.toml) + # analysis-runner + # cpg-flow + # metamist cryptography==44.0.2 # via # azure-identity @@ -105,17 +114,21 @@ cryptography==44.0.2 cyclonedx-python-lib==7.6.2 # via pip-audit decorator==4.4.2 - # via hail + # via 
diff --git a/requirements-dev.txt b/requirements-dev.txt
index f5eb7f3..b990ef5 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,21 +1,21 @@
-#
-# This file is autogenerated by pip-compile with Python 3.10
-# by the following command:
-#
-#    pip-compile --extra=dev --output-file=requirements-dev.txt pyproject.toml
-#
+# This file was autogenerated by uv via the following command:
+#    uv pip compile --extra=dev --output-file=requirements-dev.txt pyproject.toml
 aiodns==2.0.0
     # via hail
 aiohappyeyeballs==2.6.0
     # via aiohttp
-aiohttp==3.11.13
-    # via hail
-aiosignal==1.3.2
+aiohttp==3.13.3
+    # via
+    #   gql
+    #   hail
+aiosignal==1.4.0
     # via aiohttp
 analysis-runner==3.2.3
     # via test-workflows-shared (pyproject.toml)
-async-timeout==5.0.1
-    # via aiohttp
+anyio==4.12.1
+    # via gql
+asttokens==3.0.1
+    # via stack-data
 attrs==25.1.0
     # via aiohttp
 avro==1.11.3
@@ -29,7 +29,7 @@ azure-core==1.32.0
     #   azure-storage-blob
     #   azure-storage-file-datalake
     #   msrest
-azure-identity==1.21.0
+azure-identity==1.25.1
     # via hail
 azure-mgmt-core==1.5.0
     # via azure-mgmt-storage
@@ -42,6 +42,10 @@ azure-storage-blob==12.25.0
     #   hail
 azure-storage-file-datalake==12.19.0
     # via cloudpathlib
+backoff==2.2.1
+    # via
+    #   gql
+    #   metamist
 black==24.10.0
     # via test-workflows-shared (pyproject.toml)
 bokeh==3.4.3
@@ -61,10 +65,8 @@ botocore==1.37.11
     #   s3transfer
 build==1.2.2.post1
     # via pip-tools
-cachecontrol[filecache]==0.14.1
-    # via
-    #   cachecontrol
-    #   pip-audit
+cachecontrol==0.14.1
+    # via pip-audit
 cachetools==5.5.2
     # via google-auth
 certifi==2024.12.14
@@ -82,20 +84,27 @@ charset-normalizer==3.4.0
 click==8.1.7
     # via
     #   black
+    #   metamist
     #   pip-tools
     #   typer
-cloudpathlib[all,azure,gs,s3]==0.21.0
+cloudpathlib==0.21.0
     # via
     #   analysis-runner
     #   cpg-utils
+comm==0.2.3
+    # via ipywidgets
 commonmark==0.9.1
     # via rich
 contourpy==1.3.1
     # via bokeh
-cpg-utils==5.2.0
+cpg-flow==1.3.0
+    # via test-workflows-shared (pyproject.toml)
+cpg-utils==5.4.3
     # via
-    #   analysis-runner
     #   test-workflows-shared (pyproject.toml)
+    #   analysis-runner
+    #   cpg-flow
+    #   metamist
 cryptography==44.0.2
     # via
     #   azure-identity
@@ -105,17 +114,21 @@ cryptography==44.0.2
 cyclonedx-python-lib==7.6.2
     # via pip-audit
 decorator==4.4.2
-    # via hail
+    # via
+    #   hail
+    #   ipython
 defusedxml==0.7.1
     # via py-serializable
 deprecated==1.2.18
     # via
     #   cpg-utils
     #   hail
-dill==0.3.9
+dill==0.4.1
     # via hail
 distlib==0.3.9
     # via virtualenv
+executing==2.2.1
+    # via stack-data
 filelock==3.16.1
     # via
     #   cachecontrol
@@ -127,27 +140,33 @@ frozenlist==1.5.0
     #   aiohttp
     #   aiosignal
     #   hail
-google-api-core[grpc]==2.24.2
+google-api-core==1.34.1
     # via
+    #   google-cloud-artifact-registry
     #   google-cloud-core
     #   google-cloud-secret-manager
     #   google-cloud-storage
+    #   metamist
 google-auth==2.38.0
     # via
     #   cpg-utils
     #   google-api-core
     #   google-auth-oauthlib
+    #   google-cloud-artifact-registry
     #   google-cloud-core
     #   google-cloud-secret-manager
     #   google-cloud-storage
     #   hail
+    #   metamist
 google-auth-oauthlib==0.8.0
     # via hail
+google-cloud-artifact-registry==1.19.0
+    # via cpg-utils
 google-cloud-core==2.4.3
     # via google-cloud-storage
 google-cloud-secret-manager==2.23.1
     # via cpg-utils
-google-cloud-storage==3.1.0
+google-cloud-storage==2.14.0
     # via cloudpathlib
 google-crc32c==1.6.0
     # via
@@ -155,27 +174,37 @@ google-crc32c==1.6.0
     #   google-resumable-media
 google-resumable-media==2.7.2
     # via google-cloud-storage
-googleapis-common-protos[grpc]==1.69.1
+googleapis-common-protos==1.69.1
     # via
     #   google-api-core
     #   grpc-google-iam-v1
     #   grpcio-status
+gql==3.5.3
+    # via metamist
+graphql-core==3.2.6
+    # via gql
 grpc-google-iam-v1==0.14.1
-    # via google-cloud-secret-manager
+    # via
+    #   google-cloud-artifact-registry
+    #   google-cloud-secret-manager
 grpcio==1.71.0
     # via
+    #   cpg-flow
     #   google-api-core
+    #   google-cloud-artifact-registry
     #   googleapis-common-protos
     #   grpc-google-iam-v1
     #   grpcio-status
 grpcio-status==1.48.2
     # via
     #   analysis-runner
+    #   cpg-flow
     #   google-api-core
-hail==0.2.134
+hail==0.2.137
     # via
-    #   analysis-runner
     #   test-workflows-shared (pyproject.toml)
+    #   analysis-runner
+    #   cpg-flow
 html5lib==1.1
     # via pip-audit
 humanize==4.12.1
@@ -184,8 +213,15 @@ identify==2.6.3
     # via pre-commit
 idna==3.10
     # via
+    #   anyio
     #   requests
     #   yarl
+ipython==9.9.0
+    # via ipywidgets
+ipython-pygments-lexers==1.1.1
+    # via ipython
+ipywidgets==8.1.8
+    # via cpg-flow
 isodate==0.7.2
     # via
     #   azure-storage-blob
@@ -193,6 +229,8 @@ isodate==0.7.2
     #   msrest
 janus==1.0.0
     # via hail
+jedi==0.19.2
+    # via ipython
 jinja2==3.1.6
     # via bokeh
 jmespath==1.0.1
@@ -201,10 +239,20 @@ jmespath==1.0.1
     #   botocore
 jproperties==2.1.2
     # via hail
+jupyterlab-widgets==3.0.16
+    # via ipywidgets
 license-expression==30.4.0
     # via cyclonedx-python-lib
+loguru==0.7.3
+    # via
+    #   test-workflows-shared (pyproject.toml)
+    #   cpg-flow
 markupsafe==3.0.2
     # via jinja2
+matplotlib-inline==0.2.1
+    # via ipython
+metamist==7.13.4
+    # via cpg-flow
 msal==1.31.1
     # via
     #   azure-identity
@@ -223,9 +271,11 @@ mypy-extensions==1.0.0
     # via black
 nest-asyncio==1.6.0
     # via hail
+networkx==3.6.1
+    # via cpg-flow
 nodeenv==1.9.1
     # via pre-commit
-numpy==1.26.4
+numpy==2.4.1
     # via
     #   bokeh
     #   contourpy
@@ -252,10 +302,18 @@ pandas==2.2.3
     #   hail
 parsimonious==0.10.0
     # via hail
+parso==0.8.5
+    # via jedi
 pathspec==0.12.1
     # via black
+pexpect==4.9.0
+    # via ipython
 pillow==11.1.0
     # via bokeh
+pip==25.3
+    # via
+    #   pip-api
+    #   pip-tools
 pip-api==0.0.34
     # via pip-audit
 pip-audit==2.7.3
@@ -269,28 +327,38 @@ platformdirs==4.3.6
     #   black
     #   virtualenv
 plotly==5.24.1
-    # via hail
+    # via
+    #   cpg-flow
+    #   hail
 portalocker==2.10.1
     # via msal-extensions
 pre-commit==4.0.1
-    # via test-workflows-shared (pyproject.toml)
+    # via
+    #   test-workflows-shared (pyproject.toml)
+    #   cpg-flow
+prompt-toolkit==3.0.52
+    # via ipython
 propcache==0.3.0
     # via
     #   aiohttp
     #   yarl
 proto-plus==1.26.1
     # via
-    #   google-api-core
+    #   google-cloud-artifact-registry
     #   google-cloud-secret-manager
 protobuf==3.20.2
     # via
     #   google-api-core
+    #   google-cloud-artifact-registry
     #   google-cloud-secret-manager
     #   googleapis-common-protos
     #   grpc-google-iam-v1
     #   grpcio-status
-    #   hail
     #   proto-plus
+ptyprocess==0.7.0
+    # via pexpect
+pure-eval==0.2.3
+    # via stack-data
 py-serializable==1.1.2
     # via cyclonedx-python-lib
 py4j==0.10.9.7
@@ -306,11 +374,12 @@ pycares==4.5.0
 pycparser==2.22
     # via cffi
 pygments==2.18.0
-    # via rich
-pyjwt[crypto]==2.10.1
     # via
-    #   msal
-    #   pyjwt
+    #   ipython
+    #   ipython-pygments-lexers
+    #   rich
+pyjwt==2.10.1
+    # via msal
 pyparsing==3.2.0
     # via pip-requirements-parser
 pyproject-hooks==1.2.0
@@ -322,6 +391,7 @@ pyspark==3.5.5
 python-dateutil==2.9.0.post0
     # via
     #   botocore
+    #   metamist
     #   pandas
 python-json-logger==2.0.7
     # via hail
@@ -330,11 +400,12 @@ pytz==2025.1
 pyyaml==6.0.2
     # via
     #   bokeh
+    #   cpg-flow
     #   hail
     #   pre-commit
 regex==2024.11.6
     # via parsimonious
-requests==2.32.3
+requests==2.32.5
     # via
     #   analysis-runner
     #   azure-core
@@ -342,15 +413,20 @@ requests==2.32.3
     #   cpg-utils
     #   google-api-core
     #   google-cloud-storage
+    #   gql
     #   hail
+    #   metamist
     #   msal
     #   msrest
     #   pip-audit
     #   requests-oauthlib
+    #   requests-toolbelt
 requests-oauthlib==2.0.0
     # via
     #   google-auth-oauthlib
     #   msrest
+requests-toolbelt==1.0.0
+    # via gql
 rich==12.6.0
     # via
     #   hail
@@ -360,8 +436,10 @@ rsa==4.9
     # via google-auth
 s3transfer==0.11.4
     # via boto3
-scipy==1.11.4
+scipy==1.17.0
     # via hail
+setuptools==80.9.0
+    # via pip-tools
 shellingham==1.5.4
     # via typer
 six==1.17.0
@@ -374,58 +452,66 @@ sortedcontainers==2.4.0
     # via
     #   cyclonedx-python-lib
     #   hail
+stack-data==0.6.3
+    # via ipython
 tabulate==0.9.0
     # via
     #   analysis-runner
     #   cpg-utils
     #   hail
+    #   metamist
 tenacity==9.0.0
     # via plotly
 toml==0.10.2
     # via
     #   cpg-utils
     #   pip-audit
-tomli==2.2.1
+tornado==6.5.4
     # via
-    #   black
-    #   build
-    #   pip-tools
-tornado==6.4.2
-    # via bokeh
+    #   bokeh
+    #   cpg-flow
+traitlets==5.14.3
+    # via
+    #   ipython
+    #   ipywidgets
+    #   matplotlib-inline
 typer==0.15.2
     # via hail
 typing-extensions==4.12.2
     # via
+    #   aiosignal
+    #   anyio
     #   azure-core
     #   azure-identity
     #   azure-storage-blob
     #   azure-storage-file-datalake
-    #   black
-    #   cloudpathlib
+    #   ipython
     #   janus
-    #   multidict
     #   typer
 tzdata==2025.1
     # via pandas
 urllib3==2.2.3
     # via
     #   botocore
+    #   metamist
     #   requests
 uvloop==0.21.0
     # via hail
 virtualenv==20.28.0
     # via pre-commit
+wcwidth==0.2.14
+    # via prompt-toolkit
 webencodings==0.5.1
     # via html5lib
 wheel==0.45.1
     # via pip-tools
+widgetsnbextension==4.0.15
+    # via ipywidgets
 wrapt==1.17.2
     # via deprecated
 xyzservices==2025.1.0
     # via bokeh
 yarl==1.18.3
-    # via aiohttp
-
-# The following packages are considered to be unsafe in a requirements file:
-# pip
-# setuptools
+    # via
+    #   aiohttp
+    #   gql
diff --git a/requirements.txt b/requirements.txt
index 6253df2..3cf2f5c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,21 +1,21 @@
-#
-# This file is autogenerated by pip-compile with Python 3.10
-# by the following command:
-#
-#    pip-compile --output-file=requirements.txt pyproject.toml
-#
+# This file was autogenerated by uv via the following command:
+#    uv pip compile --output-file=requirements.txt pyproject.toml
 aiodns==2.0.0
     # via hail
-aiohappyeyeballs==2.4.4
+aiohappyeyeballs==2.6.1
     # via aiohttp
-aiohttp==3.11.10
-    # via hail
-aiosignal==1.3.2
+aiohttp==3.13.3
+    # via
+    #   gql
+    #   hail
+aiosignal==1.4.0
     # via aiohttp
 analysis-runner==3.2.2
     # via test-workflows-shared (pyproject.toml)
-async-timeout==5.0.1
-    # via aiohttp
+anyio==4.12.1
+    # via gql
+asttokens==3.0.1
+    # via stack-data
 attrs==24.2.0
     # via aiohttp
 avro==1.11.3
@@ -29,7 +29,7 @@ azure-core==1.32.0
     #   azure-storage-blob
     #   azure-storage-file-datalake
     #   msrest
-azure-identity==1.19.0
+azure-identity==1.25.1
     # via hail
 azure-mgmt-core==1.5.0
     # via azure-mgmt-storage
@@ -42,6 +42,10 @@ azure-storage-blob==12.24.0
     #   hail
 azure-storage-file-datalake==12.18.0
     # via cloudpathlib
+backoff==2.2.1
+    # via
+    #   gql
+    #   metamist
 bokeh==3.3.4
     # via hail
 boto3==1.35.81
@@ -65,22 +69,32 @@ cffi==1.17.1
     # via
     #   cryptography
     #   pycares
+cfgv==3.5.0
+    # via pre-commit
 charset-normalizer==3.4.0
     # via requests
 click==8.1.7
-    # via typer
-cloudpathlib[all,azure,gs,s3]==0.20.0
+    # via
+    #   metamist
+    #   typer
+cloudpathlib==0.20.0
     # via
     #   analysis-runner
     #   cpg-utils
+comm==0.2.3
+    # via ipywidgets
 commonmark==0.9.1
     # via rich
 contourpy==1.3.1
     # via bokeh
-cpg-utils==5.2.0
+cpg-flow==1.3.0
+    # via test-workflows-shared (pyproject.toml)
+cpg-utils==5.4.3
     # via
-    #   analysis-runner
     #   test-workflows-shared (pyproject.toml)
+    #   analysis-runner
+    #   cpg-flow
+    #   metamist
 cryptography==44.0.0
     # via
     #   azure-identity
@@ -88,13 +102,21 @@ cryptography==44.0.0
     #   msal
     #   pyjwt
 decorator==4.4.2
-    # via hail
+    # via
+    #   hail
+    #   ipython
 deprecated==1.2.15
     # via
     #   cpg-utils
     #   hail
-dill==0.3.9
+dill==0.4.1
     # via hail
+distlib==0.4.0
+    # via virtualenv
+executing==2.2.1
+    # via stack-data
+filelock==3.20.3
+    # via virtualenv
 frozendict==2.4.6
     # via cpg-utils
 frozenlist==1.5.0
@@ -102,27 +124,33 @@ frozenlist==1.5.0
     #   aiohttp
     #   aiosignal
     #   hail
-google-api-core[grpc]==2.24.0
+google-api-core==1.34.1
     # via
+    #   google-cloud-artifact-registry
     #   google-cloud-core
     #   google-cloud-secret-manager
     #   google-cloud-storage
+    #   metamist
 google-auth==2.37.0
     # via
     #   cpg-utils
     #   google-api-core
     #   google-auth-oauthlib
+    #   google-cloud-artifact-registry
     #   google-cloud-core
     #   google-cloud-secret-manager
     #   google-cloud-storage
     #   hail
+    #   metamist
 google-auth-oauthlib==0.8.0
     # via hail
+google-cloud-artifact-registry==1.19.0
+    # via cpg-utils
 google-cloud-core==2.4.1
     # via google-cloud-storage
 google-cloud-secret-manager==2.22.0
     # via cpg-utils
-google-cloud-storage==2.19.0
+google-cloud-storage==2.14.0
     # via cloudpathlib
 google-crc32c==1.6.0
     # via
@@ -130,31 +158,51 @@ google-crc32c==1.6.0
     #   google-resumable-media
 google-resumable-media==2.7.2
     # via google-cloud-storage
-googleapis-common-protos[grpc]==1.66.0
+googleapis-common-protos==1.66.0
     # via
     #   google-api-core
     #   grpc-google-iam-v1
     #   grpcio-status
-grpc-google-iam-v1==0.13.1
-    # via google-cloud-secret-manager
+gql==3.5.3
+    # via metamist
+graphql-core==3.2.6
+    # via gql
+grpc-google-iam-v1==0.14.3
+    # via
+    #   google-cloud-artifact-registry
+    #   google-cloud-secret-manager
 grpcio==1.68.1
     # via
+    #   cpg-flow
     #   google-api-core
+    #   google-cloud-artifact-registry
     #   googleapis-common-protos
     #   grpc-google-iam-v1
     #   grpcio-status
 grpcio-status==1.48.2
-    # via google-api-core
-hail==0.2.133
     # via
-    #   analysis-runner
+    #   cpg-flow
+    #   google-api-core
+hail==0.2.137
+    # via
     #   test-workflows-shared (pyproject.toml)
+    #   analysis-runner
+    #   cpg-flow
 humanize==4.11.0
     # via hail
+identify==2.6.16
+    # via pre-commit
 idna==3.10
     # via
+    #   anyio
     #   requests
     #   yarl
+ipython==9.9.0
+    # via ipywidgets
+ipython-pygments-lexers==1.1.1
+    # via ipython
+ipywidgets==8.1.8
+    # via cpg-flow
 isodate==0.7.2
     # via
     #   azure-storage-blob
@@ -162,6 +210,8 @@ isodate==0.7.2
     #   msrest
 janus==1.0.0
     # via hail
+jedi==0.19.2
+    # via ipython
 jinja2==3.1.5
     # via bokeh
 jmespath==1.0.1
@@ -170,8 +220,18 @@ jmespath==1.0.1
     #   botocore
 jproperties==2.1.2
     # via hail
+jupyterlab-widgets==3.0.16
+    # via ipywidgets
+loguru==0.7.3
+    # via
+    #   test-workflows-shared (pyproject.toml)
+    #   cpg-flow
 markupsafe==3.0.2
     # via jinja2
+matplotlib-inline==0.2.1
+    # via ipython
+metamist==7.13.4
+    # via cpg-flow
 msal==1.31.1
     # via
     #   azure-identity
@@ -186,7 +246,11 @@ multidict==6.1.0
     #   yarl
 nest-asyncio==1.6.0
     # via hail
-numpy==1.26.4
+networkx==3.6.1
+    # via cpg-flow
+nodeenv==1.10.0
+    # via pre-commit
+numpy==2.4.1
     # via
     #   bokeh
     #   contourpy
@@ -207,29 +271,45 @@ pandas==2.2.3
     #   hail
 parsimonious==0.10.0
     # via hail
+parso==0.8.5
+    # via jedi
+pexpect==4.9.0
+    # via ipython
 pillow==11.0.0
     # via bokeh
+platformdirs==4.5.1
+    # via virtualenv
 plotly==5.24.1
-    # via hail
+    # via
+    #   cpg-flow
+    #   hail
 portalocker==2.10.1
     # via msal-extensions
+pre-commit==4.5.1
+    # via cpg-flow
+prompt-toolkit==3.0.52
+    # via ipython
 propcache==0.2.1
     # via
     #   aiohttp
     #   yarl
 proto-plus==1.25.0
     # via
-    #   google-api-core
+    #   google-cloud-artifact-registry
     #   google-cloud-secret-manager
 protobuf==3.20.2
     # via
     #   google-api-core
+    #   google-cloud-artifact-registry
     #   google-cloud-secret-manager
     #   googleapis-common-protos
     #   grpc-google-iam-v1
     #   grpcio-status
-    #   hail
     #   proto-plus
+ptyprocess==0.7.0
+    # via pexpect
+pure-eval==0.2.3
+    # via stack-data
 py4j==0.10.9.7
     # via pyspark
 pyasn1==0.6.1
@@ -243,16 +323,18 @@ pycares==4.5.0
 pycparser==2.22
     # via cffi
 pygments==2.18.0
-    # via rich
-pyjwt[crypto]==2.10.1
     # via
-    #   msal
-    #   pyjwt
+    #   ipython
+    #   ipython-pygments-lexers
+    #   rich
+pyjwt==2.10.1
+    # via msal
 pyspark==3.5.3
     # via hail
 python-dateutil==2.9.0.post0
     # via
     #   botocore
+    #   metamist
     #   pandas
 python-json-logger==2.0.7
     # via hail
@@ -261,24 +343,31 @@ pytz==2024.2
 pyyaml==6.0.2
     # via
     #   bokeh
+    #   cpg-flow
     #   hail
+    #   pre-commit
 regex==2024.11.6
     # via parsimonious
-requests==2.32.3
+requests==2.32.5
     # via
     #   analysis-runner
     #   azure-core
     #   cpg-utils
     #   google-api-core
     #   google-cloud-storage
+    #   gql
     #   hail
+    #   metamist
     #   msal
     #   msrest
     #   requests-oauthlib
+    #   requests-toolbelt
 requests-oauthlib==2.0.0
     # via
     #   google-auth-oauthlib
     #   msrest
+requests-toolbelt==1.0.0
+    # via gql
 rich==12.6.0
     # via
     #   hail
@@ -287,7 +376,7 @@ rsa==4.9
     # via google-auth
 s3transfer==0.10.4
     # via boto3
-scipy==1.11.4
+scipy==1.17.0
     # via hail
 shellingham==1.5.4
     # via typer
@@ -298,40 +387,60 @@ six==1.17.0
     #   python-dateutil
 sortedcontainers==2.4.0
     # via hail
+stack-data==0.6.3
+    # via ipython
 tabulate==0.9.0
     # via
     #   analysis-runner
     #   cpg-utils
     #   hail
+    #   metamist
 tenacity==9.0.0
     # via plotly
 toml==0.10.2
     # via cpg-utils
-tornado==6.4.2
-    # via bokeh
+tornado==6.5.4
+    # via
+    #   bokeh
+    #   cpg-flow
+traitlets==5.14.3
+    # via
+    #   ipython
+    #   ipywidgets
+    #   matplotlib-inline
 typer==0.15.1
     # via hail
 typing-extensions==4.12.2
     # via
+    #   aiosignal
+    #   anyio
     #   azure-core
     #   azure-identity
     #   azure-storage-blob
     #   azure-storage-file-datalake
-    #   cloudpathlib
+    #   ipython
     #   janus
-    #   multidict
     #   typer
 tzdata==2024.2
     # via pandas
 urllib3==2.2.3
     # via
     #   botocore
+    #   metamist
     #   requests
 uvloop==0.21.0
     # via hail
+virtualenv==20.36.1
+    # via pre-commit
+wcwidth==0.2.14
+    # via prompt-toolkit
+widgetsnbextension==4.0.15
+    # via ipywidgets
 wrapt==1.17.0
     # via deprecated
 xyzservices==2024.9.0
     # via bokeh
 yarl==1.18.3
-    # via aiohttp
+    # via
+    #   aiohttp
+    #   gql