From 355696fce8a078fa303feb37c021a124dc4450bd Mon Sep 17 00:00:00 2001 From: Jackie Goldstein Date: Mon, 10 Nov 2025 15:10:14 -0500 Subject: [PATCH 1/2] Added persist and unpersist in union cols --- .../glimpse_hail_batch/imputation/jobs.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py index 666777c..eeb4943 100644 --- a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py +++ b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py @@ -489,6 +489,8 @@ def add_info_if_needed(mt): mt_init = hl.read_matrix_table(paths[0]) intervals = mt_init._calculate_new_partitions(n_partitions) + last_mt_left = None + mt_left = hl.read_matrix_table(paths[0], _intervals=intervals) mt_left = add_info_if_needed(mt_left) mt_left = mt_left.annotate_rows( @@ -510,6 +512,12 @@ def add_info_if_needed(mt): drop_right_row_fields=False, row_join_type='outer') + if idx % 5 == 0: + mt_left = mt_left.persist() + if last_mt_left is not None: + last_mt_left.unpersist() + last_mt_left = mt_left + mt = mt_left n_batches = len(paths) From c8835f8bbf02373a2bd2f29028311a36c1b903ba Mon Sep 17 00:00:00 2001 From: Jackie Goldstein Date: Mon, 10 Nov 2025 15:11:54 -0500 Subject: [PATCH 2/2] increase n partitions --- .../glimpse_hail_batch/imputation/imputation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py index 38a6747..08a0c98 100644 --- a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py +++ b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py @@ -336,7 +336,7 @@ async def impute(args: dict): batch_regions, args['use_checkpoints'], contig, - len(chunks)) + len(chunks) * 5) if union_j is not None: union_j.depends_on(*union_ligate_input_jobs.get(contig, []))