diff --git a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py index 38a6747..08a0c98 100644 --- a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py +++ b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/imputation.py @@ -336,7 +336,7 @@ async def impute(args: dict): batch_regions, args['use_checkpoints'], contig, - len(chunks)) + len(chunks) * 5) if union_j is not None: union_j.depends_on(*union_ligate_input_jobs.get(contig, [])) diff --git a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py index 666777c..eeb4943 100644 --- a/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py +++ b/imputation/glimpse_hail_batch/glimpse_hail_batch/imputation/jobs.py @@ -489,6 +489,8 @@ def add_info_if_needed(mt): mt_init = hl.read_matrix_table(paths[0]) intervals = mt_init._calculate_new_partitions(n_partitions) + last_mt_left = None + mt_left = hl.read_matrix_table(paths[0], _intervals=intervals) mt_left = add_info_if_needed(mt_left) mt_left = mt_left.annotate_rows( @@ -510,6 +512,12 @@ def add_info_if_needed(mt): drop_right_row_fields=False, row_join_type='outer') + if idx % 5 == 0: + mt_left = mt_left.persist() + if last_mt_left is not None: + last_mt_left.unpersist() + last_mt_left = mt_left + mt = mt_left n_batches = len(paths)