Skip to content

Commit 8e5bcb2

Browse files
committed
feat(pipelines): annotate v2 rmc with liftover coords
1 parent 1343ab2 commit 8e5bcb2

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

data-pipeline/src/data_pipeline/datasets/gnomad_v2/gnomad_v2_regional_missense_constraint.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import hail as hl
22

33

4-
def prepare_gnomad_regional_missense_constraint(path):
4+
def prepare_gnomad_regional_missense_constraint(path, liftover=False):
55
ds = hl.read_table(path)
66

77
# rename key field transcript_id to transcript to allow merging in genes pipeline
@@ -25,6 +25,22 @@ def prepare_gnomad_regional_missense_constraint(path):
2525
p_value=ds_with_rmc.regions.p,
2626
),
2727
)
28+
29+
if liftover:
30+
rg37 = hl.get_reference("GRCh37")
31+
rg38 = hl.get_reference("GRCh38")
32+
33+
if not rg37.has_liftover(rg38):
34+
chain_file_path = "gs://hail-common-references/grch37_to_grch38.over.chain.gz"
35+
rg37.add_liftover(chain_file_path, rg38)
36+
37+
ds_with_rmc = ds_with_rmc.annotate(
38+
start=hl.liftover(ds_with_rmc.start, "GRCh38"),
39+
stop=hl.liftover(ds_with_rmc.stop, "GRCh38"),
40+
start_grch37=ds_with_rmc.start,
41+
stop_grch37=ds_with_rmc.stop,
42+
)
43+
2844
ds_with_rmc = ds_with_rmc.group_by("transcript_id").aggregate(regions=hl.agg.collect(ds_with_rmc.row_value).regions)
2945

3046
ds_with_rmc = ds_with_rmc.group_by("transcript_id").aggregate(regions_array=hl.agg.collect(ds_with_rmc.row_value))

0 commit comments

Comments
 (0)