File tree Expand file tree Collapse file tree 1 file changed +17
-1
lines changed
data-pipeline/src/data_pipeline/datasets/gnomad_v2 Expand file tree Collapse file tree 1 file changed +17
-1
lines changed Original file line number Diff line number Diff line change 11import hail as hl
22
33
4- def prepare_gnomad_regional_missense_constraint (path ):
4+ def prepare_gnomad_regional_missense_constraint (path , liftover = False ):
55 ds = hl .read_table (path )
66
77 # rename key field transcript_id to transcript to allow merging in genes pipeline
@@ -25,6 +25,22 @@ def prepare_gnomad_regional_missense_constraint(path):
2525 p_value = ds_with_rmc .regions .p ,
2626 ),
2727 )
28+
29+ if liftover :
30+ rg37 = hl .get_reference ("GRCh37" )
31+ rg38 = hl .get_reference ("GRCh38" )
32+
33+ if not rg37 .has_liftover (rg38 ):
34+ chain_file_path = "gs://hail-common-references/grch37_to_grch38.over.chain.gz"
35+ rg37 .add_liftover (chain_file_path , rg38 )
36+
37+ ds_with_rmc = ds_with_rmc .annotate (
38+ start = hl .liftover (ds_with_rmc .start , "GRCh38" ),
39+ stop = hl .liftover (ds_with_rmc .stop , "GRCh38" ),
40+ start_grch37 = ds_with_rmc .start ,
41+ stop_grch37 = ds_with_rmc .stop ,
42+ )
43+
2844 ds_with_rmc = ds_with_rmc .group_by ("transcript_id" ).aggregate (regions = hl .agg .collect (ds_with_rmc .row_value ).regions )
2945
3046 ds_with_rmc = ds_with_rmc .group_by ("transcript_id" ).aggregate (regions_array = hl .agg .collect (ds_with_rmc .row_value ))
You can’t perform that action at this time.
0 commit comments