forked from gerstung-lab/Delphi
-
Notifications
You must be signed in to change notification settings - Fork 0
Delphi cheatsheet
Rodrigo Bonazzola edited this page Jan 2, 2026
·
3 revisions
from delphi.model.transformer import (
Delphi,
EmbedConfig,
DelphiConfig,
)
# Build the Delphi model configuration from an attention scheme and a
# per-domain embedding configuration.
# NOTE(review): `parse_attention_scheme`, `load_embed_config` and `root_path`
# are not imported or defined in this snippet; bring them into scope first.

# Placeholder: replace `...` with the attention-scheme string to parse.
attention_scheme_str = ...
attention_scheme = parse_attention_scheme(attention_scheme_str)

# Comma-separated domain names, split into a list of names.
domains = "DOMAIN1,DOMAIN2,DOMAIN3,..."
domains = domains.split(",")

domain_config_yaml = "PATH/TO/DOMAIN_CONFIG/YAML"
default_cfg_per_domain = load_embed_config(domain_config_yaml, root_path / 'tokens')

# Keep only the requested domains. A name that is missing from the loaded
# defaults raises KeyError here instead of being dropped silently.
domain_cfg = {k: default_cfg_per_domain[k] for k in domains}

# Placeholders (`...`) must be replaced with concrete hyperparameter values.
config = DelphiConfig(
    n_embd=..., n_layer=...,
    token_dropout=...,
    domains=domain_cfg,
    attention_scheme=attention_scheme
)
model = Delphi(config)

The following is an exemplar YAML file for domain configuration:
# Example domain configuration: one top-level key per domain, each holding
# that domain's settings (projector, optional path, optional flags).
diseases:
  projector: embed
  path: diseases
  predict: true
death:
  projector: embed
  path: death
  predict: true
cv_drugs:
  projector: embed
  path: cv_drugs
  predict: true
ns_drugs:
  projector: embed
  path: ns_drugs
  predict: true
lifestyle:
  projector: embed
  path: lifestyle
  age_jitter: true
hla_alleles:
  projector: embed
  path: hla_alleles
  at_birth: true
sex:
  projector: embed
  path: sex
  at_birth: true
rare_variants:
  projector: embed
  path: rare_variants
  at_birth: true
padding:
  projector: embed

# A function that returns lists of subject IDs
# Build the train/validation/test datasets and one dataloader per split.
# NOTE(review): `subset_of_subjects`, `root_path`, `domain_cfg`, `DEVICE`,
# `batch_size`, `DelphiDataset` and `DelphiDataloader` must already be in scope.

train_ids, val_ids, test_ids = ...  # placeholder: supply the three ID lists

if subset_of_subjects is not None:
    # A string is treated as a path to a header-less CSV file; the IDs are
    # read from its first column. A list is used as-is.
    if isinstance(subset_of_subjects, str):
        subset_of_subjects = pd.read_csv(subset_of_subjects, header=None)[0].tolist()

    # Build the lookup set once and intersect every split with it.
    # Going through sets does not preserve the original ID order.
    subset_ids = set(subset_of_subjects)
    train_ids = list(set(train_ids) & subset_ids)
    val_ids = list(set(val_ids) & subset_ids)
    test_ids = list(set(test_ids) & subset_ids)

# Keyword arguments shared by all three datasets.
dataset_config = dict(root=root_path, domains=domain_cfg, exclusions=[], required_domains=["diseases"])

train_dataset = DelphiDataset(subjects=train_ids, **dataset_config).to(DEVICE)
valid_dataset = DelphiDataset(subjects=val_ids, **dataset_config).to(DEVICE)
test_dataset = DelphiDataset(subjects=test_ids, **dataset_config).to(DEVICE)

# Same batch size for every split; order is train, validation, test.
dataloaders = [
    DelphiDataloader(dataset, batch_size=batch_size)
    for dataset in (train_dataset, valid_dataset, test_dataset)
]