Skip to content

Delphi cheatsheet

Rodrigo Bonazzola edited this page Jan 2, 2026 · 3 revisions

Building the model

from delphi.model.transformer import (
    Delphi,
    EmbedConfig,
    DelphiConfig,
)

# NOTE(review): `parse_attention_scheme` and `load_embed_config` are used below
# but are not among the names imported above; import them from the delphi
# package (module path not shown on this page). `root_path` must also be
# defined beforehand; it is combined with 'tokens' via `/`, so presumably a
# pathlib.Path pointing at the data root -- confirm.

# Attention scheme: supplied as a string and parsed into the object that
# DelphiConfig receives.
attention_scheme_str = ...
attention_scheme = parse_attention_scheme(attention_scheme_str)

# Domains to include in the model, given as a comma-separated string.
domains_str = "DOMAIN1,DOMAIN2,DOMAIN3,..."
domains = domains_str.split(",")

# Load the configuration of every domain declared in the YAML file, then keep
# only the requested ones. A requested domain that is missing from the YAML
# raises KeyError here rather than being silently dropped.
domain_config_yaml = "PATH/TO/DOMAIN_CONFIG/YAML"
default_cfg_per_domain = load_embed_config(domain_config_yaml, root_path / 'tokens')
domain_cfg = {name: default_cfg_per_domain[name] for name in domains}

config = DelphiConfig(
    n_embd=...,
    n_layer=...,
    token_dropout=...,
    domains=domain_cfg,
    attention_scheme=attention_scheme,
)

model = Delphi(config)

The following is an exemplar YAML file for domain configuration:

# Example domain configuration: one top-level key per domain.
# NOTE(review): `path` is presumably resolved relative to the tokens directory
# passed to load_embed_config -- confirm.
# NOTE(review): the exact semantics of `predict`, `age_jitter` and `at_birth`
# are not documented on this page -- check the delphi source before relying
# on them.
diseases:
  projector: embed
  path: diseases
  predict: true

death:
  projector: embed
  path: death
  predict: true

cv_drugs:
  projector: embed
  path: cv_drugs
  predict: true

ns_drugs:
  projector: embed
  path: ns_drugs
  predict: true

lifestyle:
  projector: embed
  path: lifestyle
  age_jitter: true

hla_alleles:
  projector: embed
  path: hla_alleles
  at_birth: true

sex:
  projector: embed
  path: sex
  at_birth: true

rare_variants:
  projector: embed
  path: rare_variants
  at_birth: true

# NOTE(review): `padding` declares no `path`; presumably it has no token file
# of its own -- confirm.
padding:
  projector: embed

Loading the data

# Call a function returning three lists of subject IDs
# (train / validation / test).
train_ids, val_ids, test_ids = ...

# Optionally restrict every split to a subset of subjects.
# `subset_of_subjects` may be None (no restriction), a string path to a
# headerless CSV file whose first column holds the subject IDs, or an
# in-memory collection of subject IDs (e.g. a list).
if subset_of_subjects is not None:
    if isinstance(subset_of_subjects, str):
        subset_of_subjects = pd.read_csv(subset_of_subjects, header=None)[0].tolist()
    # Build the lookup set once and reuse it for all three splits.
    allowed_ids = set(subset_of_subjects)
    # NOTE: set intersection does not preserve the order of the original lists.
    train_ids = list(set(train_ids) & allowed_ids)
    val_ids = list(set(val_ids) & allowed_ids)
    test_ids = list(set(test_ids) & allowed_ids)

# Keyword arguments shared by the three dataset splits.
dataset_config = dict(root=root_path, domains=domain_cfg, exclusions=[], required_domains=["diseases"])

train_dataset = DelphiDataset(subjects=train_ids, **dataset_config).to(DEVICE)
valid_dataset = DelphiDataset(subjects=val_ids, **dataset_config).to(DEVICE)
test_dataset = DelphiDataset(subjects=test_ids, **dataset_config).to(DEVICE)

# One dataloader per split, in (train, valid, test) order. The batch sizes are
# listed per split so that each one can be tuned independently.
split_datasets = (train_dataset, valid_dataset, test_dataset)
split_batch_sizes = (batch_size, batch_size, batch_size)
dataloaders = [
    DelphiDataloader(dataset, batch_size=split_batch_size)
    for dataset, split_batch_size in zip(split_datasets, split_batch_sizes)
]

Training the model

Clone this wiki locally