diff --git a/config/demog/ArchIE_4D19.yaml b/config/demog/ArchIE_4D19.yaml deleted file mode 100644 index d84a02e..0000000 --- a/config/demog/ArchIE_4D19.yaml +++ /dev/null @@ -1,29 +0,0 @@ -doi: - - https://doi.org/10.1371/journal.pgen.1008175 -time_units: generations -demes: - - name: Anc - epochs: - - {end_time: 2500, start_size: 10000} - - name: Ghost - ancestors: [Anc] - start_time: 12000 - epochs: # https://github.com/sriramlab/ArchIE/blob/master/simulations/ms.sh#L7C48-L7C179 - - {end_time: 6120, start_size: 10000} - - {end_time: 6000, start_size: 100} - - {end_time: 0, start_size: 10000} - - name: Ref - ancestors: [Anc] - epochs: - - {end_time: 0, start_size: 10000} - - name: Tgt - ancestors: [Anc] - epochs: - - {end_time: 0, start_size: 10000} - - name: Unused - ancestors: [Ghost] - start_time: 7000 - epochs: - - {end_time: 0, start_size: 10000} -pulses: - - {sources: [Ghost], dest: Tgt, time: 2000, proportions: [0.02]} diff --git a/config/demog/HumanNeanderthal_3G21.yaml b/config/demog/HumanNeanderthal_3G21.yaml deleted file mode 100644 index 112c054..0000000 --- a/config/demog/HumanNeanderthal_3G21.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Human-Neantherthal introgression model -description: - A composite hominin model with demographic parameters taken from - Gower et al. (2021). - -time_units: generations - -doi: -- https://doi.org/10.7554/eLife.64669 - -demes: - - name: Anc - epochs: - - {end_time: 2265.5172413793102, start_size: 18500.0} - - name: Nea - ancestors: [Anc] - start_time: 18965.51724137931 - epochs: - - {end_time: 0, start_size: 3400.0} - - name: CEU - ancestors: [Anc] - epochs: - - {end_time: 1100.0, start_size: 1080.0} - - {start_size: 1450.0, end_size: 13377.357734109575} - - name: YRI - ancestors: [Anc] - epochs: - - {end_time: 0, start_size: 27000.0} -pulses: - - {sources: [Nea], dest: CEU, time: 1896.551724137931, proportions: [0.0225]} diff --git a/config/features/archie.features.yaml b/config/features/archie.features.yaml deleted file mode 100644 index 7dd97a1..0000000 --- a/config/features/archie.features.yaml +++ /dev/null @@ -1,16 +0,0 @@ -features: - reference distances: - minimum: - target distances: - all: - mean: - variance: - skew: - kurtosis: - individual allele frequency spectra: - number of private mutations: - sstar: - genotype distance: 'archie' - match bonus: 5000 - max mismatch: 5 - mismatch penalty: -10000 diff --git a/config/scenarios/config1.yaml b/config/scenarios/config1.yaml deleted file mode 100644 index 1fd4140..0000000 --- a/config/scenarios/config1.yaml +++ /dev/null @@ -1,48 +0,0 @@ -seed: 12345 -feature_id: "archie" -feature_config: "config/features/archie.features.yaml" -nfeature: 1000000 -nref: 108 -ntgt: 50 -ploidy: 2 -geno_states: ["phased", "unphased"] -output_prefix: "gita.lr.archie.config1" - -nrep: - train: 10 - test: 10 - -seq_len: - train: 50000 - test: 200000000 - -demog_id: - train: "HumanNeanderthal_3G21" - test: "HumanNeanderthal_3G21" - -demes: - train: "config/demog/HumanNeanderthal_3G21.yaml" - test: "config/demog/HumanNeanderthal_3G21.yaml" - -mut_rate: - train: 1.29e-8 - test: 1.29e-8 - -rec_rate: - train: 1e-8 - test: 1e-8 - -ref_id: - train: "YRI" - test: "YRI" - -tgt_id: - train: "CEU" - test: "CEU" - -src_id: - train: "Nea" - test: "Nea" - -win_step: 10000 -cutoff_num: 20 diff --git a/config/scenarios/config2.yaml b/config/scenarios/config2.yaml deleted file mode 100644 index 76fbf38..0000000 --- a/config/scenarios/config2.yaml +++ /dev/null @@ -1,48 +0,0 @@ -seed: 12345 -feature_id: "archie" -feature_config: "config/features/archie.features.yaml" -nfeature: 1000000 -nref: 50 -ntgt: 50 -ploidy: 2 -geno_states: ["phased", "unphased"] -output_prefix: "gita.lr.archie.config2" - -nrep: - train: 10 - test: 10 - -seq_len: - train: 50000 - test: 200000000 - -demog_id: - train: "ArchIE_4D19" - test: "HumanNeanderthal_3G21" - -demes: - train: "config/demog/ArchIE_4D19.yaml" - test: "config/demog/HumanNeanderthal_3G21.yaml" - -mut_rate: - train: 1.25e-8 - test: 1.29e-8 - -rec_rate: - train: 1e-8 - test: 1e-8 - -ref_id: - train: "Ref" - test: "YRI" - -tgt_id: - train: "Tgt" - test: "CEU" - -src_id: - train: "Ghost" - test: "Nea" - -win_step: 10000 -cutoff_num: 20 diff --git a/config/scenarios/config3.yaml b/config/scenarios/config3.yaml deleted file mode 100644 index b06fef3..0000000 --- a/config/scenarios/config3.yaml +++ /dev/null @@ -1,48 +0,0 @@ -seed: 12345 -feature_id: "archie" -feature_config: "config/features/archie.features.yaml" -nfeature: 1000000 -nref: 50 -ntgt: 50 -ploidy: 2 -geno_states: ["phased", "unphased"] -output_prefix: "gita.lr.archie.config3" - -nrep: - train: 10 - test: 10 - -seq_len: - train: 50000 - test: 200000000 - -demog_id: - train: "HumanNeanderthal_3G21" - test: "HumanNeanderthal_3G21" - -demes: - train: "config/demog/HumanNeanderthal_3G21.yaml" - test: "config/demog/HumanNeanderthal_3G21.yaml" - -mut_rate: - train: 1.29e-8 - test: 1.29e-8 - -rec_rate: - train: 1e-8 - test: 1e-8 - -ref_id: - train: "YRI" - test: "YRI" - -tgt_id: - train: "CEU" - test: "CEU" - -src_id: - train: "Nea" - test: "Nea" - -win_step: 10000 -cutoff_num: 20 diff --git a/config/scenarios/config4.yaml b/config/scenarios/config4.yaml deleted file mode 100644 index f3c437a..0000000 --- a/config/scenarios/config4.yaml +++ /dev/null @@ -1,48 +0,0 @@ -seed: 12345 -feature_id: "archie" -feature_config: "config/features/archie.features.yaml" -nfeature: 1000000 -nref: 50 -ntgt: 50 -ploidy: 2 -geno_states: ["phased", "unphased"] -output_prefix: "gita.lr.archie.config4" - -nrep: - train: 10 - test: 10 - -seq_len: - train: 50000 - test: 200000000 - -demog_id: - train: "ArchIE_4D19" - test: "ArchIE_4D19" - -demes: - train: "config/demog/ArchIE_4D19.yaml" - test: "config/demog/ArchIE_4D19.yaml" - -mut_rate: - train: 1.25e-8 - test: 1.25e-8 - -rec_rate: - train: 1e-8 - test: 1e-8 - -ref_id: - train: "Ref" - test: "Ref" - -tgt_id: - train: "Tgt" - test: "Tgt" - -src_id: - train: "Ghost" - test: "Ghost" - -win_step: 10000 -cutoff_num: 20 diff --git a/config/scenarios/config5.yaml b/config/scenarios/config5.yaml deleted file mode 100644 index 2310cae..0000000 --- a/config/scenarios/config5.yaml +++ /dev/null @@ -1,48 +0,0 @@ -seed: 12345 -feature_id: "archie" -feature_config: "config/features/archie.features.yaml" -nfeature: 1000000 -nref: 50 -ntgt: 50 -ploidy: 2 -geno_states: ["phased", "unphased"] -output_prefix: "gita.lr.archie.config5" - -nrep: - train: 10 - test: 10 - -seq_len: - train: 50000 - test: 200000000 - -demog_id: - train: "HumanNeanderthal_3G21" - test: "ArchIE_4D19" - -demes: - train: "config/demog/HumanNeanderthal_3G21.yaml" - test: "config/demog/ArchIE_4D19.yaml" - -mut_rate: - train: 1.29e-8 - test: 1.25e-8 - -rec_rate: - train: 1e-8 - test: 1e-8 - -ref_id: - train: "YRI" - test: "Ref" - -tgt_id: - train: "CEU" - test: "Tgt" - -src_id: - train: "Nea" - test: "Ghost" - -win_step: 10000 -cutoff_num: 20 diff --git a/config/scenarios/config6.yaml b/config/scenarios/config6.yaml deleted file mode 100644 index a18cecf..0000000 --- a/config/scenarios/config6.yaml +++ /dev/null @@ -1,48 +0,0 @@ -seed: 12345 -feature_id: "archie" -feature_config: "config/features/archie.features.yaml" -nfeature: 10000000 -nref: 50 -ntgt: 50 -ploidy: 2 -geno_states: ["phased", "unphased"] -output_prefix: "gita.lr.archie.config6" - -nrep: - train: 10 - test: 10 - -seq_len: - train: 50000 - test: 200000000 - -demog_id: - train: "ArchIE_4D19" - test: "ArchIE_4D19" - -demes: - train: "config/demog/ArchIE_4D19.yaml" - test: "config/demog/ArchIE_4D19.yaml" - -mut_rate: - train: 1.25e-8 - test: 1.25e-8 - -rec_rate: - train: 1e-8 - test: 1e-8 - -ref_id: - train: "Ref" - test: "Ref" - -tgt_id: - train: "Tgt" - test: "Tgt" - -src_id: - train: "Ghost" - test: "Ghost" - -win_step: 10000 -cutoff_num: 20 diff --git a/workflow/envs/sai.yaml b/workflow/envs/sai.yaml new file mode 100644 index 0000000..dd8af4d --- /dev/null +++ b/workflow/envs/sai.yaml @@ -0,0 +1,13 @@ +name: sai +channels: + - conda-forge + - bioconda +dependencies: + - natsort=8.4.0 + - numpy=1.26.4 + - pandas=2.2.1 + - pydantic=2.11.7 + - pysam=0.23.0 + - python=3.9.19 + - scikit-allel=1.3.7 + - scipy=1.12.0 diff --git a/workflow/rules/commons/plot.smk b/workflow/rules/commons/plot.smk deleted file mode 100644 index db10dba..0000000 --- a/workflow/rules/commons/plot.smk +++ /dev/null @@ -1,11 +0,0 @@ -rule plot: - input: - performance = rules.summary.output.summary, - output: - png = "results/plots/{output_prefix}.performance.png", - log: - "logs/plot/{output_prefix}.log", - resources: - partition = "himem,gpu", - script: - "../scripts/plot.py" diff --git a/workflow/rules/commons/simulation.smk b/workflow/rules/commons/simulation.smk deleted file mode 100644 index 1676f7f..0000000 --- a/workflow/rules/commons/simulation.smk +++ /dev/null @@ -1,131 +0,0 @@ -rule simulate_test_data: - input: - demes_file = demes["test"], - output: - ts = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.ts", - vcf = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.vcf", - ref = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.ref.ind.list", - tgt = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.tgt.ind.list", - log: - "logs/simulate_test_data/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.log", - resources: - partition = "himem,gpu", - params: - ploidy = ploidy, - seq_len = seq_len["test"], - mut_rate = mut_rate["test"], - rec_rate = rec_rate["test"], - ref_id = ref_id["test"], - tgt_id = tgt_id["test"], - src_id = src_id["test"], - script: - "../scripts/simulation.py" - - -rule compress_vcf: - input: - vcf = rules.simulate_test_data.output.vcf, - output: - vcf = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.vcf.gz", - log: - "logs/compress_vcf/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.log", - resources: - partition = "himem,gpu", - shell: - """ - bgzip -c {input.vcf} > {output.vcf} 2> {log} - tabix -p vcf {output.vcf} 2>> {log} - rm {input.vcf} 2>> {log} - """ - - -rule get_phased_true_tracts: - input: - ts = rules.simulate_test_data.output.ts, - output: - bed = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.phased.true.tracts.bed", - log: - "logs/get_phased_true_tracts/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.log", - params: - ploidy = ploidy, - tgt_id = tgt_id["test"], - src_id = src_id["test"], - resources: - partition = "himem", - time = 60, - mem_gb = 2000, - cpus = 128, - run: - import tskit - import pyranges as pr - from multiprocessing import Process, Manager - - def worker_func(in_queue, out_queue, **kwargs): - while True: - trees, migration = in_queue.get() - tgt_name = kwargs['tgt_name'] - ts = kwargs['ts'] - ploidy = kwargs['ploidy'] - - res = '' - - try: - for t in trees: - for n in ts.samples(tgt_name): - if t.is_descendant(n, migration.node): - left = migration.left if migration.left > t.interval.left else t.interval.left - right = migration.right if migration.right < t.interval.right else t.interval.right - res += f'1\t{int(left)}\t{int(right)}\ttsk_{ts.node(n).individual}_{int(n%ploidy+1)}\n' - out_queue.put(res) - except Exception as e: - # Handle or log the exception as needed - print(f"Error in worker: {e}") - - ts = tskit.load(input.ts) - - src_name = [p.id for p in ts.populations() if p.metadata['name']==params.src_id][0] - tgt_name = [p.id for p in ts.populations() if p.metadata['name']==params.tgt_id][0] - - res = "Chromosome\tStart\tEnd\tSample\n" - with Manager() as manager: - in_queue = manager.Queue() - out_queue = manager.Queue() - keywords = {'tgt_name': tgt_name, 'ts': ts, 'ploidy': params.ploidy} - workers = [ - Process(target=worker_func, args=(in_queue, out_queue), kwargs=keywords) for i in range(resources.cpus) - ] - - num_introgression = 0 - - for m in ts.migrations(): - if (m.dest==src_name) and (m.source==tgt_name): - in_queue.put((ts.trees(left=m.left, right=m.right), m)) - num_introgression += 1 - - for w in workers: w.start() - - try: - for i in range(num_introgression): - item = out_queue.get() - res += item - for w in workers: w.terminate() - except Exception as e: - print(f"Error in manager: {e}") - - res = pr.from_string(res) - res = res.merge(strand=False, by='Sample') - if not res.empty: res.to_csv(output.bed, sep="\t", header=False) - else: open(output.bed, 'w').close() - - -rule get_unphased_true_tracts: - input: - bed = rules.get_phased_true_tracts.output.bed, - output: - bed = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.unphased.true.tracts.bed", - log: - "logs/get_unphased_true_tracts/{test_demog}/nref_{nref}/ntgt_{ntgt}/{test_seed}/{output_prefix}.log", - resources: - partition = "himem,gpu", - script: - "../scripts/get_unphased_true_tracts.py" diff --git a/workflow/rules/methods/lr/apply.gita.lr.smk b/workflow/rules/methods/lr/apply.gita.lr.smk deleted file mode 100644 index 0a00386..0000000 --- a/workflow/rules/methods/lr/apply.gita.lr.smk +++ /dev/null @@ -1,2 +0,0 @@ -rule process_real_data: -rule apply_logistic_regression_model: diff --git a/workflow/rules/methods/lr/test.gita.lr.smk b/workflow/rules/methods/lr/test.gita.lr.smk deleted file mode 100644 index 9985c1c..0000000 --- a/workflow/rules/methods/lr/test.gita.lr.smk +++ /dev/null @@ -1,93 +0,0 @@ -rule process_test_data: - input: - vcf = rules.compress_vcf.output.vcf, - ref = rules.simulate_test_data.output.ref, - tgt = rules.simulate_test_data.output.tgt, - feature_config = feature_config, - output: - features = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/{test_seed}/{output_prefix}.features.gz", - params: - seq_len = seq_len["train"], - win_step = win_step, - is_phased = lambda wildcards: '--phased' if wildcards.geno_state == 'phased' else '', - output_dir = os.path.join(output_dir["test"], "{geno_state}/{test_seed}"), - features = "results/data/test/{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/{test_seed}/{output_prefix}.features", - log: - "logs/process_test_data/{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/{test_seed}/{output_prefix}.log", - resources: - partition = "himem,gpu", - mem_gb = 32, - cpus = 16, - shell: - """ - gita lr preprocess --vcf {input.vcf} --ref {input.ref} --tgt {input.tgt} --features {input.feature_config} \ - --win-len {params.seq_len} --win-step {params.win_step} --output-prefix {wildcards.output_prefix} --output-dir {params.output_dir} \ - --worker {resources.cpus} {params.is_phased} - gzip -c {params.features} > {output.features} 2> {log} - rm {params.features} 2>> {log} - """ - - -rule test_logistic_regression_model: - input: - features = rules.process_test_data.output.features, - model_file = rules.train_logistic_regression_model.output.model_file, - output: - pred = "results/performance/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.lr.pred.gz", - params: - pred = "results/performance/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.lr.pred", - output_dir = "results/performance/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}", - log: - "logs/test_logistic_regression_model/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.log", - resources: - partition = "himem,gpu", - mem_gb = 100, - shell: - """ - gita lr infer --features {input.features} --model-file {input.model_file} --output-prefix {output_prefix} --output-dir {params.output_dir} - gzip -c {params.pred} > {output.pred} 2> {log} - rm {params.pred} 2>> {log} - """ - - -rule evaluate_logistic_regression_model: - input: - pred = rules.test_logistic_regression_model.output.pred, - true_tracts = lambda wildcards: rules.get_phased_true_tracts.output.bed if wildcards.geno_state == 'phased' else rules.get_unphased_true_tracts.output.bed, - output: - inferred_tracts = "results/performance/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.lr.{cutoff}.inferred.tracts.bed", - performance = "results/performance/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.lr.{cutoff}.performance", - summary = "results/performance/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.lr.{cutoff}.performance.summary", - params: - feature_id = feature_id, - log: - "logs/evaluate_logistic_regression_model/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.{cutoff}.log", - resources: - partition = "himem,gpu", - shell: - """ - zcat {input.pred} | sed '1d' | awk '$NF>{wildcards.cutoff}' | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$3,$4}}' > {output.inferred_tracts} 2>> {log} - gita eval --true-tracts {input.true_tracts} --inferred-tracts {output.inferred_tracts} --output {output.performance} 2>> {log} - grep -w Summary {output.performance} | awk -v train_demog={wildcards.train_demog} -v test_demog={wildcards.test_demog} \ - -v geno_state={wildcards.geno_state} -v nref={wildcards.nref} -v ntgt={wildcards.ntgt} \ - -v train_seed={wildcards.train_seed} -v test_seed={wildcards.test_seed} \ - -v feature={params.feature_id} -v cutoff={wildcards.cutoff} \ - 'BEGIN{{OFS="\\t"}}{{print train_demog,test_demog,geno_state,nref,ntgt,train_seed,test_seed,feature,cutoff,$2,$3,$4,$5,$6}}' > {output.summary} 2>> {log} - """ - - -rule summary: - input: - summaries = expand("results/performance/train_{train_demog}_test_{test_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/train_{train_seed}_test_{test_seed}/{output_prefix}.lr.{cutoff}.performance.summary", - train_demog=train_demog, test_demog=test_demog, nref=nref, ntgt=ntgt, geno_state=geno_state_list, output_prefix=output_prefix, train_seed=seed_list["train"], test_seed=seed_list["test"], cutoff=cutoff_list), - output: - summary = "results/performance/{output_prefix}.performance.summary", - log: - "logs/summary/{output_prefix}.log", - resources: - partition = "himem,gpu", - mem_gb = 16, - shell: - """ - cat {input.summaries} | sed '1iTraining Data Demography\\tTest Data Demography\\tGenotype\\tNref\\tNtgt\\tTraining Seed\\tTest Seed\\tFeature\\tCutoff\\tPrecision\\tRecall\\tTrue_tracts_length\\tInferred_tracts_length\\tOverlaps_length' > {output.summary} 2>> {log} - """ diff --git a/workflow/rules/methods/lr/train.gita.lr.smk b/workflow/rules/methods/lr/train.gita.lr.smk deleted file mode 100644 index 3b120aa..0000000 --- a/workflow/rules/methods/lr/train.gita.lr.smk +++ /dev/null @@ -1,49 +0,0 @@ -rule simulate_training_data: - input: - demes_file = demes["train"], - features_config = feature_config, - output: - features = "results/data/train/{train_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/{train_seed}/{output_prefix}.all.labeled.features", - params: - nref = nref, - ntgt = ntgt, - nfeature = nfeature, - ref_id = ref_id["train"], - tgt_id = tgt_id["train"], - src_id = src_id["train"], - mut_rate = mut_rate["train"], - rec_rate = rec_rate["train"], - seq_len = seq_len["train"], - is_phased = lambda wildcards: '--phased' if wildcards.geno_state == 'phased' else '', - output_dir = os.path.join(output_dir["train"], "{geno_state}/{train_seed}"), - log: - "logs/simulate_training_data/{train_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/{train_seed}/{output_prefix}.log", - resources: - cpus = 16, - mem_gb = 16, - time = 120, - shell: - """ - gita lr simulate --demes {input.demes_file} --nref {params.nref} --ntgt {params.ntgt} \ - --ref-id {params.ref_id} --tgt-id {params.tgt_id} --src-id {params.src_id} \ - --mut-rate {params.mut_rate} --rec-rate {params.rec_rate} --seq-len {params.seq_len} \ - --output-prefix {wildcards.output_prefix} --output-dir {params.output_dir} \ - --seed {wildcards.train_seed} --replicate {resources.cpus} --worker {resources.cpus} \ - --feature-config {input.features_config} --nfeature {params.nfeature} {params.is_phased} 2>> {log} - """ - - -rule train_logistic_regression_model: - input: - features = rules.simulate_training_data.output.features - output: - model_file = "results/data/train/{train_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/{train_seed}/{output_prefix}.lr.model", - log: - "logs/train_logistic_regression_model/{train_demog}/nref_{nref}/ntgt_{ntgt}/{geno_state}/{train_seed}/{output_prefix}.log", - resources: - partition = "himem,gpu", - mem_gb = 16, - shell: - """ - gita lr train --training-data {input.features} --model-file {output.model_file} --seed {wildcards.train_seed} 2>> {log} - """ diff --git a/workflow/rules/methods/unet/test.gita.unet.smk b/workflow/rules/methods/unet/test.gita.unet.smk deleted file mode 100644 index e69de29..0000000 diff --git a/workflow/rules/methods/unet/train.gita.unet.smk b/workflow/rules/methods/unet/train.gita.unet.smk deleted file mode 100644 index e69de29..0000000 diff --git a/workflow/rules/methods/unet/apply.gita.unet.smk b/workflow/rules/sai.smk similarity index 100% rename from workflow/rules/methods/unet/apply.gita.unet.smk rename to workflow/rules/sai.smk