Hello,
Thank you and your team for developing such a convenient tool!
I’m new to momi2 and have just run my first analysis of four wild plant populations to infer their divergence history.
I’m fourced on the divergence times among four groups, whose relationships might be either:
(W1, (W2, (W3, W4))) or (W2, (W1, (W3, W4)))
First I tried run this---(W1, (W2, (W3, W4)))
Below is the Python script I used:
#!/usr/bin/env python3
import momi
import logging
import numpy as np
import matplotlib
from matplotlib.backends.backend_pdf import PdfPages
logging.basicConfig(level=logging.INFO, filename="momi2.log")
sfs = momi.Sfs.load("four_wild.sfs.gz")
model = momi.DemographicModel(N_e=10000, gen_time=3.0, muts_per_gen=1.75e-8)
model.set_data(sfs, length=154495500)
1. Add leaves
model.add_leaf("W1", N="N_W1")
model.add_leaf("W2", N="N_W2")
model.add_leaf("W3", N="N_W3")
model.add_leaf("W4", N="N_W4")
2. Split events (in coalescent order)
model.move_lineages("W4", "W3", t="t_W4_W3", N="N_W4_W3")
model.move_lineages("W3", "W2", t="t_W3_W2", N="N_W3_W2")
model.move_lineages("W2", "W1", t="t_W2_W1", N="N_W2_W1")
3. Time parameters
model.add_time_param("t_W4_W3", lower=1000, upper=10000)
model.add_time_param("t_W3_W2", lower=10000, upper=200000)
model.add_time_param("t_W2_W1", lower=200000, upper=500000)
4. Ne parameters
base_model.add_size_param("N_W1", lower=1e3, upper=1e6)
base_model.add_size_param("N_W2", lower=1e3, upper=1e6)
base_model.add_size_param("N_W3", lower=1e3, upper=1e6)
base_model.add_size_param("N_W4", lower=1e3, upper=1e6)
base_model.add_size_param("N_W4_W3", lower=1e3, upper=1e6)
base_model.add_size_param("N_W3_W2", lower=1e3, upper=1e6)
base_model.add_size_param("N_W2_W1", lower=1e3, upper=1e6)
5. Fit and bootstrap
best_ll = -np.inf
best_params = None
initial fit
model0 = model.copy()
model0.set_params(model.get_params(), randomize=True)
res = model0.optimize(method="TNC", options={"maxiter":5000, "ftol":1e-8})
best_ll = model0.log_likelihood()
best_params = res.parameters
print("Initial fit log‐likelihood:", best_ll)
print("Best parameters:", best_params)
bootstrap
n_bootstraps = 20
bootstrap_results = []
for i in range(n_bootstraps):
bs_sfs = sfs.resample()
m_bs = model.copy()
m_bs.set_data(bs_sfs, length=154495500)
m_bs.set_params(best_params, randomize=True)
m_bs.optimize(method="TNC", options={"maxiter":5000, "ftol":1e-8})
bootstrap_results.append(m_bs.get_params())
print(f"Bootstrap {i+1}/{n_bootstraps} done")
plot
yticks = [1e3,1e4,5e4,1e5,2e5,5e5]
fig = momi.DemographyPlot(model0, ["W1","W2","W3","W4"],
figsize=(6,8), linthreshy=5e4, major_yticks=yticks)
for params in bootstrap_results:
fig.add_bootstrap(params, alpha=1/n_bootstraps)
pdf = PdfPages("momi2_Ficus_model.pdf")
fig.draw()
pdf.savefig()
pdf.close()
Everything runs without error, but I have a few questions:
-
Is my analysis workflow correct?
-
I notice that the estimated divergence times are very very close to the lower bounds I set in model.add_time_param(). Does this mean the results are overly dependent on the parameter ranges?
-
How can I choice the correct evolutionary relationship? Can I run momi2 for (W1, (W2, (W3, W4))) and (W2, (W1, (W3, W4))) respectively by the same parameter settings and then compare them based on likelihood and AIC?
-
How can I compute AIC for my model (both the raw AIC and bootstrap ± SD)? I’ve looked at confidence_region.py, but I’m not sure how to apply it for AIC calculation. What steps or functions would you recommend to obtain AIC and bootstrap AIC statistics?
Thank you very much for your time and help!
Bruce
Hello,
Thank you and your team for developing such a convenient tool!
I’m new to momi2 and have just run my first analysis of four wild plant populations to infer their divergence history.
I’m fourced on the divergence times among four groups, whose relationships might be either:
(W1, (W2, (W3, W4))) or (W2, (W1, (W3, W4)))
First I tried run this---(W1, (W2, (W3, W4)))
Below is the Python script I used:
#!/usr/bin/env python3
import momi
import logging
import numpy as np
import matplotlib
from matplotlib.backends.backend_pdf import PdfPages
logging.basicConfig(level=logging.INFO, filename="momi2.log")
sfs = momi.Sfs.load("four_wild.sfs.gz")
model = momi.DemographicModel(N_e=10000, gen_time=3.0, muts_per_gen=1.75e-8)
model.set_data(sfs, length=154495500)
1. Add leaves
model.add_leaf("W1", N="N_W1")
model.add_leaf("W2", N="N_W2")
model.add_leaf("W3", N="N_W3")
model.add_leaf("W4", N="N_W4")
2. Split events (in coalescent order)
model.move_lineages("W4", "W3", t="t_W4_W3", N="N_W4_W3")
model.move_lineages("W3", "W2", t="t_W3_W2", N="N_W3_W2")
model.move_lineages("W2", "W1", t="t_W2_W1", N="N_W2_W1")
3. Time parameters
model.add_time_param("t_W4_W3", lower=1000, upper=10000)
model.add_time_param("t_W3_W2", lower=10000, upper=200000)
model.add_time_param("t_W2_W1", lower=200000, upper=500000)
4. Ne parameters
base_model.add_size_param("N_W1", lower=1e3, upper=1e6)
base_model.add_size_param("N_W2", lower=1e3, upper=1e6)
base_model.add_size_param("N_W3", lower=1e3, upper=1e6)
base_model.add_size_param("N_W4", lower=1e3, upper=1e6)
base_model.add_size_param("N_W4_W3", lower=1e3, upper=1e6)
base_model.add_size_param("N_W3_W2", lower=1e3, upper=1e6)
base_model.add_size_param("N_W2_W1", lower=1e3, upper=1e6)
5. Fit and bootstrap
best_ll = -np.inf
best_params = None
initial fit
model0 = model.copy()
model0.set_params(model.get_params(), randomize=True)
res = model0.optimize(method="TNC", options={"maxiter":5000, "ftol":1e-8})
best_ll = model0.log_likelihood()
best_params = res.parameters
print("Initial fit log‐likelihood:", best_ll)
print("Best parameters:", best_params)
bootstrap
n_bootstraps = 20
bootstrap_results = []
for i in range(n_bootstraps):
bs_sfs = sfs.resample()
m_bs = model.copy()
m_bs.set_data(bs_sfs, length=154495500)
m_bs.set_params(best_params, randomize=True)
m_bs.optimize(method="TNC", options={"maxiter":5000, "ftol":1e-8})
bootstrap_results.append(m_bs.get_params())
print(f"Bootstrap {i+1}/{n_bootstraps} done")
plot
yticks = [1e3,1e4,5e4,1e5,2e5,5e5]
fig = momi.DemographyPlot(model0, ["W1","W2","W3","W4"],
figsize=(6,8), linthreshy=5e4, major_yticks=yticks)
for params in bootstrap_results:
fig.add_bootstrap(params, alpha=1/n_bootstraps)
pdf = PdfPages("momi2_Ficus_model.pdf")
fig.draw()
pdf.savefig()
pdf.close()
Everything runs without error, but I have a few questions:
Is my analysis workflow correct?
I notice that the estimated divergence times are very very close to the lower bounds I set in model.add_time_param(). Does this mean the results are overly dependent on the parameter ranges?
How can I choice the correct evolutionary relationship? Can I run momi2 for (W1, (W2, (W3, W4))) and (W2, (W1, (W3, W4))) respectively by the same parameter settings and then compare them based on likelihood and AIC?
How can I compute AIC for my model (both the raw AIC and bootstrap ± SD)? I’ve looked at confidence_region.py, but I’m not sure how to apply it for AIC calculation. What steps or functions would you recommend to obtain AIC and bootstrap AIC statistics?
Thank you very much for your time and help!
Bruce