Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 95 additions & 38 deletions samana/inference_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,31 @@ def compute_logfluxratio_summarystat(flux_ratios, measured_flux_ratios, measurem
def compute_fluxratio_logL(flux_ratios, measured_flux_ratios, measurement_uncertainties):
    """Compute the flux-ratio log-likelihood assuming independent Gaussian errors.

    Parameters
    ----------
    flux_ratios : ndarray, shape (n_samples, n_flux_ratios)
        Model-predicted flux ratios, one row per sample.
    measured_flux_ratios : array-like, length n_flux_ratios
        Measured flux ratios.
    measurement_uncertainties : array-like, length n_flux_ratios
        Per-flux-ratio 1-sigma uncertainties. An entry equal to -1 is a
        sentinel meaning "exclude this flux ratio from the likelihood".

    Returns
    -------
    fr_logL : ndarray, shape (n_samples,)
        Sum of per-flux-ratio Gaussian log-likelihood terms (up to an
        additive normalization constant).
    summary_stat : ndarray, shape (n_samples,)
        sqrt of the summed squared residuals, normalized by the largest
        measured flux ratio.
    """
    fr_logL = 0
    S = 0
    for i in range(0, len(measured_flux_ratios)):
        # uncertainty of -1 flags a flux ratio that should not be used
        if measurement_uncertainties[i] == -1:
            continue
        S += (flux_ratios[:, i] - measured_flux_ratios[i]) ** 2
        fr_logL += -0.5 * (flux_ratios[:, i] - measured_flux_ratios[i]) ** 2 / measurement_uncertainties[i] ** 2
    return fr_logL, np.sqrt(S) / max(measured_flux_ratios)


#####
def compute_fluxratio_logL_cov(flux_ratios, measured_flux_ratios, measurement_uncertainties, keep_index_list):
    """Compute the flux-ratio log-likelihood with a full measurement covariance.

    Parameters
    ----------
    flux_ratios : ndarray, shape (n_samples, n_flux_ratios)
        Model-predicted flux ratios, one row per sample.
    measured_flux_ratios : array-like, length n_flux_ratios
        Measured flux ratios (the multivariate-normal mean).
    measurement_uncertainties : ndarray, shape (n_flux_ratios, n_flux_ratios)
        Covariance matrix of the measured flux ratios.
    keep_index_list : iterable of int
        Column indices of the flux ratios to include in the summary
        statistic (the log-likelihood itself always uses the full vector).

    Returns
    -------
    fr_logL : ndarray, shape (n_samples,)
        Multivariate-normal log-pdf evaluated at each sample.
    summary_stat : ndarray, shape (n_samples,)
        sqrt of the summed squared residuals over keep_index_list,
        normalized by the largest measured flux ratio.
    """
    # one log-pdf value per row of flux_ratios
    fr_logL = multivariate_normal.logpdf(flux_ratios, mean=measured_flux_ratios, cov=measurement_uncertainties)
    S = 0
    for ind in keep_index_list:
        S += (flux_ratios[:, ind] - measured_flux_ratios[ind]) ** 2
    return fr_logL, np.sqrt(S) / max(measured_flux_ratios)


## flux_ratios is an array of shape (i, n), where i is the number of samples and n is the number of flux ratios




for i in range(0, len(measured_flux_ratios)):
if measurement_uncertainties[i] == -1:
continue
Expand All @@ -51,10 +76,14 @@ def compute_fluxratio_logL(flux_ratios, measured_flux_ratios, measurement_uncert
return fr_logL, np.sqrt(S) / max(measured_flux_ratios)

def calculate_flux_ratio_likelihood(params, flux_ratios, measured_flux_ratios,
                                    measurement_uncertainties, keep_index_list=None):
    """Turn flux-ratio log-likelihoods into normalized importance weights.

    Dispatches on the dimensionality of ``measurement_uncertainties``:
    a 1D array of per-flux-ratio sigmas uses the independent-Gaussian
    likelihood; a 2D array is treated as a full covariance matrix.

    Parameters
    ----------
    params : ndarray
        Parameter samples; returned unchanged (deep-copied).
    flux_ratios : ndarray, shape (n_samples, n_flux_ratios)
        Model-predicted flux ratios.
    measured_flux_ratios : array-like
        Measured flux ratios.
    measurement_uncertainties : ndarray
        1D array of sigmas, or 2D covariance matrix.
    keep_index_list : iterable of int, optional
        Indices used for the summary statistic in the covariance case;
        ignored for 1D uncertainties.

    Returns
    -------
    params_out : ndarray
        Deep copy of ``params``.
    normalized_weights : ndarray, shape (n_samples,)
        Importance weights exp(logL), normalized by their maximum.
    S_statistic : ndarray, shape (n_samples,)
        Summary statistic from the underlying likelihood routine.

    Raises
    ------
    ValueError
        If ``measurement_uncertainties`` is neither 1D nor 2D.
    """
    params_out = deepcopy(params)
    if measurement_uncertainties.ndim == 1:
        flux_ratio_logL, S_statistic = compute_fluxratio_logL(
            flux_ratios, measured_flux_ratios, measurement_uncertainties)
    elif measurement_uncertainties.ndim == 2:
        flux_ratio_logL, S_statistic = compute_fluxratio_logL_cov(
            flux_ratios, measured_flux_ratios, measurement_uncertainties, keep_index_list)
    else:
        # previously two independent if-statements left flux_ratio_logL
        # unbound (NameError) for any other dimensionality; fail loudly instead
        raise ValueError('measurement_uncertainties must be a 1D array of sigmas '
                         'or a 2D covariance matrix')
    importance_weights = np.exp(flux_ratio_logL)
    normalized_weights = importance_weights / np.max(importance_weights)
    return params_out, normalized_weights, S_statistic
Expand Down Expand Up @@ -129,30 +158,56 @@ def compute_likelihoods(output_class,
joint_weights = np.array([])
accepted_seeds = np.array([])

for bootstrap_index in range(0, n_bootstraps+1):
##this doesn't need to be in the for-loop I don't think.
param_ranges_dm = [param_ranges_dict[param_name] for param_name in param_names]
# first down-select on imaging data likelihood
logL_image_data = output_class.param_dict['logL_image_data']
use_imaging_boolean = False
if image_data_logL_sigma is not None:
assert percentile_cut_image_data is None, ('image_data_logL_sigma and percentile_cut_image_data should not '
'both be specified')
max_logL = np.max(logL_image_data)
logL_normalized_diff = (logL_image_data - max_logL) / image_data_logL_sigma
weights_image_data = np.exp(-0.5 * logL_normalized_diff ** 2)
use_imaging_boolean = True
elif percentile_cut_image_data is not None:
weights_image_data = np.zeros_like(logL_image_data)
inds_sorted = np.argsort(logL_image_data)
idx_cut = int(len(logL_image_data) * (100 - percentile_cut_image_data) / 100)
logL_cut = logL_image_data[inds_sorted][idx_cut]
weights_image_data[np.where(logL_image_data > logL_cut)] = 1
use_imaging_boolean = True
else:
weights_image_data = np.ones_like(logL_image_data)

param_ranges_dm = [param_ranges_dict[param_name] for param_name in param_names]
# first down-select on imaging data likelihood
logL_image_data = output_class.param_dict['logL_image_data']
if image_data_logL_sigma is not None:
assert percentile_cut_image_data is None, ('image_data_logL_sigma and percentile_cut_image_data should not '
'both be specified')
max_logL = np.max(logL_image_data)
logL_normalized_diff = (logL_image_data - max_logL) / image_data_logL_sigma
weights_image_data = np.exp(-0.5 * logL_normalized_diff ** 2)
elif percentile_cut_image_data is not None:
weights_image_data = np.zeros_like(logL_image_data)
inds_sorted = np.argsort(logL_image_data)
idx_cut = int(len(logL_image_data) * (100 - percentile_cut_image_data) / 100)
logL_cut = logL_image_data[inds_sorted][idx_cut]
weights_image_data[np.where(logL_image_data > logL_cut)] = 1

##we always need this
for i, parameter_name in enumerate(param_names):
if parameter_name in dm_param_names:
params[:, i] = np.squeeze(sim.parameter_array([parameter_name]))
else:
weights_image_data = np.ones_like(logL_image_data)
params[: ,i] = np.squeeze(sim.macromodel_parameter_array([parameter_name]))
# now we compute the imaging data likelihood only

# we only need this with imaging data.
pdf_imgdata = None

if use_imaging_boolean:
pdf_imgdata = DensitySamples(params,
param_names=param_names,
weights=weights_image_data,
param_ranges=param_ranges_dm,
**kwargs_kde)

for bootstrap_index in range(0, n_bootstraps+1):



if bootstrap_index == 0:
print('total samples: ', logL_image_data.shape[0])
if image_data_logL_sigma is not None:
print('effective sample size after imaging data likelihood: ', np.sum(weights_image_data)/(n_bootstraps+1))
if use_imaging_boolean:
print('total samples: ', logL_image_data.shape[0])
if image_data_logL_sigma is not None:
print('effective sample size after imaging data likelihood: ', np.sum(weights_image_data)/(n_bootstraps+1))
sim = deepcopy(output_class)
params = np.empty((sim.parameters.shape[0], len(param_names)))
random_seeds = np.squeeze(sim.parameter_array(['seed']))
Expand All @@ -163,17 +218,6 @@ def compute_likelihoods(output_class,
'summary_statistic', 'f_low', 'f_high', 'x_core_halo',
'log10_sigma_eff_mlow', 'log10_sigma_eff_8_mh', 'log10_subhalo_time_s',
]
for i, parameter_name in enumerate(param_names):
if parameter_name in dm_param_names:
params[:, i] = np.squeeze(sim.parameter_array([parameter_name]))
else:
params[: ,i] = np.squeeze(sim.macromodel_parameter_array([parameter_name]))
# now we compute the imaging data likelihood only
pdf_imgdata = DensitySamples(params,
param_names=param_names,
weights=weights_image_data,
param_ranges=param_ranges_dm,
**kwargs_kde)

# now compute the flux ratio likelihood
flux_ratios = sim.flux_ratios
Expand Down Expand Up @@ -202,24 +246,35 @@ def compute_likelihoods(output_class,
keep_index_list,
fluxes=fluxes,
S_statistic_tolerance=S_statistic_tolerance)
_joint_weights = flux_ratio_likelihood_weights * weights_image_data
_joint_weights=None
if use_imaging_boolean:
_joint_weights = flux_ratio_likelihood_weights * weights_image_data
else:
_joint_weights=flux_ratio_likelihood_weights

pdf_imgdata_fr = DensitySamples(_params_out,
param_names=param_names,
weights=_joint_weights/np.max(_joint_weights),
param_ranges=param_ranges_dm,
**kwargs_kde
)
# now compute the final pdf

if bootstrap_index == 0:
params_out = _params_out
joint_weights = _joint_weights
imaging_data_likelihood = IndependentLikelihoods([pdf_imgdata])
imaging_data_fluxratio_likelihood = IndependentLikelihoods([pdf_imgdata_fr])
if use_imaging_boolean:
imaging_data_likelihood = IndependentLikelihoods([pdf_imgdata])
imaging_data_fluxratio_likelihood = IndependentLikelihoods([pdf_imgdata_fr])
else:
imaging_data_likelihood = None
imaging_data_fluxratio_likelihood = IndependentLikelihoods([pdf_imgdata_fr])
S_statistic = _S_statistic
else:
params_out = np.vstack((params_out, _params_out))
joint_weights = np.append(joint_weights, _joint_weights)
imaging_data_likelihood += IndependentLikelihoods([pdf_imgdata])
if use_imaging_boolean:
imaging_data_likelihood += IndependentLikelihoods([pdf_imgdata])
imaging_data_fluxratio_likelihood += IndependentLikelihoods([pdf_imgdata_fr])
S_statistic = np.append(S_statistic, _S_statistic)

Expand All @@ -237,5 +292,7 @@ def compute_likelihoods(output_class,
np.sum(normalized_joint_weights))
# if n_bootstraps>0 and n_keep is not None:
# print('number of repeated index out of '+str(len(accepted_seeds))+' samples: ', str(len(accepted_seeds) - len(np.unique(accepted_seeds))))
likelihood_joint = imaging_data_fluxratio_likelihood / imaging_data_likelihood
if use_imaging_boolean:
likelihood_joint = imaging_data_fluxratio_likelihood / imaging_data_likelihood
##if using flux ratios, use imaging_data_fluxratio_likelihood variable.
return imaging_data_likelihood, imaging_data_fluxratio_likelihood, likelihood_joint, (params_out, normalized_joint_weights)