NOTE(review): this patch was reconstructed from a whitespace-mangled copy (the
whole diff had been collapsed onto a handful of physical lines). Blank-line
context inside hunks was inferred, so apply with `git apply --recount` (or
`patch -p1`) if a hunk-header line count mismatches.
Review fixes folded into added (+) lines, line counts preserved:
  * gaussians.py: guard `self.denom_mean` against None so the
    `true_mutual_info` construction path does not crash in
    `denominator_log_prob` (`np.zeros(...) + None` -> TypeError).
  * flows.py: flagged the `import numpy as KerasWeightMatrix` stub alias.
  * run_gaussians_tre.sh: new file now ends with a trailing newline.

diff --git a/.gitignore b/.gitignore
index 5087446..58d3e30 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 .idea/
 .ipynb
 .ipynb*
-*.sh
 saved_models/
 configs/
 .idea/
diff --git a/data_handlers/gaussians.py b/data_handlers/gaussians.py
index bb4f46b..371b5a6 100644
--- a/data_handlers/gaussians.py
+++ b/data_handlers/gaussians.py
@@ -27,10 +27,10 @@ def __init__(self, data, vars, cov_mat):
         self.cov_matrix = cov_mat
         self.original_scale = 1.0
 
-    def __init__(self, n_samples, n_dims=80, true_mutual_info=None, mean=None, std=None, **kwargs):
+    def __init__(self, n_samples, n_dims=80, true_mutual_info=None, mean=None, std=None, mean_denom=None, base_mi=None, **kwargs):
 
-        if (mean is not None) or (std is not None):
-            assert (mean is not None) and (std is not None)
+        if (mean is not None) or (std is not None) or (base_mi is not None) or (mean_denom is not None):
+            assert (mean is not None) and (std is not None) and (base_mi is not None) and (mean_denom is not None)
             assert true_mutual_info is None, "Can't specify mean/std AND true_mutual_info"
         else:
             assert true_mutual_info is not None, "Must specify MI if mean+std are unspecified"
@@ -43,9 +43,11 @@ def __init__(self, n_samples, n_dims=80, true_mutual_info=None, mean=None, std=N
             self.rho = self.get_rho_from_mi(true_mutual_info, n_dims)  # correlation coefficient
             self.cov_matrix = block_diag(*[[[1, self.rho], [self.rho, 1]] for _ in range(n_dims // 2)])
         else:
-            self.cov_matrix = np.diag(self.variances)
+            self.rho = self.get_rho_from_mi(base_mi, n_dims)  # correlation coefficient
+            self.cov_matrix = block_diag(*[[[1, self.rho], [self.rho, 1]] for _ in range(n_dims // 2)])
 
         self.denom_cov_matrix = np.diag(self.variances)
+        self.denom_mean = mean_denom if mean_denom is not None else 0.0  # zero-mean denominator on the true_mutual_info path
 
         trn, val, tst = self.sample_data(n_samples), self.sample_data(n_samples), self.sample_data(n_samples)
 
@@ -77,7 +79,7 @@ def numerator_log_prob(self, u):
         return log_probs
 
     def denominator_log_prob(self, u):
-        prod_of_marginals = multivariate_normal(mean=np.zeros(self.n_dims), cov=self.denom_cov_matrix)
+        prod_of_marginals = multivariate_normal(mean=(np.zeros(self.n_dims)+self.denom_mean), cov=self.denom_cov_matrix)
         return prod_of_marginals.logpdf(u)
 
     def empirical_mutual_info(self, samples=None):
@@ -173,11 +175,15 @@ def show_pixel_histograms(self, split, pixel=None):
 
 
 def main():
-    n, d = 100000, 80
-    true_mi = 40
-
-    dataset = GAUSSIANS(n_samples=n, n_dims=d, true_mutual_info=true_mi)
-    print("True MI is {}, empirical MI is: {}".format(dataset.true_mutual_info, dataset.empirical_mutual_info()))
+    n, d = 100000, 40
+    # true_mi = 40
+    base_mi = 20
+    mean, std = -1.0, 1.0
+    mean_denom = 1
+
+    # dataset = GAUSSIANS(n_samples=n, n_dims=d, true_mutual_info=true_mi)
+    dataset = GAUSSIANS(n_samples=n, n_dims=d, true_mutual_info=None, mean=mean, std=std, mean_denom=mean_denom, base_mi=base_mi)
+    print("Empirical MI is: {}".format(dataset.empirical_mutual_info()))
 
     return dataset
 
diff --git a/density_estimators/flows.py b/density_estimators/flows.py
index 51ed0e7..1e3fa47 100644
--- a/density_estimators/flows.py
+++ b/density_estimators/flows.py
@@ -2,7 +2,7 @@
 from density_estimators.mades import MogMade, residual_mog_made_template, residual_made_template
 from density_estimators.gauss_copula import GaussianCopulaFromSplines
-from keras_layers import KerasWeightMatrix
+import numpy as KerasWeightMatrix  # FIXME(review): stub alias so the name resolves without keras_layers; any actual use will fail
 from utils.tf_utils import *
 
 tfb = tfp.bijectors
 
diff --git a/make_configs.py b/make_configs.py
index 6c299cc..4d3bf06 100644
--- a/make_configs.py
+++ b/make_configs.py
@@ -195,55 +195,11 @@ def make_1d_gauss_configs():
     generate_configs_for_gridsearch(config, "model", p1, p2)
 
 
-#
-# def make_gaussians_configs():
-#     config = make_base_config()
-#     config["data"]["dataset_name"] = "gaussians"
-#     config["data"]["data_dist_name"] = "gaussian"
-#     config["data"]["noise_dist_name"] = "gaussian"
-#
-#     config["optimisation"]["n_epochs"] = 250
-#     config["optimisation"]["n_batch"] = 512
-#     config["optimisation"]["patience"] = 50
-#     config["optimisation"]["save_every_x_epochs"] = 10
-#
-#     # config["architecture"]["network_type"] = "mlp"
-#     config["architecture"]["network_type"] = "quadratic"
-#     config["architecture"]["quadratic_constraint_type"] = "symmetric_pos_diag"
-#     config["architecture"]["quadratic_head_use_linear_term"] = True
-#
-#     config["ais"]["ais_n_chains"] = 1000
-#     config["ais"]["ais_total_n_steps"] = 1000
-#
-#     data_args1 = {"n_samples": 100000, "n_dims": 40, "mean": -1.0, "std": 1.0}
-#     data_args2 = {"n_samples": 100000, "n_dims": 160, "mean": -0.5, "std": 1.0}
-#     data_args3 = {"n_samples": 100000, "n_dims": 320, "mean": -0.5, "std": 1.0}
-#
-#     p1 = [["data", "data", "data", "data", "data", "data", "optimisation"],
-#           ["linear_combo_alphas", "initial_waymark_indices", "n_dims",
-#            "data_args", "noise_dist_gaussian_loc", "noise_dist_gaussian_stds", "energy_lr"],
-#           [
-#               [*get_poly_wmark_coefs(num=9, p=1.0), data_args1["n_dims"], data_args1, 1.0, 1.0, 1e-4],
-#               [*get_poly_wmark_coefs(num=2, p=1.0), data_args1["n_dims"], data_args1, 1.0, 1.0, 5e-4],
-#
-#               [*get_poly_wmark_coefs(num=13, p=1.0), data_args2["n_dims"], data_args2, 0.6, 1.0, 1e-4],
-#               [*get_poly_wmark_coefs(num=2, p=1.0), data_args2["n_dims"], data_args2, 0.6, 1.0, 5e-4],
-#
-#               [*get_poly_wmark_coefs(num=17, p=1.0), data_args3["n_dims"], data_args3, 0.5, 1.0, 1e-4],
-#               [*get_poly_wmark_coefs(num=2, p=1.0), data_args3["n_dims"], data_args3, 0.5, 1.0, 5e-4],
-#           ]
-#     ]
-#
-#     generate_configs_for_gridsearch(config, "model", p1)
-
 def make_gaussians_configs():
     config = make_base_config()
     config["data"]["dataset_name"] = "gaussians"
-    config["data"]["n_dims"] = 80
-    config["data"]["data_args"] = {"n_samples": 100000, "dims": config["data"]["n_dims"], "true_mutual_info": 20}
     config["data"]["data_dist_name"] = "gaussian"
-
     config["data"]["noise_dist_name"] = "gaussian"
 
     config["optimisation"]["n_epochs"] = 250
@@ -259,31 +215,75 @@ def make_gaussians_configs():
     config["ais"]["ais_n_chains"] = 1000
     config["ais"]["ais_total_n_steps"] = 1000
 
-    data_args1 = {"n_samples": 100000, "n_dims": 40, "true_mutual_info": 10}
-    data_args2 = {"n_samples": 100000, "n_dims": 80, "true_mutual_info": 20}
-    data_args3 = {"n_samples": 100000, "n_dims": 160, "true_mutual_info": 40}
-    data_args4 = {"n_samples": 100000, "n_dims": 320, "true_mutual_info": 80}
+    data_args1 = {"n_samples": 100000, "n_dims": 40, "base_mi": 20, "mean": -1.0, "std": 1.0, "mean_denom": 1.0}
+    data_args2 = {"n_samples": 100000, "n_dims": 160, "base_mi": 40, "mean": -0.5, "std": 1.0, "mean_denom": 0.6}
+    data_args3 = {"n_samples": 100000, "n_dims": 320, "base_mi": 80, "mean": -0.5, "std": 1.0, "mean_denom": 0.5}
 
-    p1 = [["data", "data", "data", "data", "optimisation"],
-          ["linear_combo_alphas", "initial_waymark_indices", "n_dims", "data_args", "energy_lr"],
+    p1 = [["data", "data", "data", "data", "data", "data", "optimisation"],
+          ["linear_combo_alphas", "initial_waymark_indices", "n_dims",
+           "data_args", "noise_dist_gaussian_loc", "noise_dist_gaussian_stds", "energy_lr"],
           [
-              [*get_poly_wmark_coefs(num=3, p=1.0), data_args1["n_dims"], data_args1, 1e-4],
-              [*get_poly_wmark_coefs(num=2, p=1.0), data_args1["n_dims"], data_args1, 5e-4],
-
-              [*get_poly_wmark_coefs(num=5, p=1.0), data_args2["n_dims"], data_args2, 1e-4],
-              [*get_poly_wmark_coefs(num=2, p=1.0), data_args2["n_dims"], data_args2, 5e-4],
+              [*get_poly_wmark_coefs(num=9, p=1.0), data_args1["n_dims"], data_args1, 1.0, 1.0, 1e-4],
+              [*get_poly_wmark_coefs(num=2, p=1.0), data_args1["n_dims"], data_args1, 1.0, 1.0, 5e-4],
 
-              [*get_poly_wmark_coefs(num=7, p=1.0), data_args3["n_dims"], data_args3, 1e-4],
-              [*get_poly_wmark_coefs(num=2, p=1.0), data_args3["n_dims"], data_args3, 5e-4],
+              [*get_poly_wmark_coefs(num=13, p=1.0), data_args2["n_dims"], data_args2, 0.6, 1.0, 1e-4],
+              [*get_poly_wmark_coefs(num=2, p=1.0), data_args2["n_dims"], data_args2, 0.6, 1.0, 5e-4],
 
-              [*get_poly_wmark_coefs(num=9, p=1.0), data_args4["n_dims"], data_args4, 1e-4],
-              [*get_poly_wmark_coefs(num=2, p=1.0), data_args4["n_dims"], data_args4, 5e-4],
+              [*get_poly_wmark_coefs(num=17, p=1.0), data_args3["n_dims"], data_args3, 0.5, 1.0, 1e-4],
+              [*get_poly_wmark_coefs(num=2, p=1.0), data_args3["n_dims"], data_args3, 0.5, 1.0, 5e-4],
           ]
     ]
 
     generate_configs_for_gridsearch(config, "model", p1)
 
+
+# def make_gaussians_configs():
+#     config = make_base_config()
+#     config["data"]["dataset_name"] = "gaussians"
+#     config["data"]["n_dims"] = 80
+#     config["data"]["data_args"] = {"n_samples": 100000, "dims": config["data"]["n_dims"], "true_mutual_info": 20}
+#     config["data"]["data_dist_name"] = "gaussian"
+
+#     config["data"]["noise_dist_name"] = "gaussian"
+
+#     config["optimisation"]["n_epochs"] = 250
+#     config["optimisation"]["n_batch"] = 512
+#     config["optimisation"]["patience"] = 50
+#     config["optimisation"]["save_every_x_epochs"] = 10
+
+#     # config["architecture"]["network_type"] = "mlp"
+#     config["architecture"]["network_type"] = "quadratic"
+#     config["architecture"]["quadratic_constraint_type"] = "symmetric_pos_diag"
+#     config["architecture"]["quadratic_head_use_linear_term"] = True
+
+#     config["ais"]["ais_n_chains"] = 1000
+#     config["ais"]["ais_total_n_steps"] = 1000
+
+#     data_args1 = {"n_samples": 100000, "n_dims": 40, "true_mutual_info": 10}
+#     data_args2 = {"n_samples": 100000, "n_dims": 80, "true_mutual_info": 20}
+#     data_args3 = {"n_samples": 100000, "n_dims": 160, "true_mutual_info": 40}
+#     data_args4 = {"n_samples": 100000, "n_dims": 320, "true_mutual_info": 80}
+
+#     p1 = [["data", "data", "data", "data", "optimisation"],
+#           ["linear_combo_alphas", "initial_waymark_indices", "n_dims", "data_args", "energy_lr"],
+#           [
+#               [*get_poly_wmark_coefs(num=3, p=1.0), data_args1["n_dims"], data_args1, 1e-4],
+#               [*get_poly_wmark_coefs(num=2, p=1.0), data_args1["n_dims"], data_args1, 5e-4],
+
+#               [*get_poly_wmark_coefs(num=5, p=1.0), data_args2["n_dims"], data_args2, 1e-4],
+#               [*get_poly_wmark_coefs(num=2, p=1.0), data_args2["n_dims"], data_args2, 5e-4],
+
+#               [*get_poly_wmark_coefs(num=7, p=1.0), data_args3["n_dims"], data_args3, 1e-4],
+#               [*get_poly_wmark_coefs(num=2, p=1.0), data_args3["n_dims"], data_args3, 5e-4],
+
+#               [*get_poly_wmark_coefs(num=9, p=1.0), data_args4["n_dims"], data_args4, 1e-4],
+#               [*get_poly_wmark_coefs(num=2, p=1.0), data_args4["n_dims"], data_args4, 5e-4],
+#           ]
+#     ]
+
+#     generate_configs_for_gridsearch(config, "model", p1)
+
+
 def make_mnist_configs():
     config = make_base_config()
     config["data"]["dataset_name"] = "mnist"
diff --git a/run_gaussians_single_ratio.sh b/run_gaussians_single_ratio.sh
new file mode 100755
index 0000000..80eb9d4
--- /dev/null
+++ b/run_gaussians_single_ratio.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+for c in 1 3 5 6 7 8
+do
+  for i in 1 2
+  do
+    CUDA_VISIBLE_DEVICES=0 python build_bridges.py --config_path=gaussians/model/${c}_${i} > ${c}_run${i}.txt 2>&1
+  done
+done
diff --git a/run_gaussians_tre.sh b/run_gaussians_tre.sh
new file mode 100755
index 0000000..4b327a5
--- /dev/null
+++ b/run_gaussians_tre.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+for c in 0 2 4
+do
+  for i in 1 2
+  do
+    CUDA_VISIBLE_DEVICES=1 python build_bridges.py --config_path=gaussians/model/${c}_${i} > ${c}_run${i}.txt 2>&1
+  done
+done
diff --git a/utils/tf_utils.py b/utils/tf_utils.py
index f51addc..6da1967 100644
--- a/utils/tf_utils.py
+++ b/utils/tf_utils.py
@@ -5,7 +5,7 @@
 import matplotlib.pyplot as plt
 import tensorflow as tf
 
-from keras_layers import GatuOrTanh
+# from keras_layers import GatuOrTanh
 from tensorflow.keras import layers as k_layers
 from tensorflow.keras import initializers
 from __init__ import project_root
@@ -94,8 +94,8 @@ def get_tf_activation(act_name):
 
     if act_name == "leaky_relu":
         activation = k_layers.LeakyReLU
-    elif act_name == "gatu":
-        activation = GatuOrTanh  # gatu for 4D inputs, otherwise Relu
+    # elif act_name == "gatu":
+    #     activation = GatuOrTanh  # gatu for 4D inputs, otherwise Relu
     else:
         activation = lambda: k_layers.Activation(act_name)