-
Notifications
You must be signed in to change notification settings - Fork 40
Allow passing in cr/cl bounds and other settings #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,12 +1,13 @@ | ||
| import os | ||
| import argparse | ||
| import torch | ||
|
|
||
|
|
||
| parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) | ||
| parser.add_argument('-d', '--data_set', type=str, default='tic-tac-toe', | ||
| help='Set the data set for training. All the data sets in the dataset folder are available.') | ||
| parser.add_argument('-i', '--device_ids', type=str, default=None, help='Set the device (GPU ids). Split by @.' | ||
| ' E.g., 0@2@3.') | ||
| ' E.g., cuda:0@cuda:2@cuda:3.') | ||
| parser.add_argument('-nr', '--nr', default=0, type=int, help='ranking within the nodes') | ||
| parser.add_argument('-e', '--epoch', type=int, default=41, help='Set the total epoch.') | ||
| parser.add_argument('-bs', '--batch_size', type=int, default=64, help='Set the batch size.') | ||
|
|
@@ -51,7 +52,8 @@ | |
| rrl_args.plot_file = os.path.join(rrl_args.folder_path, 'plot_file.pdf') | ||
| rrl_args.log = os.path.join(rrl_args.folder_path, 'log.txt') | ||
| rrl_args.test_res = os.path.join(rrl_args.folder_path, 'test_res.txt') | ||
| rrl_args.device_ids = list(map(int, rrl_args.device_ids.strip().split('@'))) | ||
| rrl_args.device_ids = list(map(lambda id: torch.device(id), rrl_args.device_ids.strip().split('@'))) \ | ||
| if rrl_args.device_ids else [None] | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: I found that passing in an integer device ID gets the tensors placed in GPU memory, but the GPU compute utilization remains at 0. The original comment illustrated this with two example runs, one passing in an integer device ID and one passing in the alternative device form (presumably a `torch.device`); the inline tool name and the attached run output were not captured in this text. |
||
| rrl_args.gpus = len(rrl_args.device_ids) | ||
| rrl_args.nodes = 1 | ||
| rrl_args.world_size = rrl_args.gpus * rrl_args.nodes | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| import os | ||
| import os, json | ||
| import numpy as np | ||
| import torch | ||
| from torch.utils.data.dataset import random_split | ||
|
|
@@ -14,15 +14,37 @@ | |
| DATA_DIR = './dataset' | ||
|
|
||
|
|
||
| def read_settings(settings_path): | ||
| if os.path.exists(settings_path): | ||
| with open(settings_path, 'r') as f: | ||
| settings = json.load(f) | ||
| else: | ||
| settings = { | ||
| 'normalize_continuous': True, | ||
| 'one_hot_encode_features': True, | ||
| 'impute_continuous': True, | ||
| # of shape [continious columns, lower bounds, upper bounds] | ||
| 'bounds': None | ||
| # alternatively, pass in individual bounds | ||
| # lower_bound: [continuous cols] | ||
| # upper_bound: [continuous cols] | ||
| } | ||
| return settings | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: I added this new settings file so that the user can pass in CR/CL bounds and also control normalization, one-hot encoding, etc. (these are currently hard-coded). |
||
|
|
||
|
|
||
| def get_data_loader(dataset, world_size, rank, batch_size, k=0, pin_memory=False, save_best=True): | ||
| data_path = os.path.join(DATA_DIR, dataset + '.data') | ||
| info_path = os.path.join(DATA_DIR, dataset + '.info') | ||
| settings_path = os.path.join(DATA_DIR, dataset + '.settings.json') | ||
| X_df, y_df, f_df, label_pos = read_csv(data_path, info_path, shuffle=True) | ||
|
|
||
| db_enc = DBEncoder(f_df, discrete=False) | ||
| settings = read_settings(settings_path) | ||
| db_enc = DBEncoder(f_df, discrete=False, | ||
| one_hot_encode_features=settings['one_hot_encode_features'], | ||
| impute_continuous=settings['impute_continuous']) | ||
| db_enc.fit(X_df, y_df) | ||
|
|
||
| X, y = db_enc.transform(X_df, y_df, normalized=True, keep_stat=True) | ||
| X, y = db_enc.transform(X_df, y_df, normalized=settings['normalize_continuous'], keep_stat=True) | ||
|
|
||
| kf = KFold(n_splits=5, shuffle=True, random_state=0) | ||
| train_index, test_index = list(kf.split(X_df))[k] | ||
|
|
@@ -45,15 +67,21 @@ def get_data_loader(dataset, world_size, rank, batch_size, k=0, pin_memory=False | |
| valid_loader = DataLoader(valid_sub, batch_size=batch_size, shuffle=False, pin_memory=pin_memory) | ||
| test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, pin_memory=pin_memory) | ||
|
|
||
| return db_enc, train_loader, valid_loader, test_loader | ||
| if settings['bounds'] is not None and 'lower_bounds' not in settings: | ||
| bounds = settings['bounds'] | ||
| settings['lower_bounds'] = np.array([bounds[col][0] for col in db_enc.X_fname[db_enc.discrete_flen:]]) | ||
| settings['upper_bounds'] = np.array([bounds[col][1] for col in db_enc.X_fname[db_enc.discrete_flen:]]) | ||
| return db_enc, train_loader, valid_loader, test_loader, settings | ||
|
|
||
|
|
||
| def train_model(gpu, args): | ||
| def train_model(gpu, args, distributed=True): | ||
| rank = args.nr * args.gpus + gpu | ||
| dist.init_process_group(backend='nccl', init_method='env://', world_size=args.world_size, rank=rank) | ||
| if distributed: | ||
| dist.init_process_group(backend='nccl', init_method='env://', world_size=args.world_size, rank=rank) | ||
| torch.manual_seed(42) | ||
| device_id = args.device_ids[gpu] | ||
| torch.cuda.set_device(device_id) | ||
| if device_id and device_id.type == 'cuda': | ||
| torch.cuda.set_device(device_id) | ||
|
|
||
| if gpu == 0: | ||
| writer = SummaryWriter(args.folder_path) | ||
|
|
@@ -63,8 +91,9 @@ def train_model(gpu, args): | |
| is_rank0 = False | ||
|
|
||
| dataset = args.data_set | ||
| db_enc, train_loader, valid_loader, _ = get_data_loader(dataset, args.world_size, rank, args.batch_size, | ||
| k=args.ith_kfold, pin_memory=True, save_best=args.save_best) | ||
| db_enc, train_loader, valid_loader, _, settings = get_data_loader(dataset, args.world_size, rank, args.batch_size, | ||
| k=args.ith_kfold, pin_memory=True, | ||
| save_best=args.save_best) | ||
|
|
||
| X_fname = db_enc.X_fname | ||
| y_fname = db_enc.y_fname | ||
|
|
@@ -74,11 +103,14 @@ def train_model(gpu, args): | |
| rrl = RRL(dim_list=[(discrete_flen, continuous_flen)] + list(map(int, args.structure.split('@'))) + [len(y_fname)], | ||
| device_id=device_id, | ||
| use_not=args.use_not, | ||
| cl=settings.get('lower_bounds', None), | ||
| cr=settings.get('upper_bounds', None), | ||
| is_rank0=is_rank0, | ||
| log_file=args.log, | ||
| writer=writer, | ||
| save_best=args.save_best, | ||
| estimated_grad=args.estimated_grad, | ||
| distributed=distributed, | ||
| save_path=args.model) | ||
|
|
||
| rrl.train_model( | ||
|
|
@@ -106,16 +138,18 @@ def load_model(path, device_id, log_file=None, distributed=True): | |
| stat_dict = checkpoint['model_state_dict'] | ||
| for key in list(stat_dict.keys()): | ||
| # remove 'module.' prefix | ||
| stat_dict[key[7:]] = stat_dict.pop(key) | ||
| if key.startswith('module.'): | ||
| stat_dict[key[7:]] = stat_dict.pop(key) | ||
| rrl.net.load_state_dict(checkpoint['model_state_dict']) | ||
| return rrl | ||
|
|
||
|
|
||
| def test_model(args): | ||
| rrl = load_model(args.model, args.device_ids[0], log_file=args.test_res, distributed=False) | ||
| dataset = args.data_set | ||
| db_enc, train_loader, _, test_loader = get_data_loader(dataset, 4, 0, args.batch_size, args.ith_kfold, save_best=False) | ||
| rrl.test(test_loader=test_loader, set_name='Test') | ||
| db_enc, train_loader, _, test_loader, _ = get_data_loader(dataset, 4, 0, args.batch_size, args.ith_kfold, | ||
| save_best=False) | ||
| rrl.test(test_loader=test_loader, set_name='Test', labels=db_enc.y_fname) | ||
| with open(args.rrl_file, 'w') as rrl_file: | ||
| rrl.rule_print(db_enc.X_fname, db_enc.y_fname, train_loader, file=rrl_file, mean=db_enc.mean, std=db_enc.std) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,7 @@ def backward(ctx, grad_output): | |
| class BinarizeLayer(nn.Module): | ||
| """Implement the feature discretization and binarization.""" | ||
|
|
||
| def __init__(self, n, input_dim, use_not=False, left=None, right=None): | ||
| def __init__(self, n, input_dim, use_not=False, cl=None, cr=None, left=None, right=None): | ||
| super(BinarizeLayer, self).__init__() | ||
| self.n = n | ||
| self.input_dim = input_dim | ||
|
|
@@ -39,12 +39,17 @@ def __init__(self, n, input_dim, use_not=False, left=None, right=None): | |
| self.register_buffer('right', right) | ||
|
|
||
| if self.input_dim[1] > 0: | ||
| if self.left is not None and self.right is not None: | ||
| if cl is not None and cr is not None: # bounds are specified | ||
| cl = torch.tensor(cl).type(torch.float).t() | ||
| cr = torch.tensor(cr).type(torch.float).t() | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: here we can pass in the cl/cr bounds directly. |
||
| elif self.left is not None and self.right is not None: | ||
| cl = self.left + torch.rand(self.n, self.input_dim[1]) * (self.right - self.left) | ||
| cr = self.left + torch.rand(self.n, self.input_dim[1]) * (self.right - self.left) | ||
| else: | ||
| cl = 3. * (2. * torch.rand(self.n, self.input_dim[1]) - 1.) | ||
| cr = 3. * (2. * torch.rand(self.n, self.input_dim[1]) - 1.) | ||
| assert torch.Size([self.n, self.input_dim[1]]) == cl.size() | ||
| assert torch.Size([self.n, self.input_dim[1]]) == cr.size() | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: these assertions verify that the shapes of cl and cr are correct. |
||
| self.register_buffer('cl', cl) | ||
| self.register_buffer('cr', cr) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,7 +14,7 @@ | |
|
|
||
|
|
||
| class MLLP(nn.Module): | ||
| def __init__(self, dim_list, use_not=False, left=None, right=None, estimated_grad=False): | ||
| def __init__(self, dim_list, use_not=False, cl=None, cr=None, left=None, right=None, estimated_grad=False): | ||
| super(MLLP, self).__init__() | ||
|
|
||
| self.dim_list = dim_list | ||
|
|
@@ -30,7 +30,7 @@ def __init__(self, dim_list, use_not=False, left=None, right=None, estimated_gra | |
| num += self.layer_list[-2].output_dim | ||
|
|
||
| if i == 1: | ||
| layer = BinarizeLayer(dim_list[i], num, self.use_not, self.left, self.right) | ||
| layer = BinarizeLayer(dim_list[i], num, self.use_not, cl=cl, cr=cr, left=self.left, right=self.right) | ||
| layer_name = 'binary{}'.format(i) | ||
| elif i == len(dim_list) - 1: | ||
| layer = LRLayer(dim_list[i], num) | ||
|
|
@@ -77,7 +77,7 @@ def layer_list(self): | |
|
|
||
| class RRL: | ||
| def __init__(self, dim_list, device_id, use_not=False, is_rank0=False, log_file=None, writer=None, left=None, | ||
| right=None, save_best=False, estimated_grad=False, save_path=None, distributed=True): | ||
| right=None, cl=None, cr=None, save_best=False, estimated_grad=False, save_path=None, distributed=True): | ||
| super(RRL, self).__init__() | ||
| self.dim_list = dim_list | ||
| self.use_not = use_not | ||
|
|
@@ -99,9 +99,11 @@ def __init__(self, dim_list, device_id, use_not=False, is_rank0=False, log_file= | |
| logging.basicConfig(level=logging.DEBUG, filename=log_file, filemode='w', format=log_format) | ||
| self.writer = writer | ||
|
|
||
| self.net = MLLP(dim_list, use_not=use_not, left=left, right=right, estimated_grad=estimated_grad) | ||
| self.net = MLLP(dim_list, use_not=use_not, cl=cl, cr=cr, left=left, right=right, | ||
| estimated_grad=estimated_grad) | ||
|
|
||
| self.net.cuda(self.device_id) | ||
| if self.device_id and self.device_id.type == 'cuda': | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: the condition allows the program to run in CPU mode as well. |
||
| self.net.cuda(self.device_id) | ||
| if distributed: | ||
| self.net = MyDistributedDataParallel(self.net, device_ids=[self.device_id]) | ||
|
|
||
|
|
@@ -161,8 +163,9 @@ def train_model(self, X=None, y=None, X_validation=None, y_validation=None, data | |
| ba_cnt = 0 | ||
| for X, y in data_loader: | ||
| ba_cnt += 1 | ||
| X = X.cuda(self.device_id, non_blocking=True) | ||
| y = y.cuda(self.device_id, non_blocking=True) | ||
| if self.device_id and self.device_id.type == 'cuda': | ||
| X = X.cuda(self.device_id, non_blocking=True) | ||
| y = y.cuda(self.device_id, non_blocking=True) | ||
| optimizer.zero_grad() # Zero the gradient buffers. | ||
| y_pred_mllp, y_pred_rrl = self.net.forward(X) | ||
| with torch.no_grad(): | ||
|
|
@@ -231,7 +234,8 @@ def train_model(self, X=None, y=None, X_validation=None, y_validation=None, data | |
| self.save_model() | ||
| return epoch_histc | ||
|
|
||
| def test(self, X=None, y=None, test_loader=None, batch_size=32, need_transform=True, set_name='Validation'): | ||
| def test(self, X=None, y=None, labels=None, test_loader=None, | ||
| batch_size=32, need_transform=True, set_name='Validation'): | ||
| if X is not None and y is not None and need_transform: | ||
| X, y = self.data_transform(X, y) | ||
| with torch.no_grad(): | ||
|
|
@@ -251,7 +255,8 @@ def test(self, X=None, y=None, test_loader=None, batch_size=32, need_transform=T | |
| y_pred_list = [] | ||
| y_pred_b_list = [] | ||
| for X, y in test_loader: | ||
| X = X.cuda(self.device_id, non_blocking=True) | ||
| if self.device_id and self.device_id.type == 'cuda': | ||
| X = X.cuda(self.device_id, non_blocking=True) | ||
| output = self.net.forward(X) | ||
| y_pred_list.append(output[0]) | ||
| y_pred_b_list.append(output[1]) | ||
|
|
@@ -275,7 +280,8 @@ def test(self, X=None, y=None, test_loader=None, batch_size=32, need_transform=T | |
| logging.info('On {} Set:\n\tAccuracy of RRL Model: {}' | ||
| '\n\tF1 Score of RRL Model: {}'.format(set_name, accuracy_b, f1_score_b)) | ||
| logging.info('On {} Set:\nPerformance of RRL Model: \n{}\n{}'.format( | ||
| set_name, metrics.confusion_matrix(y_true, y_pred_b_arg), metrics.classification_report(y_true, y_pred_b_arg))) | ||
| set_name, metrics.confusion_matrix(y_true, y_pred_b_arg), | ||
| metrics.classification_report(y_true, y_pred_b_arg, target_names=labels))) | ||
| logging.info('-' * 60) | ||
| return accuracy, accuracy_b, f1_score, f1_score_b | ||
|
|
||
|
|
@@ -289,7 +295,8 @@ def detect_dead_node(self, data_loader=None): | |
| layer.node_activation_cnt = torch.zeros(layer.output_dim, dtype=torch.double, device=self.device_id) | ||
| layer.forward_tot = 0 | ||
| for x, y in data_loader: | ||
| x = x.cuda(self.device_id) | ||
| if self.device_id and self.device_id.type == 'cuda': | ||
| x = x.cuda(self.device_id) | ||
| x_res = None | ||
| for i, layer in enumerate(self.net.layer_list[:-1]): | ||
| if i <= 1: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| import numpy as np | ||
| import pandas as pd | ||
| from scipy.sparse import issparse | ||
| from sklearn import preprocessing | ||
| from sklearn.impute import SimpleImputer | ||
|
|
||
|
|
@@ -29,13 +30,14 @@ def read_csv(data_path, info_path, shuffle=False): | |
| class DBEncoder: | ||
| """Encoder used for data discretization and binarization.""" | ||
|
|
||
| def __init__(self, f_df, discrete=False, y_one_hot=True, drop='first'): | ||
| def __init__(self, f_df, discrete=False, y_one_hot=True, drop='first', | ||
| impute_continuous=True, one_hot_encode_features=True): | ||
| self.f_df = f_df | ||
| self.discrete = discrete | ||
| self.y_one_hot = y_one_hot | ||
| self.label_enc = preprocessing.OneHotEncoder(categories='auto') if y_one_hot else preprocessing.LabelEncoder() | ||
| self.feature_enc = preprocessing.OneHotEncoder(categories='auto', drop=drop) | ||
| self.imp = SimpleImputer(missing_values=np.nan, strategy='mean') | ||
| self.feature_enc = preprocessing.OneHotEncoder(categories='auto', drop=drop) if one_hot_encode_features else None | ||
| self.imp = SimpleImputer(missing_values=np.nan, strategy='mean') if impute_continuous else None | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: for datasets that do not require one-hot encoding or imputation, or that already have them applied, these steps can now be skipped. |
||
| self.X_fname = None | ||
| self.y_fname = None | ||
| self.discrete_flen = None | ||
|
|
@@ -59,16 +61,18 @@ def fit(self, X_df, y_df): | |
| self.y_fname = list(self.label_enc.get_feature_names(y_df.columns)) if self.y_one_hot else y_df.columns | ||
|
|
||
| if not continuous_data.empty: | ||
| # Use mean as missing value for continuous columns if do not discretize them. | ||
| self.imp.fit(continuous_data.values) | ||
| if self.imp is not None: | ||
| # Use mean as missing value for continuous columns if do not discretize them. | ||
| self.imp.fit(continuous_data.values) | ||
| if not discrete_data.empty: | ||
| # One-hot encoding | ||
| self.feature_enc.fit(discrete_data) | ||
| feature_names = discrete_data.columns | ||
| self.X_fname = list(self.feature_enc.get_feature_names(feature_names)) | ||
| self.X_fname = discrete_data.columns.to_list() | ||
| if self.feature_enc is not None: | ||
| # One-hot encoding | ||
| self.feature_enc.fit(discrete_data) | ||
| self.X_fname = list(self.feature_enc.get_feature_names(self.X_fname)) | ||
| self.discrete_flen = len(self.X_fname) | ||
| if not self.discrete: | ||
| self.X_fname.extend(continuous_data.columns) | ||
| self.X_fname.extend(continuous_data.columns.to_list()) | ||
| else: | ||
| self.X_fname = continuous_data.columns | ||
| self.discrete_flen = 0 | ||
|
|
@@ -84,21 +88,25 @@ def transform(self, X_df, y_df, normalized=False, keep_stat=False): | |
| y = y.toarray() | ||
|
|
||
| if not continuous_data.empty: | ||
| # Use mean as missing value for continuous columns if we do not discretize them. | ||
| continuous_data = pd.DataFrame(self.imp.transform(continuous_data.values), | ||
| columns=continuous_data.columns) | ||
| if self.imp is not None: | ||
| # Use mean as missing value for continuous columns if we do not discretize them. | ||
| continuous_data = pd.DataFrame(self.imp.transform(continuous_data.values), | ||
| columns=continuous_data.columns) | ||
| if normalized: | ||
| if keep_stat: | ||
| self.mean = continuous_data.mean() | ||
| self.std = continuous_data.std() | ||
| continuous_data = (continuous_data - self.mean) / self.std | ||
| if not discrete_data.empty: | ||
| # One-hot encoding | ||
| discrete_data = self.feature_enc.transform(discrete_data) | ||
| if self.feature_enc is not None: | ||
| # One-hot encoding | ||
| discrete_data = self.feature_enc.transform(discrete_data) | ||
| if issparse(discrete_data): | ||
| discrete_data = discrete_data.toarray() | ||
| if not self.discrete: | ||
| X_df = pd.concat([pd.DataFrame(discrete_data.toarray()), continuous_data], axis=1) | ||
| X_df = pd.concat([pd.DataFrame(discrete_data), continuous_data], axis=1) | ||
| else: | ||
| X_df = pd.DataFrame(discrete_data.toarray()) | ||
| X_df = pd.DataFrame(discrete_data) | ||
| else: | ||
| X_df = continuous_data | ||
| return X_df.values, y | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see review comment on
`args.py` changes