diff --git a/src/clm/commands/add_carbon.py b/src/clm/commands/add_carbon.py index 2593f44..658c8c5 100644 --- a/src/clm/commands/add_carbon.py +++ b/src/clm/commands/add_carbon.py @@ -25,7 +25,7 @@ def add_args(parser): return parser -def add_carbon(input_file, output_file): +def add_carbon(input_file, output_file, verbose=False): # make output directories os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True) @@ -49,11 +49,12 @@ def add_carbon(input_file, output_file): # loop over the input SMILES # output_smiles = list() for sm_idx, input_smiles in enumerate(tqdm(smiles)): - print( - "working on SMILES {} of {}: '{}' ...".format( - sm_idx, len(smiles), input_smiles + if verbose: + print( + "working on SMILES {} of {}: '{}' ...".format( + sm_idx, len(smiles), input_smiles + ) ) - ) """ code adapted from: https://github.com/ml-jku/mgenerators-failure-modes/blob/master/addcarbon.py diff --git a/src/clm/commands/create_training_sets.py b/src/clm/commands/create_training_sets.py index 7746cdc..e7c4430 100644 --- a/src/clm/commands/create_training_sets.py +++ b/src/clm/commands/create_training_sets.py @@ -189,6 +189,7 @@ def create_training_sets( generate_test_data = folds > 1 if generate_test_data: + smiles = smiles.tolist() np.random.shuffle(smiles) folds = np.array_split(smiles, folds) else: diff --git a/src/clm/datasets.py b/src/clm/datasets.py index 72c1e78..d737fcb 100644 --- a/src/clm/datasets.py +++ b/src/clm/datasets.py @@ -89,7 +89,10 @@ def __getitem__(self, idx): KeyError ): # some callers might only be interested in the descriptors encoded = None - return encoded, torch.Tensor(pd.to_numeric(row[self.descriptor_names])) + descriptors = torch.Tensor( + pd.to_numeric(row[self.descriptor_names]).to_numpy() + ) + return encoded, descriptors def get_validation(self, n_smiles): selected_indices = np.random.choice(self.validation_set.index, n_smiles)