From 7a4a7f4ac8e5b9db02df1b483029364cb2f6bee5 Mon Sep 17 00:00:00 2001 From: Seungchan An Date: Wed, 24 Dec 2025 17:49:30 -0500 Subject: [PATCH 1/4] avoid pandas Series FutureWarning --- src/clm/datasets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/clm/datasets.py b/src/clm/datasets.py index 72c1e78..24b7ba4 100644 --- a/src/clm/datasets.py +++ b/src/clm/datasets.py @@ -89,7 +89,10 @@ def __getitem__(self, idx): KeyError ): # some callers might only be interested in the descriptors encoded = None - return encoded, torch.Tensor(pd.to_numeric(row[self.descriptor_names])) + descriptors = torch.Tensor( + pd.to_numeric(row[self.descriptor_names]).to_numpy() + ) + return encoded, descriptors def get_validation(self, n_smiles): selected_indices = np.random.choice(self.validation_set.index, n_smiles) From b024835205b1bcdb5242772e439159dc79a2f46e Mon Sep 17 00:00:00 2001 From: Seungchan An Date: Wed, 24 Dec 2025 17:51:58 -0500 Subject: [PATCH 2/4] avoid SettingWithCopyWarning when shuffling SMILES --- src/clm/commands/create_training_sets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/clm/commands/create_training_sets.py b/src/clm/commands/create_training_sets.py index 7746cdc..e7c4430 100644 --- a/src/clm/commands/create_training_sets.py +++ b/src/clm/commands/create_training_sets.py @@ -189,6 +189,7 @@ def create_training_sets( generate_test_data = folds > 1 if generate_test_data: + smiles = smiles.tolist() np.random.shuffle(smiles) folds = np.array_split(smiles, folds) else: From b7830b17c691cfc749551735d76e957970e33107 Mon Sep 17 00:00:00 2001 From: Seungchan An Date: Wed, 24 Dec 2025 17:52:53 -0500 Subject: [PATCH 3/4] make add_carbon SMILES logging optional --- src/clm/commands/add_carbon.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/clm/commands/add_carbon.py b/src/clm/commands/add_carbon.py index 2593f44..658c8c5 100644 --- a/src/clm/commands/add_carbon.py +++ b/src/clm/commands/add_carbon.py @@ -25,7 +25,7 @@ def add_args(parser): return parser -def add_carbon(input_file, output_file): +def add_carbon(input_file, output_file, verbose=False): # make output directories os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True) @@ -49,11 +49,12 @@ def add_carbon(input_file, output_file): # loop over the input SMILES # output_smiles = list() for sm_idx, input_smiles in enumerate(tqdm(smiles)): - print( - "working on SMILES {} of {}: '{}' ...".format( - sm_idx, len(smiles), input_smiles + if verbose: + print( + "working on SMILES {} of {}: '{}' ...".format( + sm_idx, len(smiles), input_smiles + ) ) - ) """ code adapted from: https://github.com/ml-jku/mgenerators-failure-modes/blob/master/addcarbon.py From 353c00ea2fb93578169c6486df142bf2e6744770 Mon Sep 17 00:00:00 2001 From: Seungchan An Date: Wed, 24 Dec 2025 19:42:43 -0500 Subject: [PATCH 4/4] fix trailing whitespace --- src/clm/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clm/datasets.py b/src/clm/datasets.py index 24b7ba4..d737fcb 100644 --- a/src/clm/datasets.py +++ b/src/clm/datasets.py @@ -92,7 +92,7 @@ def __getitem__(self, idx): descriptors = torch.Tensor( pd.to_numeric(row[self.descriptor_names]).to_numpy() ) - return encoded, descriptors + return encoded, descriptors def get_validation(self, n_smiles): selected_indices = np.random.choice(self.validation_set.index, n_smiles)