diff --git a/scripts/multi_component_hydrogen_bond_propensity/mol2smiles.py b/scripts/multi_component_hydrogen_bond_propensity/mol2smiles.py new file mode 100644 index 0000000..0fccb50 --- /dev/null +++ b/scripts/multi_component_hydrogen_bond_propensity/mol2smiles.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# + +from ccdc.io import MoleculeReader +import os +import csv +import argparse +from utilities import file_list, string_scrubber, read_experimental_csv + + +def read_mol_file(directory, file): + '''Returns: identifier, smiles''' + mol_reader = MoleculeReader(os.path.join(directory, file)) + mol = mol_reader[0] + + return mol.identifier, mol.heaviest_component.smiles + + +def main(): + parser = argparse.ArgumentParser(__doc__) + parser.add_argument( + "--input_dir", + type=str, + required=True, + help="Directory containing API folders." + ) + parser.add_argument( + "--output_filename", + type=str, + required=True, + help="Filename of formatted .csv file containing SMILES." + ) + parser.add_argument( + "--experimental_csv", + type=str, + required=False, + help="Filename of formatted .csv file containing identifier names and experimental bool." + ) + parser.add_argument( + '--clean_id', + action='store_true', + help='Removes special characters from ids that may be problematic.' + ) + + args = parser.parse_args() + + output_path = os.path.join(args.input_dir, args.output_filename) + with open(output_path, 'w', newline='', encoding="utf-8") as output_file: + csvwriter = csv.writer(output_file, delimiter=',', quotechar='|') + csvwriter.writerow(['identifier', 'n_components', + 'component_a', 'component_b', 'neutral_a', 'neutral_b']) + + if args.experimental_csv: + experimental_dict = read_experimental_csv(args.experimental_csv) + + # API group directories contain one or more API files and a directory of coformers + API_groups = [name for name in os.listdir( + args.input_dir) if os.path.isdir(os.path.join(args.input_dir, name))] + + exp_replaced = combo_count = 0 + + with open(output_path, 'a+', newline='', encoding="utf-8") as output_file: + + for API_group in API_groups: + csvwriter = csv.writer(output_file, delimiter=',', quotechar='|') + API_group_path = os.path.join(args.input_dir, API_group) + + for API_file in file_list(API_group_path): + api_id, api_smiles = read_mol_file( + API_group_path, API_file) + print(api_id) + coformer_dir_path = os.path.join(API_group_path, 'coformers') + + for coformer_file in file_list(coformer_dir_path): + coformer_id, coformer_smiles = read_mol_file( + coformer_dir_path, coformer_file) + + combo_count += 1 + exp_bool = "?" + # Try to look up the experimental boolean in dictionary, if provided + if args.experimental_csv: + for (x, y) in [(api_id, coformer_id), (coformer_id, api_id)]: + if (x, y) in list(experimental_dict.keys()): + exp_bool = experimental_dict[(x, y)] + exp_replaced += 1 + + # Clean the ids if the option is turned on + if args.clean_id: + api_id = string_scrubber(api_id) + coformer_id = string_scrubber(coformer_id) + + n_components = 2 + if api_smiles == coformer_smiles: + n_components = 1 + + combo_id = ".".join([api_id, coformer_id, str(exp_bool)]) + csvwriter.writerow([f'"{combo_id}"', n_components, api_smiles, coformer_smiles, "", ""]) + + if args.experimental_csv: + print(f"Found experimental labels for {exp_replaced} out of {combo_count} combinations") + + +if __name__ == '__main__': + main() diff --git a/scripts/november_2023_morphology_webinar/morphology_plot.py b/scripts/november_2023_morphology_webinar/morphology_plot.py index 1e60e9c..3b90621 100644 --- a/scripts/november_2023_morphology_webinar/morphology_plot.py +++ b/scripts/november_2023_morphology_webinar/morphology_plot.py @@ -1,3 +1,11 @@ +#!/usr/bin/env python +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# import matplotlib # matplotlib.use('Agg') import matplotlib.pyplot as plt diff --git a/scripts/particle_rugosity/particle_rugosity.py b/scripts/particle_rugosity/particle_rugosity.py index 2ff1a15..3947cc9 100644 --- a/scripts/particle_rugosity/particle_rugosity.py +++ b/scripts/particle_rugosity/particle_rugosity.py @@ -1,3 +1,12 @@ +#!/usr/bin/env python +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# + # script for calculating the particle rugosity for a given crystal structure (.cif in local working directory, or refcode of a CSD entry) # requires a CCDC license, and loading of the CSD-python miniconda environment if run from Linux command line diff --git a/scripts/refcodes_with_properties/test_entry_property_calculator.py b/scripts/refcodes_with_properties/test_entry_property_calculator.py index c89488e..7d4bd51 100644 --- a/scripts/refcodes_with_properties/test_entry_property_calculator.py +++ b/scripts/refcodes_with_properties/test_entry_property_calculator.py @@ -1,3 +1,12 @@ +#!/usr/bin/env python +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# + import unittest from ccdc.io import EntryReader