From 24a8d870afa3802f68831d596d351fca0d5beb0b Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 22 Mar 2023 13:55:19 -0700 Subject: [PATCH 1/2] Gutting analysis, which will be moved to RNMC --- .gitignore | 3 +- HiPRGen/mc_analysis.py | 1262 ---------------------------------------- test.py | 364 +----------- 3 files changed, 6 insertions(+), 1623 deletions(-) delete mode 100644 HiPRGen/mc_analysis.py diff --git a/.gitignore b/.gitignore index 118ecb1..2c86349 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ .ipynb_checkpoints/ -scratch \ No newline at end of file +scratch +HiPRGen.egg-info \ No newline at end of file diff --git a/HiPRGen/mc_analysis.py b/HiPRGen/mc_analysis.py deleted file mode 100644 index e6254c7..0000000 --- a/HiPRGen/mc_analysis.py +++ /dev/null @@ -1,1262 +0,0 @@ -from HiPRGen.report_generator import ReportGenerator -from HiPRGen.network_renderer import Renderer -from HiPRGen.network_loader import NetworkLoader -from HiPRGen.constants import ROOM_TEMP, KB -from HiPRGen.reaction_questions import marcus_barrier -from monty.serialization import dumpfn -import math -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.colors as mcolors -from itertools import chain -from multiprocessing import Pool - -def default_cost(free_energy): - return math.exp(min(10.0, free_energy) / (ROOM_TEMP * KB)) + 1 - - -def compute_starting_angle(l, step): - if l % 2 == 1: - return - int(l / 2) * step - else: - return - ((l / 2) * step) + 0.05 - - -def render_species(network_loader, path): - renderer = Renderer() - - starting_species_count = 0 - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - starting_species_count += 1 - - - left_angle_counter_step = 0.1 - left_angle_counter = math.pi + compute_starting_angle( - starting_species_count, - left_angle_counter_step - ) - - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - - renderer.new_node_boundary(species_id, left_angle_counter) - left_angle_counter += left_angle_counter_step - - else: - renderer.new_node(species_id) - - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - renderer.draw_node(species_id, radius=0.008) - else: - renderer.draw_node(species_id, radius=0.002) - - renderer.render(path) - - - - -def render_reactions_which_fired(network_loader, colors, path): - renderer = Renderer() - reactions_which_fired = set() - for seed in network_loader.trajectories: - for step in network_loader.trajectories[seed]: - reaction_id = network_loader.trajectories[seed][step][0] - reaction = network_loader.index_to_reaction(reaction_id) - reactions_which_fired.add(reaction_id) - - starting_species_count = 0 - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - starting_species_count += 1 - - - left_angle_counter_step = 0.1 - left_angle_counter = math.pi + compute_starting_angle( - starting_species_count, - left_angle_counter_step - ) - - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - - renderer.new_node_boundary(species_id, left_angle_counter) - left_angle_counter += left_angle_counter_step - - else: - renderer.new_node(species_id) - - for reaction_id in reactions_which_fired: - print(reaction_id) - reaction = network_loader.index_to_reaction(reaction_id) - for i in range(reaction['number_of_reactants']): - for j in range(reaction['number_of_products']): - reactant_id = reaction['reactants'][i] - product_id = reaction['products'][j] - dG = reaction['dG'] - renderer.draw_edge(reactant_id, product_id) - - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - renderer.draw_node(species_id, radius=0.008) - elif species_id in colors: - renderer.draw_node_square(species_id, color=colors[species_id], side=0.013) - else: - renderer.draw_node(species_id) - - renderer.render(path) - - -def render_reactions_which_fired_new_positions(network_loader, colors, path): - renderer = Renderer() - reactions_which_fired = set() - for seed in network_loader.trajectories: - for step in network_loader.trajectories[seed]: - reaction_id = network_loader.trajectories[seed][step][0] - reaction = network_loader.index_to_reaction(reaction_id) - reactions_which_fired.add(reaction_id) - - starting_species_count = 0 - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - starting_species_count += 1 - - - - left_angle_counter_step = 0.1 - left_angle_counter = math.pi + compute_starting_angle( - starting_species_count, - left_angle_counter_step - ) - right_angle_counter_step = 0.1 - right_angle_counter = compute_starting_angle( - len(colors), - right_angle_counter_step - ) - - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - - renderer.new_node_boundary(species_id, left_angle_counter) - left_angle_counter += left_angle_counter_step - - elif species_id in colors: - - renderer.new_node_boundary(species_id, right_angle_counter) - right_angle_counter += right_angle_counter_step - - else: - renderer.new_node(species_id) - - - - for reaction_id in reactions_which_fired: - print(reaction_id) - reaction = network_loader.index_to_reaction(reaction_id) - for i in range(reaction['number_of_reactants']): - for j in range(reaction['number_of_products']): - reactant_id = reaction['reactants'][i] - product_id = reaction['products'][j] - dG = reaction['dG'] - renderer.draw_edge(reactant_id, product_id) - - for species_id in range(network_loader.number_of_species): - if network_loader.initial_state_array[species_id] > 0: - renderer.draw_node(species_id, radius=0.008) - elif species_id in colors: - renderer.draw_node_square(species_id, color=colors[species_id], side=0.013) - else: - renderer.draw_node(species_id) - - renderer.render(path) - - - -class PathfindingTransfer: - def __init__(self,pathfinding,threshold): - self.pathfinding = pathfinding - self.threshold = threshold - - def __call__(self, species_id): - result = set() - pathways = self.pathfinding.compute_pathways(species_id) - pathways_sorted = sorted(pathways, key=lambda p: pathways[p]['weight']) - - for p in pathways_sorted[0:self.threshold]: - result.update(p) - - return result - - -def render_top_highlighted(pathfinding, colors, output_path, purple_id, num_threads=8, threshold=5): - renderer = Renderer(colors=[(0.7,0.7,0.7)]) - reactions_in_top_pathways = set() - species_in_top_pathways = set() - - pathfinding_transfer = PathfindingTransfer(pathfinding, threshold) - - with Pool(num_threads) as p: - for result in p.map(pathfinding_transfer, list(colors.keys())): - reactions_in_top_pathways.update(result) - - purple = colors[purple_id] - pathways_lol = pathfinding.compute_pathways(purple_id) - highlighted_reactions = sorted( - pathways_lol, - key=lambda p: pathways_lol[p]['weight'])[2] - - for reaction_id in reactions_in_top_pathways: - reaction = pathfinding.network_loader.index_to_reaction(reaction_id) - - for i in range(reaction['number_of_reactants']): - reactant_id = reaction['reactants'][i] - species_in_top_pathways.add(reactant_id) - - for j in range(reaction['number_of_products']): - product_id = reaction['products'][j] - species_in_top_pathways.add(product_id) - - - starting_species_count = 0 - for species_id in range(pathfinding.network_loader.number_of_species): - if pathfinding.network_loader.initial_state_array[species_id] > 0: - starting_species_count += 1 - - - left_angle_counter_step = 0.1 - left_angle_counter = math.pi + compute_starting_angle( - starting_species_count, - left_angle_counter_step - ) - right_angle_counter_step = 0.1 - right_angle_counter = compute_starting_angle( - len(colors), - right_angle_counter_step - ) - - for species_id in range(pathfinding.network_loader.number_of_species): - if pathfinding.network_loader.initial_state_array[species_id] > 0: - - renderer.new_node_boundary(species_id, left_angle_counter) - left_angle_counter += left_angle_counter_step - - elif species_id in colors: - - renderer.new_node_boundary(species_id, right_angle_counter) - right_angle_counter += right_angle_counter_step - - else: - renderer.new_node(species_id) - - - for reaction_id in reactions_in_top_pathways: - print(reaction_id) - reaction = pathfinding.network_loader.index_to_reaction(reaction_id) - for i in range(reaction['number_of_reactants']): - for j in range(reaction['number_of_products']): - reactant_id = reaction['reactants'][i] - product_id = reaction['products'][j] - renderer.draw_edge(reactant_id, product_id) - - for reaction_id in highlighted_reactions: - print(reaction_id) - reaction = pathfinding.network_loader.index_to_reaction(reaction_id) - for i in range(reaction['number_of_reactants']): - for j in range(reaction['number_of_products']): - reactant_id = reaction['reactants'][i] - product_id = reaction['products'][j] - renderer.draw_edge(reactant_id, product_id,color=purple,width=0.003) - - - - for species_id in species_in_top_pathways: - - if pathfinding.network_loader.initial_state_array[species_id] > 0: - renderer.draw_node(species_id, radius=0.008) - elif species_id in colors: - renderer.draw_node_square(species_id, color=colors[species_id], side=0.013) - else: - renderer.draw_node(species_id) - - renderer.render(output_path) - - - -def render_top_pathways(pathfinding, colors, output_path, num_threads=8, threshold=5): - renderer = Renderer() - reactions_in_top_pathways = set() - species_in_top_pathways = set() - - pathfinding_transfer = PathfindingTransfer(pathfinding, threshold) - - with Pool(num_threads) as p: - for result in p.map(pathfinding_transfer, list(colors.keys())): - reactions_in_top_pathways.update(result) - - for reaction_id in reactions_in_top_pathways: - reaction = pathfinding.network_loader.index_to_reaction(reaction_id) - - for i in range(reaction['number_of_reactants']): - reactant_id = reaction['reactants'][i] - species_in_top_pathways.add(reactant_id) - - for j in range(reaction['number_of_products']): - product_id = reaction['products'][j] - species_in_top_pathways.add(product_id) - - - starting_species_count = 0 - for species_id in range(pathfinding.network_loader.number_of_species): - if pathfinding.network_loader.initial_state_array[species_id] > 0: - starting_species_count += 1 - - - left_angle_counter_step = 0.1 - left_angle_counter = math.pi + compute_starting_angle( - starting_species_count, - left_angle_counter_step - ) - right_angle_counter_step = 0.1 - right_angle_counter = compute_starting_angle( - len(colors), - right_angle_counter_step - ) - - for species_id in range(pathfinding.network_loader.number_of_species): - if pathfinding.network_loader.initial_state_array[species_id] > 0: - - renderer.new_node_boundary(species_id, left_angle_counter) - left_angle_counter += left_angle_counter_step - - elif species_id in colors: - - renderer.new_node_boundary(species_id, right_angle_counter) - right_angle_counter += right_angle_counter_step - - else: - renderer.new_node(species_id) - - - for reaction_id in reactions_in_top_pathways: - print(reaction_id) - reaction = pathfinding.network_loader.index_to_reaction(reaction_id) - for i in range(reaction['number_of_reactants']): - for j in range(reaction['number_of_products']): - reactant_id = reaction['reactants'][i] - product_id = reaction['products'][j] - renderer.draw_edge(reactant_id, product_id) - - - for species_id in species_in_top_pathways: - - if pathfinding.network_loader.initial_state_array[species_id] > 0: - renderer.draw_node(species_id, radius=0.008) - elif species_id in colors: - renderer.draw_node_square(species_id, color=colors[species_id], side=0.013) - else: - renderer.draw_node(species_id) - - renderer.render(output_path) - - -def redox_report( - network_loader, - redox_report_path, - params -): - - redox_reactions = network_loader.get_all_redox_reactions() - - for r in redox_reactions: - marcus_barrier(r, network_loader.mol_entries, params) - r['dG_barrier'] = r['marcus_barrier'] - - redox_reactions = sorted(redox_reactions, key=lambda r: r['marcus_barrier']) - - report_generator = ReportGenerator( - network_loader.mol_entries, - redox_report_path, - rebuild_mol_pictures=False) - - report_generator.emit_text("redox report") - report_generator.emit_text("marcus barrier appears below the arrow") - for reaction in redox_reactions: - report_generator.emit_reaction(reaction) - - report_generator.finished() - - - -def coordination_report( - network_loader, - coordination_report_path, - formula, - charge -): - - for m in network_loader.mol_entries: - if m.formula == formula and m.charge == charge: - metal_id = m.ind - - coordination_reactions = network_loader.get_all_coordination_reactions(metal_id) - - report_generator = ReportGenerator( - network_loader.mol_entries, - coordination_report_path, - rebuild_mol_pictures=False) - - report_generator.emit_text("coordination report") - for reaction in coordination_reactions: - report_generator.emit_reaction(reaction) - - report_generator.finished() - - -def decoordination_report( - network_loader, - decoordination_report_path, - formula, - charge -): - - for m in network_loader.mol_entries: - if m.formula == formula and m.charge == charge: - metal_id = m.ind - - decoordination_reactions = network_loader.get_all_decoordination_reactions( - metal_id) - - report_generator = ReportGenerator( - network_loader.mol_entries, - decoordination_report_path, - rebuild_mol_pictures=False) - - report_generator.emit_text("decoordination report") - for reaction in decoordination_reactions: - report_generator.emit_reaction(reaction) - - report_generator.finished() - - - - - -def export_tally_to_json(network_loader, path): - reaction_tally = {} - reactions = {} - for seed in network_loader.trajectories: - for step in network_loader.trajectories[seed]: - reaction_id = network_loader.trajectories[seed][step][0] - - db_reaction = network_loader.index_to_reaction(reaction_id) - json_reactants = [ network_loader.mol_entries[i].entry_id - for i in db_reaction['reactants'] if i != -1] - json_products = [ network_loader.mol_entries[i].entry_id - for i in db_reaction['products'] if i != -1] - json_reaction = { - 'reactants' : json_reactants, - 'products' : json_products - } - - if reaction_id not in reactions: - reactions[reaction_id] = json_reaction - - - if reaction_id in reaction_tally: - reaction_tally[reaction_id] += 1 - else: - reaction_tally[reaction_id] = 1 - - dumpfn({ - 'pathways' : reaction_tally, - 'reactions' : reactions}, path) - - - - - -def reaction_tally_report( - network_loader, - reaction_tally_report_path, - cutoff=10): - - reaction_tally = {} - species_set = set() - for seed in network_loader.trajectories: - for step in network_loader.trajectories[seed]: - reaction_id = network_loader.trajectories[seed][step][0] - - reaction = network_loader.index_to_reaction(reaction_id) - - if reaction_id in reaction_tally: - reaction_tally[reaction_id] += 1 - else: - reaction_tally[reaction_id] = 1 - - for i in range(reaction['number_of_reactants']): - reactant_id = reaction['reactants'][i] - species_set.add(reactant_id) - - for j in range(reaction['number_of_products']): - product_id = reaction['products'][j] - species_set.add(product_id) - - - - report_generator = ReportGenerator( - network_loader.mol_entries, - reaction_tally_report_path, - rebuild_mol_pictures=False) - - report_generator.emit_text("reaction tally report") - report_generator.emit_text( - "total number of reactions: " + - str(network_loader.number_of_reactions)) - report_generator.emit_text( - "number of reactions which fired: " + - str(len(reaction_tally.keys()))) - report_generator.emit_text( - "total number of species: " + - str(network_loader.number_of_species)) - report_generator.emit_text( - "number of species observed: " + - str(len(species_set))) - - - for (reaction_index, number) in sorted( - reaction_tally.items(), key=lambda pair: -pair[1]): - if number > cutoff: - report_generator.emit_text(str(number) + " occourances of:") - report_generator.emit_reaction( - network_loader.index_to_reaction(reaction_index), - label=str(reaction_index) - ) - report_generator.emit_newline() - - report_generator.finished() - -def species_report(network_loader, species_report_path): - """ - print all species - """ - report_generator = ReportGenerator( - network_loader.mol_entries, - species_report_path, - rebuild_mol_pictures=False) - - report_generator.emit_text("species report") - for i in range(network_loader.number_of_species): - mol = network_loader.mol_entries[i] - report_generator.emit_text(str(mol.entry_id)) - report_generator.emit_text( - "formula: " + mol.formula) - - report_generator.emit_molecule(i) - report_generator.emit_newline() - - report_generator.finished() - -def reaction_report(network_loader, reaction_report_path): - """ - print all reactions. Warning: don't do this for a big network - """ - report_generator = ReportGenerator( - network_loader.mol_entries, - reaction_report_path, - rebuild_mol_pictures=False) - - report_generator.emit_text("reaction report") - for i in range(network_loader.number_of_reactions): - reaction = network_loader.index_to_reaction(i) - report_generator.emit_reaction(reaction) - report_generator.emit_newline() - - report_generator.finished() - - - - - -class Pathfinding: - """ - Given a chemical system, we are interested in exploring the reaction - pathways which produce species of interest. Reaction networks are a - useful tool for approaching the problem because they can be simulated - efficiently, even when the network has hundreds of millions of - reactions. Unfortunately, since we collapse all spacial aspects of the - system, identical molecules become indistinguishable (in reality, - identical molecules can be distinguished by their locations in - space). This creates the following problem: Suppose we are interested - in the production of species G from A and have the following - simulation trajectory: - - A -> Z + F - F -> X - A -> B + F - A -> C + H - A -> D + H - C -> E - D -> E - E + F -> G - - It is impossible to decide between the two pathways - - A -> B + F A -> C + H C -> E E + F -> G - A -> B + F A -> D + H D -> E E + F -> G - - If our model had a spacial aspect, we would be able to trace the - specific E used back to either a C or a D. Fundamentally, this - ambiguity is caused by sequence - - A -> C + H C -> E E -> D D + H -> A - - which is called a deficiency loop. To avoid this problem, we extract a - pathway which produces the target in the following way: Take the first - reaction x which produces the target molecule. Then recursively, take - the first reactions which produced the reactants of x. If a reactant - is a starting molecule, then stop. Applying the procedure to the above - sequence gives the pathway - - A -> C + H C -> E A -> Z + F E + F -> G - - Intuitively, this procedure is producing pathways which don't take - into account competition, since there is no guarantee that the first - molecule which is produced isn't immediately consumed by some - competing reaction. Since the focus of this software is monte carlo - simulation with thermodynamic rates, this isn't a problem as all - downhill reactions have the maximum possible rate constant, so - reactions sans competition are also likely to occour. - """ - - def __init__( - self, - network_loader - ): - - self.network_loader = network_loader - self.pathways = {} - - - def compute_pathway( - self, - species_id, - trajectory): - - pathway = [] - target_produced = False - - for step in trajectory: - reaction_id = trajectory[step][0] - reaction = self.network_loader.index_to_reaction(reaction_id) - if species_id in reaction['products']: - target_produced = True - break - - if target_produced: - pathway.append(reaction_id) - - - prefixes = [] - for i in range(reaction['number_of_reactants']): - reactant_id = reaction['reactants'][i] - if self.network_loader.initial_state_dict[reactant_id] == 0: - prefix = self.compute_pathway(reactant_id, trajectory) - prefixes.append(prefix) - - # early return if one of the final reaction in one - # of the prefixes gives both of the reactants - prefix_final_reaction = self.network_loader.index_to_reaction( - prefix[-1]) - - if (sorted(reaction['reactants']) == - sorted(prefix_final_reaction['products'])): - return prefix + pathway - - for prefix in prefixes: - pathway = prefix + pathway - - - return pathway - - - - def compute_pathways(self, species_id): - - if species_id not in self.pathways: - reaction_pathway_list = [] - for seed in self.network_loader.trajectories: - pathway = self.compute_pathway( - species_id, - self.network_loader.trajectories[seed] - ) - - if len(pathway) > 0: - reaction_pathway_list.append(pathway) - - self.pathways[species_id] = self.collect_duplicate_pathways( - reaction_pathway_list) - - return self.pathways[species_id] - - - def collect_duplicate_pathways( - self, pathways - ): - pathway_dict = {} - for pathway in pathways: - key = frozenset(pathway) - if key in pathway_dict: - pathway_dict[key]["frequency"] += 1 - else: - path_weight = self.compute_path_weight(pathway) - pathway_dict[key] = { - "pathway": pathway, - "frequency": 1, - "weight": path_weight, - } - - return pathway_dict - - - def compute_path_weight(self, pathway): - weight = 0.0 - for reaction_index in pathway: - reaction = self.network_loader.index_to_reaction(reaction_index) - weight += default_cost(reaction["dG"]) - return weight - - - -def export_pathways_to_json(pathfinding, species_id, path): - pathways = pathfinding.compute_pathways(species_id) - reactions = {} - for pathway in pathways: - for reaction_id in pathway: - db_reaction = pathfinding.network_loader.index_to_reaction(reaction_id) - json_reactants = [ pathfinding.network_loader.mol_entries[i].entry_id - for i in db_reaction['reactants'] if i != -1] - json_products = [ pathfinding.network_loader.mol_entries[i].entry_id - for i in db_reaction['products'] if i != -1] - json_reaction = { - 'reactants' : json_reactants, - 'products' : json_products - } - reactions[reaction_id] = json_reaction - - dumpfn({ - 'pathways' : list(pathways.values()), - 'reactions' : reactions}, path) - - - -def generate_pathway_report( - pathfinding, - species_id, - report_file_path, - number_of_pathways=100, - sort_by_frequency=False -): - - report_generator = ReportGenerator( - pathfinding.network_loader.mol_entries, - report_file_path, - rebuild_mol_pictures=False) - - pathways = pathfinding.compute_pathways(species_id) - - - report_generator.emit_text("pathway report for") - report_generator.emit_molecule(species_id) - - if sort_by_frequency: - report_generator.emit_text( - "top " + - str(number_of_pathways) + - " pathways sorted by frequency") - - else: - report_generator.emit_text( - "top " + - str(number_of_pathways) + - " pathways sorted by cost") - - report_generator.emit_newline() - report_generator.emit_initial_state(pathfinding.network_loader.initial_state_dict) - report_generator.emit_newpage() - - if sort_by_frequency: - def sort_function(item): - return -item[1]["frequency"] - - else: - def sort_function(item): - return item[1]["weight"] - - - count = 1 - for _, unique_pathway in sorted(pathways.items(), key=sort_function): - - frequency = unique_pathway["frequency"] - weight = unique_pathway["weight"] - - report_generator.emit_text("pathway " + str(count)) - report_generator.emit_text("path weight: " + str(weight)) - report_generator.emit_text(str(frequency) + " occurrences:") - - for reaction_index in unique_pathway["pathway"]: - reaction = pathfinding.network_loader.index_to_reaction(reaction_index) - report_generator.emit_reaction(reaction, label=str(reaction_index)) - - report_generator.emit_newpage() - count += 1 - if count > number_of_pathways: - break - - report_generator.finished() - - -class SimulationReplayer: - """ - class for rerunning through all the simulations. This is - relatively fast since we don't need to actually choose which - reactions fire / update reaction propensities. - """ - - def __init__(self, network_loader): - self.network_loader = network_loader - - - self.compute_expected_final_state() - self.compute_production_consumption_info() - self.compute_sink_data() - - - def compute_expected_final_state(self): - self.expected_final_state = np.zeros( - self.network_loader.number_of_species, - dtype=int - ) - - for seed in self.network_loader.trajectories: - state = np.copy(self.network_loader.initial_state_array) - for step in self.network_loader.trajectories[seed]: - reaction_index = self.network_loader.trajectories[seed][step][0] - time = self.network_loader.trajectories[seed][step][1] - reaction = self.network_loader.index_to_reaction(reaction_index) - - for i in range(reaction['number_of_reactants']): - reactant_index = reaction['reactants'][i] - state[reactant_index] -= 1 - - for j in range(reaction['number_of_products']): - product_index = reaction['products'][j] - state[product_index] += 1 - - self.expected_final_state += state - - self.expected_final_state = ( - self.expected_final_state / len(self.network_loader.trajectories)) - - def compute_production_consumption_info(self): - self.consuming_reactions = {} - self.producing_reactions = {} - - for i in range(self.network_loader.number_of_species): - self.consuming_reactions[i] = {} - self.producing_reactions[i] = {} - - for seed in self.network_loader.trajectories: - for step in self.network_loader.trajectories[seed]: - reaction_index = self.network_loader.trajectories[seed][step][0] - time = self.network_loader.trajectories[seed][step][1] - reaction = self.network_loader.index_to_reaction(reaction_index) - - for i in range(reaction['number_of_reactants']): - reactant_index = reaction['reactants'][i] - if reaction_index not in self.consuming_reactions[reactant_index]: - self.consuming_reactions[reactant_index][reaction_index] = 1 - else: - self.consuming_reactions[reactant_index][reaction_index] += 1 - - - for j in range(reaction['number_of_products']): - product_index = reaction['products'][j] - if reaction_index not in self.producing_reactions[product_index]: - self.producing_reactions[product_index][reaction_index] = 1 - else: - self.producing_reactions[product_index][reaction_index] += 1 - - def compute_state_time_series(self, seed): - state_dimension_size = len(self.network_loader.initial_state_array) - step_dimension_size = len(self.network_loader.trajectories[seed]) - time_series = np.zeros( - (step_dimension_size, state_dimension_size), - dtype=int) - - state = np.copy(self.network_loader.initial_state_array) - for step in self.network_loader.trajectories[seed]: - reaction_index = self.network_loader.trajectories[seed][step][0] - time = self.network_loader.trajectories[seed][step][1] - reaction = self.network_loader.index_to_reaction(reaction_index) - - for i in range(reaction['number_of_reactants']): - reactant_index = reaction['reactants'][i] - state[reactant_index] -= 1 - - for j in range(reaction['number_of_products']): - product_index = reaction['products'][j] - state[product_index] += 1 - - time_series[step] = state - - return time_series - - - def time_series_graph( - self, - seeds, - species_of_interest, - path, - colors = list(mcolors.TABLEAU_COLORS.values()), - styles = ['solid', 'dotted', 'dashed', 'dashdot'], - internal_index_labels=True - ): - - - max_trajectory_length = 0 - for seed in seeds: - max_trajectory_length = max( - max_trajectory_length, - len(self.network_loader.trajectories[seed])) - - total_time_series = np.zeros( - (max_trajectory_length, self.network_loader.number_of_species), - dtype=int - ) - - for seed in seeds: - total_time_series += pad_time_series( - self.compute_state_time_series(seed), - max_trajectory_length) - - total_time_series = total_time_series / len(seeds) - - background_species = set() - for index in range(self.network_loader.number_of_species): - for step in range(max_trajectory_length): - if (total_time_series[step,index] > 0.1 and - index not in species_of_interest): - background_species.add(index) - - - line_dict = {} - i = 0 - for species_index in species_of_interest: - r = i % len(colors) - q = i // len(colors) - line_dict[species_index] = (colors[r], styles[q]) - i += 1 - - - fig, (ax0, ax1, ax2) = plt.subplots( - 3, 1, - figsize=(5,10), - gridspec_kw={'height_ratios':[2,2,1]}) - - y_max = 0 - for step in range(total_time_series.shape[0]): - for species_index in species_of_interest: - y_max = max(y_max, total_time_series[step,species_index]) - - ax0.set_xlim([0,total_time_series.shape[0]]) - ax0.set_ylim([0,y_max+1]) - - ax1.set_xlim([0,total_time_series.shape[0]]) - ax1.set_ylim([0,(y_max+1)/10]) - - - ticks = np.arange(0, total_time_series.shape[0]) - for i, species_index in enumerate(background_species): - ax0.plot(ticks, - total_time_series[:, species_index], - color=mcolors.hsv_to_rgb((0,0,0.9)) - ) - - ax1.plot(ticks, - total_time_series[:, species_index], - color=mcolors.hsv_to_rgb((0,0,0.9)) - ) - - - - for species_index in species_of_interest: - - - ax0.plot(ticks, - total_time_series[:, species_index], - color=line_dict[species_index][0], - linestyle=line_dict[species_index][1] - ) - - ax1.plot(ticks, - total_time_series[:, species_index], - color=line_dict[species_index][0], - linestyle=line_dict[species_index][1] - ) - - - - # creating a legend - ax2.yaxis.set_visible(False) - ax2.xaxis.set_visible(False) - ax2.set_axis_off() - ax2.set_xlim([0,1]) - ax2.set_ylim([0,1]) - - - i = 0 - for species_index in species_of_interest: - if internal_index_labels: - label = str(species_index) - else: - label = self.network_loader.mol_entries[species_index].entry_id - - r = i % 10 - q = i // 10 - pos_x = [q * 0.3 + 0, q * 0.3 + 0.05] - pos_y = [r * 0.1 + 0.05, r * 0.1 + 0.05] - text_pos_x = q * 0.3 + 0.06 - text_pos_y = r * 0.1 + 0.05 - - ax2.plot( - pos_x, pos_y, - color=line_dict[species_index][0], - linestyle=line_dict[species_index][1]) - - ax2.text(text_pos_x, text_pos_y, - label, - fontsize=8, - horizontalalignment='left', - verticalalignment='center') - i += 1 - - fig.savefig(path, transparent=True) - - - def compute_sink_data(self): - max_ratio = 1e10 - self.sink_data = {} - - for i in range(self.network_loader.number_of_species): - number_of_consuming_reactions = sum( - self.consuming_reactions[i].values()) - number_of_distinct_consuming_reactions = len( - self.consuming_reactions[i].keys()) - number_of_producing_reactions = sum( - self.producing_reactions[i].values()) - number_of_distinct_producing_reactions = len( - self.producing_reactions[i].keys()) - - - if number_of_consuming_reactions != 0: - ratio = number_of_producing_reactions / number_of_consuming_reactions - else: - ratio = max_ratio - - expected_value = self.expected_final_state[i] - - self.sink_data[i] = { - "species_index" : i, - "number_of_consuming_reactions" : number_of_consuming_reactions, - "number_of_distinct_consuming_reactions" - : number_of_distinct_consuming_reactions, - "number_of_producing_reactions" : number_of_producing_reactions, - "number_of_distinct_producing_reactions" - : number_of_distinct_producing_reactions, - "ratio" : ratio, - "expected_value" : expected_value - } - - self.sinks = [ - i for i in range(self.network_loader.number_of_species) - if self.sink_filter(i)] - - def sink_filter(self, species_index): - sink_data = self.sink_data[species_index] - - number_of_consuming_reactions = sink_data["number_of_consuming_reactions"] - number_of_distinct_consuming_reactions = sink_data[ - "number_of_distinct_consuming_reactions"] - number_of_producing_reactions = sink_data["number_of_producing_reactions"] - number_of_distinct_producing_reactions = sink_data[ - "number_of_distinct_producing_reactions"] - ratio = sink_data["ratio"] - expected_value = sink_data["expected_value"] - - mol = self.network_loader.mol_entries[species_index] - if (number_of_consuming_reactions + number_of_producing_reactions > 0 and - ratio > 1.5 and - expected_value > 0.1 and - mol.spin_multiplicity == 1): - return True - else: - return False - - -def export_consumption_to_json(simulation_replayer, species_index, path): - network_loader = simulation_replayer.network_loader - producing_reactions = simulation_replayer.producing_reactions[species_index] - consuming_reactions = simulation_replayer.consuming_reactions[species_index] - reactions = {} - for reaction_id in chain(producing_reactions, consuming_reactions) : - db_reaction = network_loader.index_to_reaction(reaction_id) - json_reactants = [ network_loader.mol_entries[i].entry_id - for i in db_reaction['reactants'] if i != -1] - json_products = [ network_loader.mol_entries[i].entry_id - for i in db_reaction['products'] if i != -1] - json_reaction = { - 'reactants' : json_reactants, - 'products' : json_products - } - reactions[reaction_id] = json_reaction - - dumpfn({ - 'reactions' : reactions, - 'producing_reactions' : producing_reactions, - 'consuming_reactions' : consuming_reactions}, - path) - -def pad_time_series(time_series, max_number_of_steps): - num_steps = time_series.shape[0] - state_size = time_series.shape[1] - padded_time_series = np.zeros( - (max_number_of_steps, state_size), - dtype=int) - - for step in range(max_number_of_steps): - if step < num_steps: - padded_time_series[step] = time_series[step] - else: - padded_time_series[step] = time_series[num_steps-1] - - return padded_time_series - - - -def export_sinks_to_json(simulation_replayer, path): - sink_data = simulation_replayer.sink_data - sink_data_json = {} - for i in sink_data: - mol = simulation_replayer.network_loader.mol_entries[i] - sink_data_json[mol.entry_id] = sink_data[i] - - dumpfn(sink_data_json, path) - -def export_species_report_to_json(network_loader, path): - data = {} - for i in range(network_loader.number_of_species): - data[i] = network_loader.mol_entries[i].entry_id - - dumpfn(data, path) - - -def consumption_report( - simulation_replayer, - species_index, - consumption_report_path -): - - sink_data = simulation_replayer.sink_data[species_index] - - producing_reactions = simulation_replayer.producing_reactions[species_index] - consuming_reactions = simulation_replayer.consuming_reactions[species_index] - - report_generator = ReportGenerator( - simulation_replayer.network_loader.mol_entries, - consumption_report_path, - rebuild_mol_pictures=False) - - - report_generator.emit_text("P/C ratio: " + - str(sink_data["ratio"])) - report_generator.emit_text("expected val: " + - str(sink_data["expected_value"])) - report_generator.emit_newline() - report_generator.emit_text("consuming reactions:") - for (reaction_index, number) in sorted( - consuming_reactions.items(), - key=lambda item: -item[1]): - - reaction = simulation_replayer.network_loader.index_to_reaction( - reaction_index) - report_generator.emit_text(str(number) + " occourances:") - report_generator.emit_reaction(reaction) - - - report_generator.emit_text("producing reactions:") - for (reaction_index, number) in sorted( - producing_reactions.items(), - key=lambda item: -item[1]): - - reaction = simulation_replayer.network_loader.index_to_reaction( - reaction_index) - - report_generator.emit_text(str(number) + " occourances:") - report_generator.emit_reaction(reaction) - - - report_generator.finished() - - -def sink_report( - simulation_replayer, - sink_report_path -): - - - report_generator = ReportGenerator( - simulation_replayer.network_loader.mol_entries, - sink_report_path, - rebuild_mol_pictures=False) - - sinks_sorted = sorted( - simulation_replayer.sinks, - key = lambda i: -simulation_replayer.sink_data[i]["ratio"]) - - for species_index in sinks_sorted: - - sink_data = simulation_replayer.sink_data[species_index] - - number_of_consuming_reactions = sink_data[ - "number_of_consuming_reactions"] - number_of_distinct_consuming_reactions = sink_data[ - "number_of_distinct_consuming_reactions"] - number_of_producing_reactions = sink_data[ - "number_of_producing_reactions"] - number_of_distinct_producing_reactions = sink_data[ - "number_of_distinct_producing_reactions"] - ratio = sink_data["ratio"] - expected_value = sink_data["expected_value"] - - - report_generator.emit_text("P/C ratio: " + str(ratio)) - report_generator.emit_text("expected val: " + str(expected_value)) - - report_generator.emit_text( - "produced: " + str(number_of_producing_reactions)) - - report_generator.emit_text( - str(number_of_distinct_producing_reactions) + - " distinct producing reactions") - - report_generator.emit_text( - "consumed: " + str(number_of_consuming_reactions)) - - report_generator.emit_text( - str(number_of_distinct_consuming_reactions) + - " distinct consuming reactions") - report_generator.emit_molecule(species_index) - report_generator.emit_newline() - - report_generator.finished() diff --git a/test.py b/test.py index 8f7965d..bf3b644 100644 --- a/test.py +++ b/test.py @@ -1,17 +1,11 @@ import os import sys import subprocess -import sqlite3 -import pickle - from HiPRGen.network_loader import NetworkLoader -from HiPRGen.initial_state import find_mol_entry_from_xyz_and_charge from monty.serialization import loadfn, dumpfn from HiPRGen.species_filter import species_filter from HiPRGen.bucketing import bucket -from HiPRGen.report_generator import ReportGenerator -from HiPRGen.initial_state import insert_initial_state from HiPRGen.constants import ROOM_TEMP, Terminal from HiPRGen.reaction_filter_payloads import ( DispatcherPayload, @@ -21,8 +15,6 @@ from HiPRGen.species_questions import ( mg_species_decision_tree, li_species_decision_tree, - positive_penalty, - species_default_true ) from HiPRGen.reaction_questions import ( @@ -30,19 +22,6 @@ ) -from HiPRGen.mc_analysis import ( - reaction_tally_report, - species_report, - Pathfinding, - SimulationReplayer, - generate_pathway_report, - sink_report, - consumption_report, - redox_report, - coordination_report, - decoordination_report -) - # Since HiPRGen uses an end-to-end testing approach rather than testing # each individual function, we have decided to use the tests as # documentation, by explaining every single line through the first test. @@ -62,15 +41,14 @@ print("usage: python test.py number_of_threads") quit() - number_of_threads = sys.argv[1] + class bcolors: PASS = '\u001b[32;1m' FAIL = '\u001b[31;1m' ENDC = '\u001b[0m' - # HiPRGen is organized as a pipeline, where all the relevent data is # stored in a sqlite database between phases. For this reason, during # a single run of the full pipeline, it makes sense to store all the @@ -84,13 +62,11 @@ class bcolors: subprocess.run(['mkdir', './scratch']) - def li_test(): - # folder is the where we store all our intermediate databases folder = './scratch/li_test' - subprocess.run(['mkdir', folder ]) + subprocess.run(['mkdir', folder]) # The initial input to the pipeline is a list of LIBE or MADEIRA # dataset entries. We provide two examples in the data foloder. @@ -178,148 +154,13 @@ def li_test(): ] ) - # After we have generated the mol_entries, we refer to molecules by - # their index. The function find_mol_entry_from_xyz_and_charge can - # help find the indices of specific species to be used in the initial - # condition for propagating trajectories and/or trajectory analysis. - Li_plus_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/Li.xyz', - 1) - - EC_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/EC.xyz', - 0) - - LEDC_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/LEDC.xyz', - 0) - - - # After generating a reaction network, it is stored in rn.sqlite. We - # use Monte Carlo simulation to interrogate the network, and for that - # we need to define an initial condition. - initial_state = { - Li_plus_id : 30, - EC_id : 30 - } - - # The initial state and the trajectories (after simulation) are stored in - # a seperate database from the network, here called initial_state.sqlite. - # This facilitates running multiple independent simulations of the same - # network with different initial conditions at the same time, if desired. - insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite') - - - # GMC is a high performance reaction network Monte Carlo simulator using the - # Gillespie algorithm: https://github.com/BlauGroup/RNMC. Here we run 1000 - # trajectories each of 200 steps. - subprocess.run([ - 'GMC', - '--reaction_database=' + folder + '/rn.sqlite', - '--initial_state_database=' + folder + '/initial_state.sqlite', - '--number_of_simulations=1000', - '--base_seed=1000', - '--thread_count=' + number_of_threads, - '--step_cutoff=200' - ]) - # The network loader builds a python object around a reaction network # and the molecules to make it easier to use them. network_loader = NetworkLoader( folder + '/rn.sqlite', - folder + '/mol_entries.pickle', - folder + '/initial_state.sqlite' + folder + '/mol_entries.pickle' ) - network_loader.load_trajectories() - network_loader.load_initial_state() - - - - # HiPRGen has analysis tools to understand what happened in our simulation. - # The output files are written into the same folder in which the reaction - # network is stored. - - # This report is empty, but we use it to generate the molecule pictures. - # This is an expensive operation, so we only want do do it once. - report_generator = ReportGenerator( - network_loader.mol_entries, - folder + '/dummy.tex', - rebuild_mol_pictures=True) - - - # The tally report shows reactions sorted by the number of times fired. - reaction_tally_report( - network_loader, - folder + '/reaction_tally.tex' - ) - # Run `pdflatex reaction_tally.tex` in `scratch/li_test` to generate - # the tally report PDF. - - - # The species report shows every specie in the network and their IDs. - species_report(network_loader, folder + '/species_report.tex') - # Run `pdflatex species_report.tex` in `scratch/li_test` to generate - # the species report PDF. - - - # Pathfinding is a central goal of HiPRGen / GMC. See mc_analysis.py for - # further documentation of the Pathfinding class. - pathfinding = Pathfinding(network_loader) - - - # The pathway report shows all the ways that a target species was - # produced in the simulation trajectories, where each simulation only - # contributes the shortest path responsible for the first formation - # of the target species to the report. The report can be sorted by - # pathway frequency, but instead here we sort by pathway cost. Note - # that the test network has ~5000 reactions while production networks - # have between 50-100 million reactions. - generate_pathway_report( - pathfinding, - LEDC_id, - folder + '/LEDC_pathways.tex', - sort_by_frequency=False - ) - # Run `pdflatex LEDC_pathways.tex` in `scratch/li_test` to generate - # the LEDC pathway report PDF. - - - # The simulation replayer sweeps through all trajectories in order - # to extract additional information that is used for consumption - # reports and sink reports. - simulation_replayer = SimulationReplayer(network_loader) - - - # The consumption report shows reactions which consumed a target - # species, sorted by the number of times the reaction fired. - consumption_report(simulation_replayer, - LEDC_id, - folder + '/LEDC_consumption_report.tex') - # Run `pdflatex LEDC_consumption_report.tex` in `scratch/li_test` - # to generate the LEDC consumption report PDF. - - - # The sink report shows species which have a production to - # consumption ratio of greater than 3/2 and which have an expected - # value above 0.1. These are two of the three heuristic criteria - # that we use to identify network products. The third criteria is - # that each network product must have a shortest path with cost - # less than 10. This can be checked by generating pathway reports - # to each species shown in the sink report. For the curious reader, - # we note that generating pathway reports to the six species in the - # sink report will show that only Li2CO3, C2H4, LiEDC-, and DLEMC - # have sufficiently low-cost paths to pass the third criteria and - # thus to be considered products of the test network used here. - sink_report(simulation_replayer, folder + '/sink_report.tex') - # Run `pdflatex sink_report.tex` in `scratch/li_test` to generate - # the sink report PDF. - - - tests_passed = True if network_loader.number_of_species == 190: print(bcolors.PASS + @@ -331,8 +172,6 @@ def li_test(): bcolors.ENDC) tests_passed = False - - if network_loader.number_of_reactions == 4921: print(bcolors.PASS + "li_test: correct number of reactions" + @@ -348,17 +187,14 @@ def li_test(): def mg_test(): - folder = './scratch/mg_test' - subprocess.run(['mkdir', folder ]) + subprocess.run(['mkdir', folder]) mol_json = './data/sam_G2.json' species_decision_tree = mg_species_decision_tree database_entries = loadfn(mol_json) - - mol_entries = species_filter( database_entries, folder + '/mol_entries.pickle', @@ -367,11 +203,8 @@ def mg_test(): coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy) ) - - bucket(mol_entries, folder + '/buckets.sqlite') - dispatcher_payload = DispatcherPayload( folder + '/buckets.sqlite', folder + '/rn.sqlite', @@ -388,7 +221,6 @@ def mg_test(): Terminal.DISCARD ) - dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json') dumpfn(worker_payload, folder + '/worker_payload.json') @@ -406,84 +238,11 @@ def mg_test(): ] ) - - mg_g2_plus_plus_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/mgg2.xyz', - 2) - - c2h4_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/c2h4.xyz', - 0) - - c2h6_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/c2h6.xyz', - 0) - - initial_state = { - 33 : 30, - 81 : 30 - } - - - insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite') - - - subprocess.run([ - 'GMC', - '--reaction_database=' + folder + '/rn.sqlite', - '--initial_state_database=' + folder + '/initial_state.sqlite', - '--number_of_simulations=1000', - '--base_seed=1000', - '--thread_count=' + number_of_threads, - '--step_cutoff=200' - ]) - - - network_loader = NetworkLoader( folder + '/rn.sqlite', folder + '/mol_entries.pickle', - folder + '/initial_state.sqlite' ) - network_loader.load_trajectories() - network_loader.load_initial_state() - - - - report_generator = ReportGenerator( - network_loader.mol_entries, - folder + '/dummy.tex', - rebuild_mol_pictures=True) - - reaction_tally_report( - network_loader, - folder + '/reaction_tally.tex' - ) - - pathfinding = Pathfinding(network_loader) - - generate_pathway_report( - pathfinding, - c2h6_id, - folder + '/C2H6_pathways.tex', - sort_by_frequency=False - ) - - generate_pathway_report( - pathfinding, - c2h4_id, - folder + '/C2H4_pathways.tex', - sort_by_frequency=False - ) - - - - species_report(network_loader, folder + '/species_report.tex') - tests_passed = True if network_loader.number_of_species == 83: print(bcolors.PASS + @@ -495,8 +254,6 @@ def mg_test(): bcolors.ENDC) tests_passed = False - - if network_loader.number_of_reactions == 788: print(bcolors.PASS + "mg_test: correct number of reactions" + @@ -510,119 +267,6 @@ def mg_test(): return tests_passed -def flicho_test(): - - - folder = './scratch/flicho_test' - subprocess.run(['mkdir', folder ]) - - mol_json = './data/flicho_test.json' - database_entries = loadfn(mol_json) - species_decision_tree = li_species_decision_tree - - - mol_entries = species_filter( - database_entries, - mol_entries_pickle_location=folder + '/mol_entries.pickle', - species_report=folder + '/unfiltered_species_report.tex', - species_decision_tree=species_decision_tree, - coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy), - ) - - - bucket(mol_entries, folder + '/buckets.sqlite') - - params = { - 'temperature' : ROOM_TEMP, - 'electron_free_energy' : -1.4 - } - - dispatcher_payload = DispatcherPayload( - folder + '/buckets.sqlite', - folder + '/rn.sqlite', - folder + '/reaction_report.tex' - ) - - worker_payload = WorkerPayload( - folder + '/buckets.sqlite', - default_reaction_decision_tree, - params, - Terminal.DISCARD - ) - - - dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json') - dumpfn(worker_payload, folder + '/worker_payload.json') - - subprocess.run( - [ - 'mpirun', - '--use-hwthread-cpus', - '-n', - number_of_threads, - 'python', - 'run_network_generation.py', - folder + '/mol_entries.pickle', - folder + '/dispatcher_payload.json', - folder + '/worker_payload.json' - ] - ) - - Li_plus_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/Li.xyz', - 1) - - EC_id = find_mol_entry_from_xyz_and_charge( - mol_entries, - './xyz_files/EC.xyz', - 0) - - initial_state = { - Li_plus_id : 30, - EC_id : 30 - } - - insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite') - - - subprocess.run([ - 'GMC', - '--reaction_database=' + folder + '/rn.sqlite', - '--initial_state_database=' + folder + '/initial_state.sqlite', - '--number_of_simulations=1000', - '--base_seed=1000', - '--thread_count=' + number_of_threads, - '--step_cutoff=200' - ]) - - network_loader = NetworkLoader( - folder + '/rn.sqlite', - folder + '/mol_entries.pickle', - folder + '/initial_state.sqlite' - ) - - network_loader.load_trajectories() - network_loader.load_initial_state() - - report_generator = ReportGenerator( - network_loader.mol_entries, - folder + '/dummy.tex', - rebuild_mol_pictures=True) - - coordination_report( - network_loader, - folder + '/coodination_report.tex', - 'Li1', - 1) - - decoordination_report( - network_loader, - folder + '/decoodination_report.tex', - 'Li1', - 1) - - tests = [ mg_test, li_test, From 6ce072f8e038c8a2ca4bba642ef8452fdfa1404c Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 12 Apr 2023 09:59:54 -0700 Subject: [PATCH 2/2] Small tweaks to test --- test.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/test.py b/test.py index bf3b644..af45bf4 100644 --- a/test.py +++ b/test.py @@ -56,6 +56,7 @@ class bcolors: # set and a magnesium set. Since the lithium test set is older, we shall # document that instead of the mg test set. + if os.path.isdir('./scratch'): subprocess.run(['rm', '-r', './scratch']) @@ -76,7 +77,6 @@ def li_test(): # through the species decision tree to discard molecules. species_decision_tree = li_species_decision_tree - # There is one non-local part of species filtering: we consider two # molecules to be equivalent if they have the same total charge, # composition, and covalent bonds, even if they have different metal @@ -95,13 +95,11 @@ def li_test(): coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy), ) - # Once we have generated our molecule list, we generate the bucket database - # which is how we break up the reaction filtering amongst all avaliable workers. - # It gets stored in the buckets.sqlite database. + # which is how we break up the reaction filtering amongst all avaliable + # workers. It gets stored in the buckets.sqlite database. bucket(mol_entries, folder + '/buckets.sqlite') - # Reaction filtering is paralellized using MPI, so we need to spawn # an MPI instance to run it. This is why we can't just start # reaction filtering by calling a python function. We pass the @@ -117,8 +115,8 @@ def li_test(): # HiPRGen.reaction_questions params = { - 'temperature' : ROOM_TEMP, - 'electron_free_energy' : -1.4 + 'temperature': ROOM_TEMP, + 'electron_free_energy': -1.4 } dispatcher_payload = DispatcherPayload( @@ -134,7 +132,6 @@ def li_test(): Terminal.DISCARD ) - # The dispatcher and worker payloads are passed through the MPI barrier # as JSON blobs dispatcher_payload and worker_payload dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json') @@ -215,8 +212,8 @@ def mg_test(): folder + '/buckets.sqlite', default_reaction_decision_tree, { - 'temperature' : ROOM_TEMP, - 'electron_free_energy' : -2.06 + 'temperature': ROOM_TEMP, + 'electron_free_energy': -2.06 }, Terminal.DISCARD )