From 24a8d870afa3802f68831d596d351fca0d5beb0b Mon Sep 17 00:00:00 2001
From: Evan Walter Clark Spotte-Smith <espottesmith@gmail.com>
Date: Wed, 22 Mar 2023 13:55:19 -0700
Subject: [PATCH 1/2] Gutting analysis, which will be moved to RNMC

---
 .gitignore             |    3 +-
 HiPRGen/mc_analysis.py | 1262 ----------------------------------------
 test.py                |  364 +-----------
 3 files changed, 6 insertions(+), 1623 deletions(-)
 delete mode 100644 HiPRGen/mc_analysis.py

diff --git a/.gitignore b/.gitignore
index 118ecb1..2c86349 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 __pycache__/
 .ipynb_checkpoints/
-scratch
\ No newline at end of file
+scratch
+HiPRGen.egg-info
\ No newline at end of file
diff --git a/HiPRGen/mc_analysis.py b/HiPRGen/mc_analysis.py
deleted file mode 100644
index e6254c7..0000000
--- a/HiPRGen/mc_analysis.py
+++ /dev/null
@@ -1,1262 +0,0 @@
-from HiPRGen.report_generator import ReportGenerator
-from HiPRGen.network_renderer import Renderer
-from HiPRGen.network_loader import NetworkLoader
-from HiPRGen.constants import ROOM_TEMP, KB
-from HiPRGen.reaction_questions import marcus_barrier
-from monty.serialization import dumpfn
-import math
-import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib.colors as mcolors
-from itertools import chain
-from multiprocessing import Pool
-
-def default_cost(free_energy):
-    return math.exp(min(10.0, free_energy) / (ROOM_TEMP * KB)) + 1
-
-
-def compute_starting_angle(l, step):
-    if l % 2 == 1:
-        return - int(l / 2) * step
-    else:
-        return - ((l / 2) * step) + 0.05
-
-
-def render_species(network_loader, path):
-    renderer = Renderer()
-
-    starting_species_count = 0
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-            starting_species_count += 1
-
-
-    left_angle_counter_step = 0.1
-    left_angle_counter = math.pi + compute_starting_angle(
-        starting_species_count,
-        left_angle_counter_step
-    )
-
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-
-            renderer.new_node_boundary(species_id, left_angle_counter)
-            left_angle_counter += left_angle_counter_step
-
-        else:
-            renderer.new_node(species_id)
-
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-            renderer.draw_node(species_id, radius=0.008)
-        else:
-            renderer.draw_node(species_id, radius=0.002)
-
-    renderer.render(path)
-
-
-
-
-def render_reactions_which_fired(network_loader, colors, path):
-    renderer = Renderer()
-    reactions_which_fired = set()
-    for seed in network_loader.trajectories:
-        for step in network_loader.trajectories[seed]:
-            reaction_id = network_loader.trajectories[seed][step][0]
-            reaction = network_loader.index_to_reaction(reaction_id)
-            reactions_which_fired.add(reaction_id)
-
-    starting_species_count = 0
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-            starting_species_count += 1
-
-
-    left_angle_counter_step = 0.1
-    left_angle_counter = math.pi + compute_starting_angle(
-        starting_species_count,
-        left_angle_counter_step
-    )
-
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-
-            renderer.new_node_boundary(species_id, left_angle_counter)
-            left_angle_counter += left_angle_counter_step
-
-        else:
-            renderer.new_node(species_id)
-
-    for reaction_id in reactions_which_fired:
-        print(reaction_id)
-        reaction = network_loader.index_to_reaction(reaction_id)
-        for i in range(reaction['number_of_reactants']):
-            for j in range(reaction['number_of_products']):
-                reactant_id = reaction['reactants'][i]
-                product_id = reaction['products'][j]
-                dG = reaction['dG']
-                renderer.draw_edge(reactant_id, product_id)
-
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-            renderer.draw_node(species_id, radius=0.008)
-        elif species_id in colors:
-            renderer.draw_node_square(species_id, color=colors[species_id], side=0.013)
-        else:
-            renderer.draw_node(species_id)
-
-    renderer.render(path)
-
-
-def render_reactions_which_fired_new_positions(network_loader, colors, path):
-    renderer = Renderer()
-    reactions_which_fired = set()
-    for seed in network_loader.trajectories:
-        for step in network_loader.trajectories[seed]:
-            reaction_id = network_loader.trajectories[seed][step][0]
-            reaction = network_loader.index_to_reaction(reaction_id)
-            reactions_which_fired.add(reaction_id)
-
-    starting_species_count = 0
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-            starting_species_count += 1
-
-
-
-    left_angle_counter_step = 0.1
-    left_angle_counter = math.pi + compute_starting_angle(
-        starting_species_count,
-        left_angle_counter_step
-    )
-    right_angle_counter_step = 0.1
-    right_angle_counter = compute_starting_angle(
-        len(colors),
-        right_angle_counter_step
-    )
-
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-
-            renderer.new_node_boundary(species_id, left_angle_counter)
-            left_angle_counter += left_angle_counter_step
-
-        elif species_id in colors:
-
-            renderer.new_node_boundary(species_id, right_angle_counter)
-            right_angle_counter += right_angle_counter_step
-
-        else:
-            renderer.new_node(species_id)
-
-
-
-    for reaction_id in reactions_which_fired:
-        print(reaction_id)
-        reaction = network_loader.index_to_reaction(reaction_id)
-        for i in range(reaction['number_of_reactants']):
-            for j in range(reaction['number_of_products']):
-                reactant_id = reaction['reactants'][i]
-                product_id = reaction['products'][j]
-                dG = reaction['dG']
-                renderer.draw_edge(reactant_id, product_id)
-
-    for species_id in range(network_loader.number_of_species):
-        if network_loader.initial_state_array[species_id] > 0:
-            renderer.draw_node(species_id, radius=0.008)
-        elif species_id in colors:
-            renderer.draw_node_square(species_id, color=colors[species_id], side=0.013)
-        else:
-            renderer.draw_node(species_id)
-
-    renderer.render(path)
-
-
-
-class PathfindingTransfer:
-    def __init__(self,pathfinding,threshold):
-        self.pathfinding = pathfinding
-        self.threshold = threshold
-
-    def __call__(self, species_id):
-        result = set()
-        pathways = self.pathfinding.compute_pathways(species_id)
-        pathways_sorted = sorted(pathways, key=lambda p: pathways[p]['weight'])
-
-        for p in pathways_sorted[0:self.threshold]:
-            result.update(p)
-
-        return result
-
-
-def render_top_highlighted(pathfinding, colors, output_path, purple_id, num_threads=8, threshold=5):
-    renderer = Renderer(colors=[(0.7,0.7,0.7)])
-    reactions_in_top_pathways = set()
-    species_in_top_pathways = set()
-
-    pathfinding_transfer = PathfindingTransfer(pathfinding, threshold)
-
-    with Pool(num_threads) as p:
-        for result in p.map(pathfinding_transfer, list(colors.keys())):
-            reactions_in_top_pathways.update(result)
-
-    purple = colors[purple_id]
-    pathways_lol = pathfinding.compute_pathways(purple_id)
-    highlighted_reactions = sorted(
-        pathways_lol,
-        key=lambda p: pathways_lol[p]['weight'])[2]
-
-    for reaction_id in reactions_in_top_pathways:
-        reaction = pathfinding.network_loader.index_to_reaction(reaction_id)
-
-        for i in range(reaction['number_of_reactants']):
-            reactant_id = reaction['reactants'][i]
-            species_in_top_pathways.add(reactant_id)
-
-        for j in range(reaction['number_of_products']):
-            product_id = reaction['products'][j]
-            species_in_top_pathways.add(product_id)
-
-
-    starting_species_count = 0
-    for species_id in range(pathfinding.network_loader.number_of_species):
-        if pathfinding.network_loader.initial_state_array[species_id] > 0:
-            starting_species_count += 1
-
-
-    left_angle_counter_step = 0.1
-    left_angle_counter = math.pi + compute_starting_angle(
-        starting_species_count,
-        left_angle_counter_step
-    )
-    right_angle_counter_step = 0.1
-    right_angle_counter = compute_starting_angle(
-        len(colors),
-        right_angle_counter_step
-    )
-
-    for species_id in range(pathfinding.network_loader.number_of_species):
-        if pathfinding.network_loader.initial_state_array[species_id] > 0:
-
-            renderer.new_node_boundary(species_id, left_angle_counter)
-            left_angle_counter += left_angle_counter_step
-
-        elif species_id in colors:
-
-            renderer.new_node_boundary(species_id, right_angle_counter)
-            right_angle_counter += right_angle_counter_step
-
-        else:
-            renderer.new_node(species_id)
-
-
-    for reaction_id in reactions_in_top_pathways:
-        print(reaction_id)
-        reaction = pathfinding.network_loader.index_to_reaction(reaction_id)
-        for i in range(reaction['number_of_reactants']):
-            for j in range(reaction['number_of_products']):
-                reactant_id = reaction['reactants'][i]
-                product_id = reaction['products'][j]
-                renderer.draw_edge(reactant_id, product_id)
-
-    for reaction_id in highlighted_reactions:
-        print(reaction_id)
-        reaction = pathfinding.network_loader.index_to_reaction(reaction_id)
-        for i in range(reaction['number_of_reactants']):
-            for j in range(reaction['number_of_products']):
-                reactant_id = reaction['reactants'][i]
-                product_id = reaction['products'][j]
-                renderer.draw_edge(reactant_id, product_id,color=purple,width=0.003)
-
-
-
-    for species_id in species_in_top_pathways:
-
-        if pathfinding.network_loader.initial_state_array[species_id] > 0:
-            renderer.draw_node(species_id, radius=0.008)
-        elif species_id in colors:
-            renderer.draw_node_square(species_id, color=colors[species_id], side=0.013)
-        else:
-            renderer.draw_node(species_id)
-
-    renderer.render(output_path)
-
-
-
-def render_top_pathways(pathfinding, colors, output_path, num_threads=8, threshold=5):
-    renderer = Renderer()
-    reactions_in_top_pathways = set()
-    species_in_top_pathways = set()
-
-    pathfinding_transfer = PathfindingTransfer(pathfinding, threshold)
-
-    with Pool(num_threads) as p:
-        for result in p.map(pathfinding_transfer, list(colors.keys())):
-            reactions_in_top_pathways.update(result)
-
-    for reaction_id in reactions_in_top_pathways:
-        reaction = pathfinding.network_loader.index_to_reaction(reaction_id)
-
-        for i in range(reaction['number_of_reactants']):
-            reactant_id = reaction['reactants'][i]
-            species_in_top_pathways.add(reactant_id)
-
-        for j in range(reaction['number_of_products']):
-            product_id = reaction['products'][j]
-            species_in_top_pathways.add(product_id)
-
-
-    starting_species_count = 0
-    for species_id in range(pathfinding.network_loader.number_of_species):
-        if pathfinding.network_loader.initial_state_array[species_id] > 0:
-            starting_species_count += 1
-
-
-    left_angle_counter_step = 0.1
-    left_angle_counter = math.pi + compute_starting_angle(
-        starting_species_count,
-        left_angle_counter_step
-    )
-    right_angle_counter_step = 0.1
-    right_angle_counter = compute_starting_angle(
-        len(colors),
-        right_angle_counter_step
-    )
-
-    for species_id in range(pathfinding.network_loader.number_of_species):
-        if pathfinding.network_loader.initial_state_array[species_id] > 0:
-
-            renderer.new_node_boundary(species_id, left_angle_counter)
-            left_angle_counter += left_angle_counter_step
-
-        elif species_id in colors:
-
-            renderer.new_node_boundary(species_id, right_angle_counter)
-            right_angle_counter += right_angle_counter_step
-
-        else:
-            renderer.new_node(species_id)
-
-
-    for reaction_id in reactions_in_top_pathways:
-        print(reaction_id)
-        reaction = pathfinding.network_loader.index_to_reaction(reaction_id)
-        for i in range(reaction['number_of_reactants']):
-            for j in range(reaction['number_of_products']):
-                reactant_id = reaction['reactants'][i]
-                product_id = reaction['products'][j]
-                renderer.draw_edge(reactant_id, product_id)
-
-
-    for species_id in species_in_top_pathways:
-
-        if pathfinding.network_loader.initial_state_array[species_id] > 0:
-            renderer.draw_node(species_id, radius=0.008)
-        elif species_id in colors:
-            renderer.draw_node_square(species_id, color=colors[species_id], side=0.013)
-        else:
-            renderer.draw_node(species_id)
-
-    renderer.render(output_path)
-
-
-def redox_report(
-        network_loader,
-        redox_report_path,
-        params
-):
-
-    redox_reactions = network_loader.get_all_redox_reactions()
-
-    for r in redox_reactions:
-        marcus_barrier(r, network_loader.mol_entries, params)
-        r['dG_barrier'] = r['marcus_barrier']
-
-    redox_reactions = sorted(redox_reactions, key=lambda r: r['marcus_barrier'])
-
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        redox_report_path,
-        rebuild_mol_pictures=False)
-
-    report_generator.emit_text("redox report")
-    report_generator.emit_text("marcus barrier appears below the arrow")
-    for reaction in redox_reactions:
-        report_generator.emit_reaction(reaction)
-
-    report_generator.finished()
-
-
-
-def coordination_report(
-        network_loader,
-        coordination_report_path,
-        formula,
-        charge
-):
-
-    for m in network_loader.mol_entries:
-        if m.formula == formula and m.charge == charge:
-            metal_id = m.ind
-
-    coordination_reactions = network_loader.get_all_coordination_reactions(metal_id)
-
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        coordination_report_path,
-        rebuild_mol_pictures=False)
-
-    report_generator.emit_text("coordination report")
-    for reaction in coordination_reactions:
-        report_generator.emit_reaction(reaction)
-
-    report_generator.finished()
-
-
-def decoordination_report(
-        network_loader,
-        decoordination_report_path,
-        formula,
-        charge
-):
-
-    for m in network_loader.mol_entries:
-        if m.formula == formula and m.charge == charge:
-            metal_id = m.ind
-
-    decoordination_reactions = network_loader.get_all_decoordination_reactions(
-        metal_id)
-
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        decoordination_report_path,
-        rebuild_mol_pictures=False)
-
-    report_generator.emit_text("decoordination report")
-    for reaction in decoordination_reactions:
-        report_generator.emit_reaction(reaction)
-
-    report_generator.finished()
-
-
-
-
-
-def export_tally_to_json(network_loader, path):
-    reaction_tally = {}
-    reactions = {}
-    for seed in network_loader.trajectories:
-        for step in network_loader.trajectories[seed]:
-            reaction_id = network_loader.trajectories[seed][step][0]
-
-            db_reaction = network_loader.index_to_reaction(reaction_id)
-            json_reactants = [ network_loader.mol_entries[i].entry_id
-                               for i in db_reaction['reactants'] if i != -1]
-            json_products = [ network_loader.mol_entries[i].entry_id
-                              for i in db_reaction['products'] if i != -1]
-            json_reaction = {
-                'reactants' : json_reactants,
-                'products' : json_products
-            }
-
-            if reaction_id not in reactions:
-                reactions[reaction_id] = json_reaction
-
-
-            if reaction_id in reaction_tally:
-                reaction_tally[reaction_id] += 1
-            else:
-                reaction_tally[reaction_id] = 1
-
-    dumpfn({
-        'pathways' : reaction_tally,
-        'reactions' : reactions}, path)
-
-
-
-
-
-def reaction_tally_report(
-        network_loader,
-        reaction_tally_report_path,
-        cutoff=10):
-
-    reaction_tally = {}
-    species_set = set()
-    for seed in network_loader.trajectories:
-        for step in network_loader.trajectories[seed]:
-            reaction_id = network_loader.trajectories[seed][step][0]
-
-            reaction = network_loader.index_to_reaction(reaction_id)
-
-            if reaction_id in reaction_tally:
-                reaction_tally[reaction_id] += 1
-            else:
-                reaction_tally[reaction_id] = 1
-
-            for i in range(reaction['number_of_reactants']):
-                reactant_id = reaction['reactants'][i]
-                species_set.add(reactant_id)
-
-            for j in range(reaction['number_of_products']):
-                product_id = reaction['products'][j]
-                species_set.add(product_id)
-
-
-
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        reaction_tally_report_path,
-        rebuild_mol_pictures=False)
-
-    report_generator.emit_text("reaction tally report")
-    report_generator.emit_text(
-        "total number of reactions: " +
-        str(network_loader.number_of_reactions))
-    report_generator.emit_text(
-        "number of reactions which fired: " +
-        str(len(reaction_tally.keys())))
-    report_generator.emit_text(
-        "total number of species: " +
-        str(network_loader.number_of_species))
-    report_generator.emit_text(
-        "number of species observed: " +
-        str(len(species_set)))
-
-
-    for (reaction_index, number) in sorted(
-            reaction_tally.items(), key=lambda pair: -pair[1]):
-        if number > cutoff:
-            report_generator.emit_text(str(number) + " occourances of:")
-            report_generator.emit_reaction(
-                network_loader.index_to_reaction(reaction_index),
-                label=str(reaction_index)
-            )
-            report_generator.emit_newline()
-
-    report_generator.finished()
-
-def species_report(network_loader, species_report_path):
-    """
-    print all species
-    """
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        species_report_path,
-        rebuild_mol_pictures=False)
-
-    report_generator.emit_text("species report")
-    for i in range(network_loader.number_of_species):
-        mol = network_loader.mol_entries[i]
-        report_generator.emit_text(str(mol.entry_id))
-        report_generator.emit_text(
-            "formula: " + mol.formula)
-
-        report_generator.emit_molecule(i)
-        report_generator.emit_newline()
-
-    report_generator.finished()
-
-def reaction_report(network_loader, reaction_report_path):
-    """
-    print all reactions. Warning: don't do this for a big network
-    """
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        reaction_report_path,
-        rebuild_mol_pictures=False)
-
-    report_generator.emit_text("reaction report")
-    for i in range(network_loader.number_of_reactions):
-        reaction = network_loader.index_to_reaction(i)
-        report_generator.emit_reaction(reaction)
-        report_generator.emit_newline()
-
-    report_generator.finished()
-
-
-
-
-
-class Pathfinding:
-    """
-    Given a chemical system, we are interested in exploring the reaction
-    pathways which produce species of interest. Reaction networks are a
-    useful tool for approaching the problem because they can be simulated
-    efficiently, even when the network has hundreds of millions of
-    reactions. Unfortunately, since we collapse all spacial aspects of the
-    system, identical molecules become indistinguishable (in reality,
-    identical molecules can be distinguished by their locations in
-    space). This creates the following problem: Suppose we are interested
-    in the production of species G from A and have the following
-    simulation trajectory:
-
-      A -> Z + F
-      F -> X
-      A -> B + F
-      A -> C + H
-      A -> D + H
-      C -> E
-      D -> E
-      E + F -> G
-
-    It is impossible to decide between the two pathways
-
-      A -> B + F    A -> C + H        C -> E        E + F -> G
-      A -> B + F    A -> D + H        D -> E        E + F -> G
-
-    If our model had a spacial aspect, we would be able to trace the
-    specific E used back to either a C or a D. Fundamentally, this
-    ambiguity is caused by sequence
-
-      A -> C + H        C -> E         E -> D        D + H -> A
-
-    which is called a deficiency loop. To avoid this problem, we extract a
-    pathway which produces the target in the following way: Take the first
-    reaction x which produces the target molecule. Then recursively, take
-    the first reactions which produced the reactants of x. If a reactant
-    is a starting molecule, then stop. Applying the procedure to the above
-    sequence gives the pathway
-
-    A -> C + H        C -> E         A -> Z + F        E + F -> G
-
-    Intuitively, this procedure is producing pathways which don't take
-    into account competition, since there is no guarantee that the first
-    molecule which is produced isn't immediately consumed by some
-    competing reaction. Since the focus of this software is monte carlo
-    simulation with thermodynamic rates, this isn't a problem as all
-    downhill reactions have the maximum possible rate constant, so
-    reactions sans competition are also likely to occour.
-    """
-
-    def __init__(
-            self,
-            network_loader
-    ):
-
-        self.network_loader = network_loader
-        self.pathways = {}
-
-
-    def compute_pathway(
-            self,
-            species_id,
-            trajectory):
-
-        pathway = []
-        target_produced = False
-
-        for step in trajectory:
-            reaction_id = trajectory[step][0]
-            reaction = self.network_loader.index_to_reaction(reaction_id)
-            if species_id in reaction['products']:
-                target_produced = True
-                break
-
-        if target_produced:
-            pathway.append(reaction_id)
-
-
-            prefixes = []
-            for i in range(reaction['number_of_reactants']):
-                reactant_id = reaction['reactants'][i]
-                if self.network_loader.initial_state_dict[reactant_id] == 0:
-                    prefix = self.compute_pathway(reactant_id, trajectory)
-                    prefixes.append(prefix)
-
-                    # early return if one of the final reaction in one
-                    # of the prefixes gives both of the reactants
-                    prefix_final_reaction = self.network_loader.index_to_reaction(
-                        prefix[-1])
-
-                    if (sorted(reaction['reactants']) ==
-                        sorted(prefix_final_reaction['products'])):
-                        return prefix + pathway
-
-            for prefix in prefixes:
-                pathway = prefix + pathway
-
-
-        return pathway
-
-
-
-    def compute_pathways(self, species_id):
-
-        if species_id not in self.pathways:
-            reaction_pathway_list = []
-            for seed in self.network_loader.trajectories:
-                pathway = self.compute_pathway(
-                    species_id,
-                    self.network_loader.trajectories[seed]
-                )
-
-                if len(pathway) > 0:
-                    reaction_pathway_list.append(pathway)
-
-            self.pathways[species_id] = self.collect_duplicate_pathways(
-                reaction_pathway_list)
-
-        return self.pathways[species_id]
-
-
-    def collect_duplicate_pathways(
-        self, pathways
-    ):
-        pathway_dict = {}
-        for pathway in pathways:
-            key = frozenset(pathway)
-            if key in pathway_dict:
-                pathway_dict[key]["frequency"] += 1
-            else:
-                path_weight = self.compute_path_weight(pathway)
-                pathway_dict[key] = {
-                    "pathway": pathway,
-                    "frequency": 1,
-                    "weight": path_weight,
-                }
-
-        return pathway_dict
-
-
-    def compute_path_weight(self, pathway):
-        weight = 0.0
-        for reaction_index in pathway:
-            reaction = self.network_loader.index_to_reaction(reaction_index)
-            weight += default_cost(reaction["dG"])
-        return weight
-
-
-
-def export_pathways_to_json(pathfinding, species_id, path):
-    pathways = pathfinding.compute_pathways(species_id)
-    reactions = {}
-    for pathway in pathways:
-        for reaction_id in pathway:
-            db_reaction = pathfinding.network_loader.index_to_reaction(reaction_id)
-            json_reactants = [ pathfinding.network_loader.mol_entries[i].entry_id
-                               for i in db_reaction['reactants'] if i != -1]
-            json_products = [ pathfinding.network_loader.mol_entries[i].entry_id
-                               for i in db_reaction['products'] if i != -1]
-            json_reaction = {
-                'reactants' : json_reactants,
-                'products' : json_products
-                }
-            reactions[reaction_id] = json_reaction
-
-    dumpfn({
-        'pathways' : list(pathways.values()),
-        'reactions' : reactions}, path)
-
-
-
-def generate_pathway_report(
-        pathfinding,
-        species_id,
-        report_file_path,
-        number_of_pathways=100,
-        sort_by_frequency=False
-):
-
-    report_generator = ReportGenerator(
-        pathfinding.network_loader.mol_entries,
-        report_file_path,
-        rebuild_mol_pictures=False)
-
-    pathways = pathfinding.compute_pathways(species_id)
-
-
-    report_generator.emit_text("pathway report for")
-    report_generator.emit_molecule(species_id)
-
-    if sort_by_frequency:
-        report_generator.emit_text(
-            "top " +
-            str(number_of_pathways) +
-            " pathways sorted by frequency")
-
-    else:
-        report_generator.emit_text(
-            "top " +
-            str(number_of_pathways) +
-            " pathways sorted by cost")
-
-    report_generator.emit_newline()
-    report_generator.emit_initial_state(pathfinding.network_loader.initial_state_dict)
-    report_generator.emit_newpage()
-
-    if sort_by_frequency:
-        def sort_function(item):
-            return -item[1]["frequency"]
-
-    else:
-        def sort_function(item):
-            return item[1]["weight"]
-
-
-    count = 1
-    for _, unique_pathway in sorted(pathways.items(), key=sort_function):
-
-        frequency = unique_pathway["frequency"]
-        weight = unique_pathway["weight"]
-
-        report_generator.emit_text("pathway " + str(count))
-        report_generator.emit_text("path weight: " + str(weight))
-        report_generator.emit_text(str(frequency) + " occurrences:")
-
-        for reaction_index in unique_pathway["pathway"]:
-            reaction = pathfinding.network_loader.index_to_reaction(reaction_index)
-            report_generator.emit_reaction(reaction, label=str(reaction_index))
-
-        report_generator.emit_newpage()
-        count += 1
-        if count > number_of_pathways:
-            break
-
-    report_generator.finished()
-
-
-class SimulationReplayer:
-    """
-    class for rerunning through all the simulations. This is
-    relatively fast since we don't need to actually choose which
-    reactions fire / update reaction propensities.
-    """
-
-    def __init__(self, network_loader):
-        self.network_loader = network_loader
-
-
-        self.compute_expected_final_state()
-        self.compute_production_consumption_info()
-        self.compute_sink_data()
-
-
-    def compute_expected_final_state(self):
-        self.expected_final_state = np.zeros(
-            self.network_loader.number_of_species,
-            dtype=int
-        )
-
-        for seed in self.network_loader.trajectories:
-            state = np.copy(self.network_loader.initial_state_array)
-            for step in self.network_loader.trajectories[seed]:
-                reaction_index = self.network_loader.trajectories[seed][step][0]
-                time = self.network_loader.trajectories[seed][step][1]
-                reaction = self.network_loader.index_to_reaction(reaction_index)
-
-                for i in range(reaction['number_of_reactants']):
-                    reactant_index = reaction['reactants'][i]
-                    state[reactant_index] -= 1
-
-                for j in range(reaction['number_of_products']):
-                    product_index = reaction['products'][j]
-                    state[product_index] += 1
-
-            self.expected_final_state += state
-
-        self.expected_final_state = (
-            self.expected_final_state / len(self.network_loader.trajectories))
-
-    def compute_production_consumption_info(self):
-        self.consuming_reactions = {}
-        self.producing_reactions = {}
-
-        for i in range(self.network_loader.number_of_species):
-            self.consuming_reactions[i] = {}
-            self.producing_reactions[i] = {}
-
-        for seed in self.network_loader.trajectories:
-            for step in self.network_loader.trajectories[seed]:
-                reaction_index = self.network_loader.trajectories[seed][step][0]
-                time = self.network_loader.trajectories[seed][step][1]
-                reaction = self.network_loader.index_to_reaction(reaction_index)
-
-                for i in range(reaction['number_of_reactants']):
-                    reactant_index = reaction['reactants'][i]
-                    if reaction_index not in self.consuming_reactions[reactant_index]:
-                        self.consuming_reactions[reactant_index][reaction_index] = 1
-                    else:
-                        self.consuming_reactions[reactant_index][reaction_index] += 1
-
-
-                for j in range(reaction['number_of_products']):
-                    product_index = reaction['products'][j]
-                    if reaction_index not in self.producing_reactions[product_index]:
-                        self.producing_reactions[product_index][reaction_index] = 1
-                    else:
-                        self.producing_reactions[product_index][reaction_index] += 1
-
-    def compute_state_time_series(self, seed):
-        state_dimension_size = len(self.network_loader.initial_state_array)
-        step_dimension_size = len(self.network_loader.trajectories[seed])
-        time_series = np.zeros(
-            (step_dimension_size, state_dimension_size),
-            dtype=int)
-
-        state = np.copy(self.network_loader.initial_state_array)
-        for step in self.network_loader.trajectories[seed]:
-            reaction_index = self.network_loader.trajectories[seed][step][0]
-            time = self.network_loader.trajectories[seed][step][1]
-            reaction = self.network_loader.index_to_reaction(reaction_index)
-
-            for i in range(reaction['number_of_reactants']):
-                reactant_index = reaction['reactants'][i]
-                state[reactant_index] -= 1
-
-            for j in range(reaction['number_of_products']):
-                product_index = reaction['products'][j]
-                state[product_index] += 1
-
-            time_series[step] = state
-
-        return time_series
-
-
-    def time_series_graph(
-            self,
-            seeds,
-            species_of_interest,
-            path,
-            colors = list(mcolors.TABLEAU_COLORS.values()),
-            styles = ['solid', 'dotted', 'dashed', 'dashdot'],
-            internal_index_labels=True
-    ):
-
-
-        max_trajectory_length = 0
-        for seed in seeds:
-            max_trajectory_length = max(
-                max_trajectory_length,
-                len(self.network_loader.trajectories[seed]))
-
-        total_time_series = np.zeros(
-            (max_trajectory_length, self.network_loader.number_of_species),
-            dtype=int
-            )
-
-        for seed in seeds:
-            total_time_series += pad_time_series(
-                self.compute_state_time_series(seed),
-                max_trajectory_length)
-
-        total_time_series = total_time_series / len(seeds)
-
-        background_species = set()
-        for index in range(self.network_loader.number_of_species):
-            for step in range(max_trajectory_length):
-                if (total_time_series[step,index] > 0.1 and
-                    index not in species_of_interest):
-                    background_species.add(index)
-
-
-        line_dict = {}
-        i = 0
-        for species_index in species_of_interest:
-            r = i % len(colors)
-            q = i // len(colors)
-            line_dict[species_index] = (colors[r], styles[q])
-            i += 1
-
-
-        fig, (ax0, ax1, ax2) = plt.subplots(
-            3, 1,
-            figsize=(5,10),
-            gridspec_kw={'height_ratios':[2,2,1]})
-
-        y_max = 0
-        for step in range(total_time_series.shape[0]):
-            for species_index in species_of_interest:
-                y_max = max(y_max, total_time_series[step,species_index])
-
-        ax0.set_xlim([0,total_time_series.shape[0]])
-        ax0.set_ylim([0,y_max+1])
-
-        ax1.set_xlim([0,total_time_series.shape[0]])
-        ax1.set_ylim([0,(y_max+1)/10])
-
-
-        ticks = np.arange(0, total_time_series.shape[0])
-        for i, species_index in enumerate(background_species):
-            ax0.plot(ticks,
-                     total_time_series[:, species_index],
-                     color=mcolors.hsv_to_rgb((0,0,0.9))
-                     )
-
-            ax1.plot(ticks,
-                     total_time_series[:, species_index],
-                     color=mcolors.hsv_to_rgb((0,0,0.9))
-                     )
-
-
-
-        for species_index in species_of_interest:
-
-
-            ax0.plot(ticks,
-                     total_time_series[:, species_index],
-                     color=line_dict[species_index][0],
-                     linestyle=line_dict[species_index][1]
-                     )
-
-            ax1.plot(ticks,
-                     total_time_series[:, species_index],
-                     color=line_dict[species_index][0],
-                     linestyle=line_dict[species_index][1]
-                     )
-
-
-
-        # creating a legend
-        ax2.yaxis.set_visible(False)
-        ax2.xaxis.set_visible(False)
-        ax2.set_axis_off()
-        ax2.set_xlim([0,1])
-        ax2.set_ylim([0,1])
-
-
-        i = 0
-        for species_index in species_of_interest:
-            if internal_index_labels:
-                label = str(species_index)
-            else:
-                label = self.network_loader.mol_entries[species_index].entry_id
-
-            r = i % 10
-            q = i // 10
-            pos_x = [q * 0.3 + 0, q * 0.3 + 0.05]
-            pos_y = [r * 0.1 + 0.05, r * 0.1 + 0.05]
-            text_pos_x = q * 0.3 + 0.06
-            text_pos_y = r * 0.1 + 0.05
-
-            ax2.plot(
-                pos_x, pos_y,
-                color=line_dict[species_index][0],
-                linestyle=line_dict[species_index][1])
-
-            ax2.text(text_pos_x, text_pos_y,
-                     label,
-                     fontsize=8,
-                     horizontalalignment='left',
-                     verticalalignment='center')
-            i += 1
-
-        fig.savefig(path, transparent=True)
-
-
-    def compute_sink_data(self):
-        max_ratio = 1e10
-        self.sink_data = {}
-
-        for i in range(self.network_loader.number_of_species):
-            number_of_consuming_reactions = sum(
-                self.consuming_reactions[i].values())
-            number_of_distinct_consuming_reactions = len(
-                self.consuming_reactions[i].keys())
-            number_of_producing_reactions = sum(
-                self.producing_reactions[i].values())
-            number_of_distinct_producing_reactions = len(
-                self.producing_reactions[i].keys())
-
-
-            if number_of_consuming_reactions != 0:
-                ratio = number_of_producing_reactions / number_of_consuming_reactions
-            else:
-                ratio = max_ratio
-
-            expected_value = self.expected_final_state[i]
-
-            self.sink_data[i] = {
-                "species_index" : i,
-                "number_of_consuming_reactions" : number_of_consuming_reactions,
-                "number_of_distinct_consuming_reactions"
-                : number_of_distinct_consuming_reactions,
-                "number_of_producing_reactions" : number_of_producing_reactions,
-                "number_of_distinct_producing_reactions"
-                : number_of_distinct_producing_reactions,
-                "ratio" : ratio,
-                "expected_value" : expected_value
-            }
-
-        self.sinks = [
-            i for i in range(self.network_loader.number_of_species)
-            if self.sink_filter(i)]
-
-    def sink_filter(self, species_index):
-        sink_data = self.sink_data[species_index]
-
-        number_of_consuming_reactions = sink_data["number_of_consuming_reactions"]
-        number_of_distinct_consuming_reactions = sink_data[
-            "number_of_distinct_consuming_reactions"]
-        number_of_producing_reactions = sink_data["number_of_producing_reactions"]
-        number_of_distinct_producing_reactions = sink_data[
-            "number_of_distinct_producing_reactions"]
-        ratio = sink_data["ratio"]
-        expected_value = sink_data["expected_value"]
-
-        mol = self.network_loader.mol_entries[species_index]
-        if (number_of_consuming_reactions + number_of_producing_reactions > 0  and
-            ratio > 1.5 and
-            expected_value > 0.1 and
-            mol.spin_multiplicity == 1):
-            return True
-        else:
-            return False
-
-
-def export_consumption_to_json(simulation_replayer, species_index, path):
-    network_loader = simulation_replayer.network_loader
-    producing_reactions = simulation_replayer.producing_reactions[species_index]
-    consuming_reactions = simulation_replayer.consuming_reactions[species_index]
-    reactions = {}
-    for reaction_id in chain(producing_reactions, consuming_reactions) :
-        db_reaction = network_loader.index_to_reaction(reaction_id)
-        json_reactants = [ network_loader.mol_entries[i].entry_id
-                           for i in db_reaction['reactants'] if i != -1]
-        json_products = [ network_loader.mol_entries[i].entry_id
-                           for i in db_reaction['products'] if i != -1]
-        json_reaction = {
-            'reactants' : json_reactants,
-            'products' : json_products
-            }
-        reactions[reaction_id] = json_reaction
-
-        dumpfn({
-            'reactions' : reactions,
-            'producing_reactions' : producing_reactions,
-            'consuming_reactions' : consuming_reactions},
-               path)
-
-def pad_time_series(time_series, max_number_of_steps):
-    num_steps = time_series.shape[0]
-    state_size = time_series.shape[1]
-    padded_time_series = np.zeros(
-        (max_number_of_steps, state_size),
-        dtype=int)
-
-    for step in range(max_number_of_steps):
-        if step < num_steps:
-            padded_time_series[step] = time_series[step]
-        else:
-            padded_time_series[step] = time_series[num_steps-1]
-
-    return padded_time_series
-
-
-
-def export_sinks_to_json(simulation_replayer, path):
-    sink_data = simulation_replayer.sink_data
-    sink_data_json = {}
-    for i in sink_data:
-        mol = simulation_replayer.network_loader.mol_entries[i]
-        sink_data_json[mol.entry_id] = sink_data[i]
-
-    dumpfn(sink_data_json, path)
-
-def export_species_report_to_json(network_loader, path):
-    data = {}
-    for i in range(network_loader.number_of_species):
-        data[i] = network_loader.mol_entries[i].entry_id
-
-    dumpfn(data, path)
-
-
-def consumption_report(
-        simulation_replayer,
-        species_index,
-        consumption_report_path
-):
-
-    sink_data = simulation_replayer.sink_data[species_index]
-
-    producing_reactions = simulation_replayer.producing_reactions[species_index]
-    consuming_reactions = simulation_replayer.consuming_reactions[species_index]
-
-    report_generator = ReportGenerator(
-        simulation_replayer.network_loader.mol_entries,
-        consumption_report_path,
-        rebuild_mol_pictures=False)
-
-
-    report_generator.emit_text("P/C ratio: " +
-                               str(sink_data["ratio"]))
-    report_generator.emit_text("expected val: " +
-                               str(sink_data["expected_value"]))
-    report_generator.emit_newline()
-    report_generator.emit_text("consuming reactions:")
-    for (reaction_index, number) in sorted(
-            consuming_reactions.items(),
-            key=lambda item: -item[1]):
-
-        reaction = simulation_replayer.network_loader.index_to_reaction(
-            reaction_index)
-        report_generator.emit_text(str(number) + " occourances:")
-        report_generator.emit_reaction(reaction)
-
-
-    report_generator.emit_text("producing reactions:")
-    for (reaction_index, number) in sorted(
-            producing_reactions.items(),
-            key=lambda item: -item[1]):
-
-        reaction = simulation_replayer.network_loader.index_to_reaction(
-            reaction_index)
-
-        report_generator.emit_text(str(number) + " occourances:")
-        report_generator.emit_reaction(reaction)
-
-
-    report_generator.finished()
-
-
-def sink_report(
-        simulation_replayer,
-        sink_report_path
-):
-
-
-    report_generator = ReportGenerator(
-        simulation_replayer.network_loader.mol_entries,
-        sink_report_path,
-        rebuild_mol_pictures=False)
-
-    sinks_sorted = sorted(
-        simulation_replayer.sinks,
-        key = lambda i: -simulation_replayer.sink_data[i]["ratio"])
-
-    for species_index in sinks_sorted:
-
-        sink_data = simulation_replayer.sink_data[species_index]
-
-        number_of_consuming_reactions = sink_data[
-            "number_of_consuming_reactions"]
-        number_of_distinct_consuming_reactions = sink_data[
-            "number_of_distinct_consuming_reactions"]
-        number_of_producing_reactions = sink_data[
-            "number_of_producing_reactions"]
-        number_of_distinct_producing_reactions = sink_data[
-            "number_of_distinct_producing_reactions"]
-        ratio = sink_data["ratio"]
-        expected_value = sink_data["expected_value"]
-
-
-        report_generator.emit_text("P/C ratio: " + str(ratio))
-        report_generator.emit_text("expected val: " + str(expected_value))
-
-        report_generator.emit_text(
-            "produced: " + str(number_of_producing_reactions))
-
-        report_generator.emit_text(
-            str(number_of_distinct_producing_reactions) +
-            " distinct producing reactions")
-
-        report_generator.emit_text(
-            "consumed: " + str(number_of_consuming_reactions))
-
-        report_generator.emit_text(
-            str(number_of_distinct_consuming_reactions) +
-            " distinct consuming reactions")
-        report_generator.emit_molecule(species_index)
-        report_generator.emit_newline()
-
-    report_generator.finished()
diff --git a/test.py b/test.py
index 8f7965d..bf3b644 100644
--- a/test.py
+++ b/test.py
@@ -1,17 +1,11 @@
 import os
 import sys
 import subprocess
-import sqlite3
-import pickle
-
 
 from HiPRGen.network_loader import NetworkLoader
-from HiPRGen.initial_state import find_mol_entry_from_xyz_and_charge
 from monty.serialization import loadfn, dumpfn
 from HiPRGen.species_filter import species_filter
 from HiPRGen.bucketing import bucket
-from HiPRGen.report_generator import ReportGenerator
-from HiPRGen.initial_state import insert_initial_state
 from HiPRGen.constants import ROOM_TEMP, Terminal
 from HiPRGen.reaction_filter_payloads import (
     DispatcherPayload,
@@ -21,8 +15,6 @@
 from HiPRGen.species_questions import (
     mg_species_decision_tree,
     li_species_decision_tree,
-    positive_penalty,
-    species_default_true
 )
 
 from HiPRGen.reaction_questions import (
@@ -30,19 +22,6 @@
 
 )
 
-from HiPRGen.mc_analysis import (
-    reaction_tally_report,
-    species_report,
-    Pathfinding,
-    SimulationReplayer,
-    generate_pathway_report,
-    sink_report,
-    consumption_report,
-    redox_report,
-    coordination_report,
-    decoordination_report
-)
-
 # Since HiPRGen uses an end-to-end testing approach rather than testing
 # each individual function, we have decided to use the tests as
 # documentation, by explaining every single line through the first test.
@@ -62,15 +41,14 @@
     print("usage: python test.py number_of_threads")
     quit()
 
-
 number_of_threads = sys.argv[1]
 
+
 class bcolors:
     PASS = '\u001b[32;1m'
     FAIL = '\u001b[31;1m'
     ENDC = '\u001b[0m'
 
-
 # HiPRGen is organized as a pipeline, where all the relevent data is
 # stored in a sqlite database between phases. For this reason, during
 # a single run of the full pipeline, it makes sense to store all the
@@ -84,13 +62,11 @@ class bcolors:
 subprocess.run(['mkdir', './scratch'])
 
 
-
 def li_test():
 
-
     # folder is the where we store all our intermediate databases
     folder = './scratch/li_test'
-    subprocess.run(['mkdir', folder ])
+    subprocess.run(['mkdir', folder])
 
     # The initial input to the pipeline is a list of LIBE or MADEIRA
     # dataset entries. We provide two examples in the data foloder.
@@ -178,148 +154,13 @@ def li_test():
         ]
     )
 
-    # After we have generated the mol_entries, we refer to molecules by
-    # their index. The function find_mol_entry_from_xyz_and_charge can
-    # help find the indices of specific species to be used in the initial
-    # condition for propagating trajectories and/or trajectory analysis.
-    Li_plus_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/Li.xyz',
-        1)
-
-    EC_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/EC.xyz',
-        0)
-
-    LEDC_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/LEDC.xyz',
-        0)
-
-
-    # After generating a reaction network, it is stored in rn.sqlite. We
-    # use Monte Carlo simulation to interrogate the network, and for that
-    # we need to define an initial condition.
-    initial_state = {
-        Li_plus_id : 30,
-        EC_id : 30
-    }
-
-    # The initial state and the trajectories (after simulation) are stored in
-    # a seperate database from the network, here called initial_state.sqlite.
-    # This facilitates running multiple independent simulations of the same
-    # network with different initial conditions at the same time, if desired.
-    insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite')
-
-
-    # GMC is a high performance reaction network Monte Carlo simulator using the
-    # Gillespie algorithm: https://github.com/BlauGroup/RNMC. Here we run 1000
-    # trajectories each of 200 steps.
-    subprocess.run([
-        'GMC',
-        '--reaction_database=' + folder + '/rn.sqlite',
-        '--initial_state_database=' + folder + '/initial_state.sqlite',
-        '--number_of_simulations=1000',
-        '--base_seed=1000',
-        '--thread_count=' + number_of_threads,
-        '--step_cutoff=200'
-    ])
-
     # The network loader builds a python object around a reaction network
     # and the molecules to make it easier to use them.
     network_loader = NetworkLoader(
         folder + '/rn.sqlite',
-        folder + '/mol_entries.pickle',
-        folder + '/initial_state.sqlite'
+        folder + '/mol_entries.pickle'
         )
 
-    network_loader.load_trajectories()
-    network_loader.load_initial_state()
-
-
-
-    # HiPRGen has analysis tools to understand what happened in our simulation.
-    # The output files are written into the same folder in which the reaction
-    # network is stored.
-
-    # This report is empty, but we use it to generate the molecule pictures.
-    # This is an expensive operation, so we only want do do it once.
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        folder + '/dummy.tex',
-        rebuild_mol_pictures=True)
-
-
-    # The tally report shows reactions sorted by the number of times fired.
-    reaction_tally_report(
-        network_loader,
-        folder + '/reaction_tally.tex'
-    )
-    # Run `pdflatex reaction_tally.tex` in `scratch/li_test` to generate
-    # the tally report PDF.
-
-
-    # The species report shows every specie in the network and their IDs.
-    species_report(network_loader, folder + '/species_report.tex')
-    # Run `pdflatex species_report.tex` in `scratch/li_test` to generate
-    # the species report PDF.
-
-
-    # Pathfinding is a central goal of HiPRGen / GMC. See mc_analysis.py for
-    # further documentation of the Pathfinding class.
-    pathfinding = Pathfinding(network_loader)
-
-
-    # The pathway report shows all the ways that a target species was
-    # produced in the simulation trajectories, where each simulation only
-    # contributes the shortest path responsible for the first formation
-    # of the target species to the report. The report can be sorted by
-    # pathway frequency, but instead here we sort by pathway cost. Note
-    # that the test network has ~5000 reactions while production networks
-    # have between 50-100 million reactions.
-    generate_pathway_report(
-        pathfinding,
-        LEDC_id,
-        folder + '/LEDC_pathways.tex',
-        sort_by_frequency=False
-    )
-    # Run `pdflatex LEDC_pathways.tex` in `scratch/li_test` to generate
-    # the LEDC pathway report PDF.
-
-
-    # The simulation replayer sweeps through all trajectories in order
-    # to extract additional information that is used for consumption
-    # reports and sink reports.
-    simulation_replayer = SimulationReplayer(network_loader)
-
-
-    # The consumption report shows reactions which consumed a target
-    # species, sorted by the number of times the reaction fired.
-    consumption_report(simulation_replayer,
-                       LEDC_id,
-                       folder + '/LEDC_consumption_report.tex')
-    # Run `pdflatex LEDC_consumption_report.tex` in `scratch/li_test`
-    # to generate the LEDC consumption report PDF.
-
-
-    # The sink report shows species which have a production to
-    # consumption ratio of greater than 3/2 and which have an expected
-    # value above 0.1. These are two of the three heuristic criteria
-    # that we use to identify network products. The third criteria is
-    # that each network product must have a shortest path with cost
-    # less than 10. This can be checked by generating pathway reports
-    # to each species shown in the sink report. For the curious reader,
-    # we note that generating pathway reports to the six species in the
-    # sink report will show that only Li2CO3, C2H4, LiEDC-, and DLEMC
-    # have sufficiently low-cost paths to pass the third criteria and
-    # thus to be considered products of the test network used here.
-    sink_report(simulation_replayer, folder + '/sink_report.tex')
-    # Run `pdflatex sink_report.tex` in `scratch/li_test` to generate
-    # the sink report PDF.
-
-
-
     tests_passed = True
     if network_loader.number_of_species == 190:
         print(bcolors.PASS +
@@ -331,8 +172,6 @@ def li_test():
               bcolors.ENDC)
         tests_passed = False
 
-
-
     if network_loader.number_of_reactions == 4921:
         print(bcolors.PASS +
               "li_test: correct number of reactions" +
@@ -348,17 +187,14 @@ def li_test():
 
 def mg_test():
 
-
     folder = './scratch/mg_test'
-    subprocess.run(['mkdir', folder ])
+    subprocess.run(['mkdir', folder])
 
     mol_json = './data/sam_G2.json'
     species_decision_tree = mg_species_decision_tree
 
     database_entries = loadfn(mol_json)
 
-
-
     mol_entries = species_filter(
         database_entries,
         folder + '/mol_entries.pickle',
@@ -367,11 +203,8 @@ def mg_test():
         coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy)
     )
 
-
-
     bucket(mol_entries, folder + '/buckets.sqlite')
 
-
     dispatcher_payload = DispatcherPayload(
         folder + '/buckets.sqlite',
         folder + '/rn.sqlite',
@@ -388,7 +221,6 @@ def mg_test():
         Terminal.DISCARD
     )
 
-
     dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json')
     dumpfn(worker_payload, folder + '/worker_payload.json')
 
@@ -406,84 +238,11 @@ def mg_test():
         ]
     )
 
-
-    mg_g2_plus_plus_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/mgg2.xyz',
-        2)
-
-    c2h4_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/c2h4.xyz',
-        0)
-
-    c2h6_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/c2h6.xyz',
-        0)
-
-    initial_state = {
-        33 : 30,
-        81 : 30
-    }
-
-
-    insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite')
-
-
-    subprocess.run([
-        'GMC',
-        '--reaction_database=' + folder + '/rn.sqlite',
-        '--initial_state_database=' + folder + '/initial_state.sqlite',
-        '--number_of_simulations=1000',
-        '--base_seed=1000',
-        '--thread_count=' + number_of_threads,
-        '--step_cutoff=200'
-    ])
-
-
-
     network_loader = NetworkLoader(
         folder + '/rn.sqlite',
         folder + '/mol_entries.pickle',
-        folder + '/initial_state.sqlite'
         )
 
-    network_loader.load_trajectories()
-    network_loader.load_initial_state()
-
-
-
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        folder + '/dummy.tex',
-        rebuild_mol_pictures=True)
-
-    reaction_tally_report(
-        network_loader,
-        folder + '/reaction_tally.tex'
-    )
-
-    pathfinding = Pathfinding(network_loader)
-
-    generate_pathway_report(
-        pathfinding,
-        c2h6_id,
-        folder + '/C2H6_pathways.tex',
-        sort_by_frequency=False
-    )
-
-    generate_pathway_report(
-        pathfinding,
-        c2h4_id,
-        folder + '/C2H4_pathways.tex',
-        sort_by_frequency=False
-    )
-
-
-
-    species_report(network_loader, folder + '/species_report.tex')
-
     tests_passed = True
     if network_loader.number_of_species == 83:
         print(bcolors.PASS +
@@ -495,8 +254,6 @@ def mg_test():
               bcolors.ENDC)
         tests_passed = False
 
-
-
     if network_loader.number_of_reactions == 788:
         print(bcolors.PASS +
               "mg_test: correct number of reactions" +
@@ -510,119 +267,6 @@ def mg_test():
     return tests_passed
 
 
-def flicho_test():
-
-
-    folder = './scratch/flicho_test'
-    subprocess.run(['mkdir', folder ])
-
-    mol_json = './data/flicho_test.json'
-    database_entries = loadfn(mol_json)
-    species_decision_tree = li_species_decision_tree
-
-
-    mol_entries = species_filter(
-        database_entries,
-        mol_entries_pickle_location=folder + '/mol_entries.pickle',
-        species_report=folder + '/unfiltered_species_report.tex',
-        species_decision_tree=species_decision_tree,
-        coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy),
-    )
-
-
-    bucket(mol_entries, folder + '/buckets.sqlite')
-
-    params = {
-        'temperature' : ROOM_TEMP,
-        'electron_free_energy' : -1.4
-    }
-
-    dispatcher_payload = DispatcherPayload(
-        folder + '/buckets.sqlite',
-        folder + '/rn.sqlite',
-        folder + '/reaction_report.tex'
-    )
-
-    worker_payload = WorkerPayload(
-        folder + '/buckets.sqlite',
-        default_reaction_decision_tree,
-        params,
-        Terminal.DISCARD
-    )
-
-
-    dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json')
-    dumpfn(worker_payload, folder + '/worker_payload.json')
-
-    subprocess.run(
-        [
-            'mpirun',
-            '--use-hwthread-cpus',
-            '-n',
-            number_of_threads,
-            'python',
-            'run_network_generation.py',
-            folder + '/mol_entries.pickle',
-            folder + '/dispatcher_payload.json',
-            folder + '/worker_payload.json'
-        ]
-    )
-
-    Li_plus_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/Li.xyz',
-        1)
-
-    EC_id = find_mol_entry_from_xyz_and_charge(
-        mol_entries,
-        './xyz_files/EC.xyz',
-        0)
-
-    initial_state = {
-        Li_plus_id : 30,
-        EC_id : 30
-    }
-
-    insert_initial_state(initial_state, mol_entries, folder + '/initial_state.sqlite')
-
-
-    subprocess.run([
-        'GMC',
-        '--reaction_database=' + folder + '/rn.sqlite',
-        '--initial_state_database=' + folder + '/initial_state.sqlite',
-        '--number_of_simulations=1000',
-        '--base_seed=1000',
-        '--thread_count=' + number_of_threads,
-        '--step_cutoff=200'
-    ])
-
-    network_loader = NetworkLoader(
-        folder + '/rn.sqlite',
-        folder + '/mol_entries.pickle',
-        folder + '/initial_state.sqlite'
-        )
-
-    network_loader.load_trajectories()
-    network_loader.load_initial_state()
-
-    report_generator = ReportGenerator(
-        network_loader.mol_entries,
-        folder + '/dummy.tex',
-        rebuild_mol_pictures=True)
-
-    coordination_report(
-        network_loader,
-        folder + '/coodination_report.tex',
-        'Li1',
-        1)
-
-    decoordination_report(
-        network_loader,
-        folder + '/decoodination_report.tex',
-        'Li1',
-        1)
-
-
 tests = [
     mg_test,
     li_test,

From 6ce072f8e038c8a2ca4bba642ef8452fdfa1404c Mon Sep 17 00:00:00 2001
From: Evan Walter Clark Spotte-Smith <espottesmith@gmail.com>
Date: Wed, 12 Apr 2023 09:59:54 -0700
Subject: [PATCH 2/2] Small tweaks to test

---
 test.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/test.py b/test.py
index bf3b644..af45bf4 100644
--- a/test.py
+++ b/test.py
@@ -56,6 +56,7 @@ class bcolors:
 # set and a magnesium set. Since the lithium test set is older, we shall
 # document that instead of the mg test set.
 
+
 if os.path.isdir('./scratch'):
     subprocess.run(['rm', '-r', './scratch'])
 
@@ -76,7 +77,6 @@ def li_test():
     # through the species decision tree to discard molecules.
     species_decision_tree = li_species_decision_tree
 
-
     # There is one non-local part of species filtering: we consider two
     # molecules to be equivalent if they have the same total charge,
     # composition, and covalent bonds, even if they have different metal
@@ -95,13 +95,11 @@ def li_test():
         coordimer_weight=lambda mol: (mol.penalty, mol.solvation_free_energy),
     )
 
-
     # Once we have generated our molecule list, we generate the bucket database
-    # which is how we break up the reaction filtering amongst all avaliable workers.
-    # It gets stored in the buckets.sqlite database.
+    # which is how we break up the reaction filtering amongst all avaliable
+    # workers. It gets stored in the buckets.sqlite database.
     bucket(mol_entries, folder + '/buckets.sqlite')
 
-
     # Reaction filtering is paralellized using MPI, so we need to spawn
     # an MPI instance to run it. This is why we can't just start
     # reaction filtering by calling a python function. We pass the
@@ -117,8 +115,8 @@ def li_test():
     # HiPRGen.reaction_questions
 
     params = {
-        'temperature' : ROOM_TEMP,
-        'electron_free_energy' : -1.4
+        'temperature': ROOM_TEMP,
+        'electron_free_energy': -1.4
     }
 
     dispatcher_payload = DispatcherPayload(
@@ -134,7 +132,6 @@ def li_test():
         Terminal.DISCARD
     )
 
-
     # The dispatcher and worker payloads are passed through the MPI barrier
     # as JSON blobs dispatcher_payload and worker_payload
     dumpfn(dispatcher_payload, folder + '/dispatcher_payload.json')
@@ -215,8 +212,8 @@ def mg_test():
         folder + '/buckets.sqlite',
         default_reaction_decision_tree,
         {
-            'temperature' : ROOM_TEMP,
-            'electron_free_energy' : -2.06
+            'temperature': ROOM_TEMP,
+            'electron_free_energy': -2.06
         },
         Terminal.DISCARD
     )