diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index d3869b629..7f3604745 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -29,7 +29,6 @@ RUN yum -y install valgrind-ohpc RUN yum -y install boost-gnu9-mpich-ohpc RUN yum -y install python38-pip -RUN pip3 install matplotlib RUN yum -y install procps-ng RUN yum -y install diffutils @@ -143,14 +142,18 @@ RUN . /etc/profile.d/lmod.sh \ RUN rm /tmp/doxygen-1.9.5.src.tar.gz RUN yum -y install graphviz -RUN pip3 install gcovr +RUN . /etc/profile.d/lmod.sh \ && pip3 install gcovr RUN yum -y install gsl-gnu9-ohpc -RUN pip3 install "pybind11[global]" - RUN . /etc/profile.d/lmod.sh \ - && pip3 install mpi4py + && pip3 install "numpy<2" \ + && pip3 install scipy \ + && pip3 install matplotlib \ + && pip3 install h5py \ + && pip3 install "mpi4py<4" + +RUN pip3 install "pybind11[global]" # Register new libs installed into /usr/local/lib with linker RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/class.conf diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index 8bdf6a5cc..2286035e9 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -36,7 +36,6 @@ #include "M2ulPhyS.hpp" #include "tps2Boltzmann.hpp" -// CPU version (just for starting up) void M2ulPhyS::push(TPS::Tps2Boltzmann &interface) { assert(interface.IsInitialized()); diff --git a/src/cycle_avg_joule_coupling.cpp b/src/cycle_avg_joule_coupling.cpp index 07ab637a1..4456a72c6 100644 --- a/src/cycle_avg_joule_coupling.cpp +++ b/src/cycle_avg_joule_coupling.cpp @@ -348,7 +348,23 @@ void CycleAvgJouleCoupling::interpElectricFieldFromEMToFlow() { if (flow_fespace->IsDGSpace()) { efieldR_->SetFromTrueDofs(interp_vals); } else { - assert(false); + Array<int> vdofs; + Vector elem_dof_vals; + int n0 = 0; + const int NE = flow_solver_->getMesh()->GetNE(); + for (int i = 0; i < NE; i++) { + flow_fespace->GetElementDofs(i, vdofs); + const int nsp = flow_fespace->GetFE(i)->GetNodes().GetNPoints(); + assert(nsp == vdofs.Size()); + elem_dof_vals.SetSize(nsp); + for (int j = 0; j < nsp; j++) { + elem_dof_vals(j) = interp_vals(n0 + j); + } + efieldR_->SetSubVector(vdofs, elem_dof_vals); + n0 += nsp; + } + efieldR_->SetTrueVector(); + efieldR_->SetFromTrueVector(); } efieldR_->HostRead(); @@ -357,7 +373,23 @@ void CycleAvgJouleCoupling::interpElectricFieldFromEMToFlow() { if (flow_fespace->IsDGSpace()) { efieldI_->SetFromTrueDofs(interp_vals); } else { - assert(false); + Array<int> vdofs; + Vector elem_dof_vals; + int n0 = 0; + const int NE = flow_solver_->getMesh()->GetNE(); + for (int i = 0; i < NE; i++) { + flow_fespace->GetElementDofs(i, vdofs); + const int nsp = flow_fespace->GetFE(i)->GetNodes().GetNPoints(); + assert(nsp == vdofs.Size()); + elem_dof_vals.SetSize(nsp); + for (int j = 0; j < nsp; j++) { + elem_dof_vals(j) = interp_vals(n0 + j); + } + efieldI_->SetSubVector(vdofs, elem_dof_vals); + n0 += nsp; + } + efieldI_->SetTrueVector(); + efieldI_->SetFromTrueVector(); } efieldI_->HostRead(); #else diff --git a/src/loMach.hpp b/src/loMach.hpp index ba5a9066a..dcff8254e 100644 --- a/src/loMach.hpp +++ b/src/loMach.hpp @@ -57,6 +57,7 @@ // forward-declaration for Tps support class namespace TPS { class Tps; +class Tps2Boltzmann; } #include @@ -258,6 +259,9 @@ class LoMachSolver : public TPS::PlasmaSolver { void evaluatePlasmaConductivityGF() override { thermo_->evaluatePlasmaConductivityGF(); } mfem::ParGridFunction *getJouleHeatingGF() override { return thermo_->getJouleHeatingGF(); } + + void push(TPS::Tps2Boltzmann &interface) override { 
thermo_->push(interface); } + void fetch(TPS::Tps2Boltzmann &interface) override { thermo_->fetch(interface); } }; #endif // LOMACH_HPP_ diff --git a/src/pytps/__init__.py b/src/pytps/__init__.py new file mode 100644 index 000000000..6188ef019 --- /dev/null +++ b/src/pytps/__init__.py @@ -0,0 +1,5 @@ +from .tabulated_reaction_rates import TabulatedSolver +from .utils import master_print, resolve_runFile, libtps + + + diff --git a/src/pytps/tabulated_reaction_rates.py b/src/pytps/tabulated_reaction_rates.py new file mode 100644 index 000000000..04d4f0776 --- /dev/null +++ b/src/pytps/tabulated_reaction_rates.py @@ -0,0 +1,64 @@ +import h5py +import scipy +import scipy.interpolate +import numpy as np +from .utils import master_print, libtps + + +class TabulatedSolver: + def __init__(self, comm, config): + self.comm = comm + self.config = config + self.species_densities = None + self.efield = None + self.heavy_temperature = None + + self.tables = self._read_tables() + self.rates = [] + + def _findPythonReactions(self): + filenames = [] + nreactions = self.config.getint("reactions","number_of_reactions",fallback=0) + for ir in range(nreactions): + sublist = self.config["reactions/reaction{0:d}".format(ir+1)] + rtype = sublist["model"] + if rtype == "bte": + filenames.append(sublist["tabulated/filename"].strip("'")) + + return filenames + + def _read_tables(self): + filenames = self._findPythonReactions() + + tables = [] + for filename in filenames: + with h5py.File(filename, 'r') as fid: + Tcoeff = fid['table'][:] + + tables.append(scipy.interpolate.interp1d(Tcoeff[:,0], Tcoeff[:,1], kind='linear', + bounds_error=False, fill_value='extrapolate')) + + return tables + + def fetch(self, interface): + n_reactions =interface.nComponents(libtps.t2bIndex.ReactionRates) + for r in range(n_reactions): + master_print(self.comm,"Reaction ", r+1, ": ", interface.getReactionEquation(r)) + self.species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False) + self.efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) + self.heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + + efieldAngularFreq = interface.EfieldAngularFreq() + master_print(self.comm,"Electric field angular frequency: ", efieldAngularFreq) + + def solve(self): + self.rates = [] + for table in self.tables: + self.rates.append(table(self.heavy_temperature)) + + def push(self, interface): + rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False) + offset = 0 + for rate in self.rates: + rates[offset:offset+rate.shape[0]] = rate + offset = offset+rate.shape[0] diff --git a/src/pytps/utils.py b/src/pytps/utils.py new file mode 100644 index 000000000..b2e81dbca --- /dev/null +++ b/src/pytps/utils.py @@ -0,0 +1,24 @@ +from mpi4py import MPI +import os +import sys + +def master_print(comm: MPI.Comm, *args, **kwargs) -> None: + if comm.rank == 0: + print(*args, **kwargs) + +def resolve_runFile(cl_args): + ini_name = '' + if '-run' in cl_args: + ini_name = cl_args[cl_args.index('-run') + 1 ] + elif '--runFile' in sys.argv: + ini_name = cl_args[cl_args.index('--runFile') + 1 ] + else: + print("Could not parse command line in python. 
GOOD BYE!") + exit(-1) + return ini_name + + +base = os.path.abspath(os.path.dirname( sys.argv[0] ) ) +print(os.path.join(base, ".libs")) +sys.path.append(os.path.join(base, ".libs")) +import libtps diff --git a/src/reactingFlow.cpp b/src/reactingFlow.cpp index 9e37af02c..36a24a1d4 100644 --- a/src/reactingFlow.cpp +++ b/src/reactingFlow.cpp @@ -41,6 +41,7 @@ #include "loMach.hpp" #include "loMach_options.hpp" #include "radiation.hpp" +#include "tps2Boltzmann.hpp" using namespace mfem; using namespace mfem::common; @@ -363,6 +364,11 @@ ReactingFlow::ReactingFlow(mfem::ParMesh *pmesh, LoMachOptions *loMach_opts, tem tpsP_->getRequiredInput((basepath + "/radius").c_str(), R); rxnModelParamsHost.push_back(Vector({R})); + } else if (model == "bte") { + reactionModels[r - 1] = GRIDFUNCTION_RXN; + int index; + tpsP_->getRequiredInput((basepath + "/bte/index").c_str(), index); + chemistryInput_.reactionInputs[r - 1].indexInput = index; } else { grvy_printf(GRVY_ERROR, "\nUnknown reaction_model -> %s", model.c_str()); exit(ERROR); @@ -421,7 +427,7 @@ ReactingFlow::ReactingFlow(mfem::ParMesh *pmesh, LoMachOptions *loMach_opts, tem equilibriumConstantParams[p + r * gpudata::MAXCHEMPARAMS]; } - if (reactionModels[r] != TABULATED_RXN) { + if (reactionModels[r] != TABULATED_RXN && reactionModels[r] != GRIDFUNCTION_RXN) { assert(rxn_param_idx < rxnModelParamsHost.size()); chemistryInput_.reactionInputs[r].modelParams = rxnModelParamsHost[rxn_param_idx].Read(); rxn_param_idx += 1; @@ -2835,6 +2841,71 @@ double species_uniform(const Vector &coords, double t) { return yn; } +void ReactingFlow::push(TPS::Tps2Boltzmann &interface) { + assert(interface.IsInitialized()); + + // const int nscalardofs(vfes_->GetNDofs()); + + mfem::ParGridFunction *species = + new mfem::ParGridFunction(&interface.NativeFes(TPS::Tps2Boltzmann::Index::SpeciesDensities)); + + mfem::Vector speciesInt(sDofInt_*interface.Nspecies()); + double *species_data = speciesInt.HostWrite(); + + const double *dataRho = rn_.HostRead(); + const double *dataY = Yn_.HostRead(); + + double state_local[gpudata::MAXEQUATIONS]; + double species_local[gpudata::MAXSPECIES]; + + for (int i = 0; i < gpudata::MAXEQUATIONS; ++i) + state_local[i] = 0.; + + for (int i = 0; i < gpudata::MAXSPECIES; ++i) + species_local[i] = 0.; + + for (int i = 0; i < sDofInt_; i++) { + state_local[0] = dataRho[i]; + for (int asp = 0; asp < nActiveSpecies_; asp++) + state_local[dim_ + 2 + asp] = dataRho[i]*dataY[i+asp*sDofInt_]; + mixture_->computeNumberDensities(state_local, species_local); + + for (int sp = 0; sp < interface.Nspecies(); sp++) + species_data[i + sp * sDofInt_] = AVOGADRONUMBER * species_local[sp]; + } + + species->SetFromTrueDofs(speciesInt); + Tn_gf_.SetFromTrueDofs(Tn_); + interface.interpolateFromNativeFES(*species, TPS::Tps2Boltzmann::Index::SpeciesDensities); + interface.interpolateFromNativeFES(Tn_gf_, TPS::Tps2Boltzmann::Index::HeavyTemperature); + interface.interpolateFromNativeFES(Tn_gf_, TPS::Tps2Boltzmann::Index::ElectronTemperature); + + interface.setTimeStep(this->dt_); + interface.setCurrentTime(this->time_); + + delete species; +} + +void ReactingFlow::fetch(TPS::Tps2Boltzmann &interface) { + // NOTE: Chemistry is only address by truedofs indexes. 
+ mfem::ParFiniteElementSpace *reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates))); + externalReactionRates_gf_.reset(new mfem::ParGridFunction(reaction_rates_fes)); + interface.interpolateToNativeFES(*externalReactionRates_gf_, TPS::Tps2Boltzmann::Index::ReactionRates); + externalReactionRates_gf_->GetTrueDofs(externalReactionRates_); + int size = sDofInt_; +#if defined(_CUDA_) || defined(_HIP_) + const double *data(externalReactionRates_.Read()); + // int size(externalReactionRates_->FESpace()->GetNDofs()); + assert(externalReactionRates_gf_->FESpace()->GetOrdering() == mfem::Ordering::byNODES); + gpu::deviceSetChemistryReactionData<<<1, 1>>>(data, size, chemistry_); +#else + // chemistry_->setGridFunctionRates(*externalReactionRates_gf_); + const double *data(externalReactionRates_.HostRead()); + chemistry_->setRates(data, size); +#endif +} + + #if 0 void ReactingFlow::uniformInlet() { int myRank; diff --git a/src/reactingFlow.hpp b/src/reactingFlow.hpp index 1387ffced..56fc1bc35 100644 --- a/src/reactingFlow.hpp +++ b/src/reactingFlow.hpp @@ -36,6 +36,7 @@ // forward-declaration for Tps support class namespace TPS { class Tps; +class Tps2Boltzmann; } #include @@ -102,6 +103,9 @@ class ReactingFlow : public ThermoChemModelBase { PerfectMixture *mixture_ = NULL; ArgonMixtureTransport *transport_ = NULL; Chemistry *chemistry_ = NULL; + // External reaction rates when chemistry is implemented using the BTE option + std::unique_ptr externalReactionRates_gf_; // Has repeated interface dofs. + Vector externalReactionRates_; // Only true data std::vector speciesNames_; std::map atomMap_; @@ -415,5 +419,8 @@ class ReactingFlow : public ThermoChemModelBase { void evalSubstepNumber(); void readTableWrapper(std::string inputPath, TableInput &result); + + void push(TPS::Tps2Boltzmann &interface) override; + void fetch(TPS::Tps2Boltzmann &interface) override; }; #endif // REACTINGFLOW_HPP_ diff --git a/src/reaction.cpp b/src/reaction.cpp index d119f58be..ef4db5d19 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -88,8 +88,8 @@ MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp) MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} MFEM_HOST_DEVICE void GridFunctionReaction::setData(const double *data, int size) { - data_ = data + comp_ * size_; size_ = size; + data_ = data + comp_ * size_; } void GridFunctionReaction::setGridFunction(const mfem::GridFunction &f) { @@ -103,6 +103,7 @@ void GridFunctionReaction::setGridFunction(const mfem::GridFunction &f) { #endif } + MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unused]] const double &T_h, [[maybe_unused]] const double &T_e, const int &dofindex, diff --git a/src/thermo_chem_base.hpp b/src/thermo_chem_base.hpp index 78477259c..003934964 100644 --- a/src/thermo_chem_base.hpp +++ b/src/thermo_chem_base.hpp @@ -39,6 +39,7 @@ namespace TPS { class Tps; +class Tps2Boltzmann; } class IODataOrganizer; @@ -197,6 +198,16 @@ class ThermoChemModelBase { std::cout << "ERROR: " << __func__ << " remains unimplemented" << std::endl; exit(1); } + + virtual void push(TPS::Tps2Boltzmann &interface) { + std::cout << "ERROR: " << __func__ << " remains unimplemented" << std::endl; + exit(1); + } + + virtual void fetch(TPS::Tps2Boltzmann &interface) { + std::cout << "ERROR: " << __func__ << " remains unimplemented" << std::endl; + exit(1); + } }; /** diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index bc1976955..7fd4e8023 100755 --- a/src/tps-bte_0d3v.py +++ 
b/src/tps-bte_0d3v.py @@ -15,6 +15,7 @@ import scipy.cluster import threading import datetime +import h5py # Use asynchronous stream ordered memory #cp.cuda.set_allocator(cp.cuda.MemoryAsyncPool().malloc) @@ -57,11 +58,20 @@ def min_mean_max(a, comm: MPI.Comm): # set path to C++ TPS library path = os.path.abspath(os.path.dirname(sys.argv[0])) -sys.path.append(path + "/.libs") +sys.path.append(path) +sys.path.append(path + "/../../torch-chemistry") +import argon.scripts.synthetic_cs as synthetic_cs sys.path.append(path + "/../../boltzmann/BESolver/python") -import libtps + +crs_folder = path + "/../../torch-chemistry/argon/scripts/crs_lxcat" +if not os.path.exists(crs_folder): + os.makedirs(crs_folder) + +import pytps +from pytps import libtps, TabulatedSolver from bte_0d3v_batched import bte_0d3v_batched as BoltzmannSolver import utils as bte_utils +print(bte_utils) WITH_PARLA = 0 if WITH_PARLA: @@ -69,7 +79,8 @@ def min_mean_max(a, comm: MPI.Comm): from parla import Parla from parla.tasks import spawn, TaskSpace from parla.devices import cpu, gpu - except: + except Exception as e: + print(e) print("Error occured during Parla import. Please make sure Parla is installed properly.") sys.exit(0) @@ -122,7 +133,7 @@ class BoltzmannSolverParams(): n0 = 3.22e22 #[m^{-3}] rand_seed = 0 - use_clstr_inp = False + use_clstr_inp = True clstr_maxiter = 10 clstr_threshold = 1e-3 @@ -142,17 +153,14 @@ class TPSINDEX(): # in future we need to setup this methodically # here key denotes the idx running from 0, nreactions-1 # value denotes the reaction index in the qoi array - RR_IDX = {0 : 4 , 1 : 5 , 2 : 6, 3 : 7, 4 : 1 , 5 : 2, 6 : 3 } - + RR_IDX = {0 : 2 , 1 : 4 , 2 : 1, 3 : 3} - ION_IDX = 3 - ELE_IDX = 4 - NEU_IDX = 5 + ION_IDX = 1 + ELE_IDX = 2 + NEU_IDX = 3 EX1_IDX = 0 - EX2_IDX = 1 - EX3_IDX = 2 - MOLE_FRAC_IDX = {0: NEU_IDX, 1: EX1_IDX , 2: EX2_IDX , 3: EX3_IDX} + MOLE_FRAC_IDX = {0: NEU_IDX, 1: EX1_IDX} def k_means(x, num_clusters, xi=None, max_iter=1000, thresh=1e-12, rand_seed=0, xp=np): assert x.ndim == 2, "observations must me 2d array" @@ -319,7 +327,15 @@ def grid_setup(self, interface): Te = xp.array([Te_b[b_idx] for b_idx in range(self.param.n_grids)]) # xp.ones(self.param.n_grids) * self.param.Te vth = np.sqrt(2* self.param.kB * Te * self.param.ev_to_K /self.param.me) ev_max = (6 * vth / self.param.c_gamma)**2 - self.bte_solver = BoltzmannSolver(self.param, ev_max , Te , nr, lm_modes, self.param.n_grids, self.param.collisions) + + # generate crs Tg depended crs data + col_cs = list() + for idx in range(self.param.n_grids): + cs_fname = "%s/crs_rank_%06d_npes_%06d_%06d.txt"%(crs_folder,self.rankG, self.npesG, idx) + synthetic_cs.gen_lxcat_file(cs_fname, Te_b[idx] * self.param.ev_to_K) + col_cs.append(cs_fname) + + self.bte_solver = BoltzmannSolver(self.param, ev_max , Te , nr, lm_modes, self.param.n_grids, col_cs) # compute BTE operators for grid_idx in range(self.param.n_grids): @@ -344,7 +360,7 @@ def grid_setup(self, interface): # active_grid_idx.append(grid_idx) # self.active_grid_idx = active_grid_idx #[i for i in range(self.param.n_grids)] - self.active_grid_idx = [2,3]#[i for i in range(self.param.n_grids)] + self.active_grid_idx = [i for i in range(self.param.n_grids)] self.sub_clusters_run = False return @@ -359,13 +375,15 @@ def solve_wo_parla(self): n_grids = self.param.n_grids gidx_to_device_map = self.gidx_to_device_map - self.qoi = [None for grid_idx in range(self.param.n_grids)] - self.ff = [None for grid_idx in range(self.param.n_grids)] - coll_list = 
self.bte_solver.get_collision_list() - coll_names = self.bte_solver.get_collision_names() + self.qoi = [None for grid_idx in range(self.param.n_grids)] + self.ff = [None for grid_idx in range(self.param.n_grids)] + + if csv_write: + for grid_idx in range(n_grids): + assert len(self.bte_solver.get_collision_list()[grid_idx]) == len(self.bte_solver.get_collision_list()[0]) + + data_csv = np.empty((self.tps_npts, 8 + len(self.bte_solver.get_collision_list()[0]))) t1 = time() for grid_idx in range(n_grids): @@ -376,9 +394,10 @@ def solve_wo_parla(self): ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.qoi[grid_idx] = qoi self.ff [grid_idx] = ff - except: + except Exception as e: + print(e) print("solver failed for v-space grid no %d"%(grid_idx), flush=True) - sys.exit(-1) + self.comm.Abort(0) else: with xp.cuda.Device(dev_id): try: @@ -386,15 +405,20 @@ def solve_wo_parla(self): ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.qoi[grid_idx] = qoi self.ff [grid_idx] = ff - except: + except Exception as e: + print(e) print("solver failed for v-space grid no %d"%(grid_idx), flush=True) - sys.exit(-1) + self.comm.Abort(0) t2 = time() print("time for boltzmann v-space solve = %.4E"%(t2- t1), flush=True) if (self.param.export_csv ==1 or self.param.plot_data==1): for grid_idx in range(n_grids): + + coll_list = self.bte_solver.get_collision_list()[grid_idx] + coll_names = self.bte_solver.get_collision_names()[grid_idx] + dev_id = gidx_to_device_map(grid_idx, n_grids) ff = self.ff[grid_idx] qoi = self.qoi[grid_idx] @@ -483,7 +507,9 @@ def fetch(self, interface): efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) - cs_avail_species = self.bte_solver._avail_species + cs_avail_species = self.bte_solver._avail_species[0] + for i in range(len(self.bte_solver._avail_species)): + assert cs_avail_species == self.bte_solver._avail_species[i] n0 = np.sum(species_densities, axis=0) - species_densities[TPSINDEX.ELE_IDX] ns_by_n0 = np.concatenate([species_densities[TPSINDEX.MOLE_FRAC_IDX[i]]/n0 for i in range(len(cs_avail_species))]).reshape((len(cs_avail_species), tps_npts)) @@ -497,21 +523,23 @@ def fetch(self, interface): Ey = efield[1] ne = species_densities[TPSINDEX.ELE_IDX] - coll_list = self.bte_solver.get_collision_list() - coll_names = self.bte_solver.get_collision_names() - cs_data = self.bte_solver.get_cross_section_data() - - cs_species = list() - for col_idx, (k,v) in enumerate(cs_data.items()): - cs_species.append(v["species"]) - cs_species = list(sorted(set(cs_species), key=cs_species.index)) data_csv = np.concatenate([(Ex).reshape((-1, 1)), (Ey).reshape((-1, 1)), (Tg).reshape((-1, 1)), (ne/n0).reshape((-1, 1))] + [ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])] + [n0.reshape(-1, 1)], axis=1) for grid_idx in self.active_grid_idx: + coll_list = self.bte_solver.get_collision_list()[grid_idx] + coll_names = self.bte_solver.get_collision_names()[grid_idx] + cs_data = self.bte_solver.get_cross_section_data()[grid_idx] + + cs_species = list() + for col_idx, (k,v) in enumerate(cs_data.items()): + cs_species.append(v["species"]) + cs_species = list(sorted(set(cs_species), key=cs_species.index)) + + with 
open("%s/%s.csv"%(self.param.output_dir, "tps_fetch_grid_%02d_rank_%02d_npes_%02d"%(grid_idx, self.rankG, self.npesG)), 'w', encoding='UTF8') as f: writer = csv.writer(f,delimiter=',') # write the header @@ -820,12 +848,8 @@ def solve(self): self.qoi = [None for grid_idx in range(self.param.n_grids)] self.ff = [None for grid_idx in range(self.param.n_grids)] - coll_list = self.bte_solver.get_collision_list() - coll_names = self.bte_solver.get_collision_names() - - if csv_write: - data_csv = np.empty((self.tps_npts, 8 + len(coll_list))) - + + for grid_idx in self.active_grid_idx: dev_id = gidx_to_device_map(grid_idx,n_grids) @@ -836,9 +860,11 @@ def t1(): ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.ff[grid_idx] = ff self.qoi[grid_idx] = qoi - except: + except Exception as e: + print(e) print("rank [%d/%d] solver failed for v-space gird no %d"%(self.rankG, self.npesG, grid_idx), flush=True) - sys.exit(-1) + self.comm.Abort(0) + #sys.exit(-1) with cp.cuda.Device(dev_id): t1() @@ -870,7 +896,6 @@ def t1(): qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) rr_cpu = xp.asnumpy(qoi["rates"]) inp_mask = xp.asnumpy(self.sub_cluster_c_lbl[grid_idx]) == np.arange(self.param.n_sub_clusters)[:, None] - rr_interp = np.zeros((n_reactions, len(gidx_to_pidx_map[grid_idx]))) for c_idx in range(self.param.n_sub_clusters): @@ -883,7 +908,7 @@ def t1(): with cp.cuda.Device(dev_id): t1() - + rates = rates.reshape((-1)) rates[rates<0] = 0.0 else: @@ -922,7 +947,9 @@ async def fetch_asnyc(self, interface): efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) - cs_avail_species = self.bte_solver._avail_species + cs_avail_species = self.bte_solver._avail_species[0] + for i in range(len(self.bte_solver._avail_species)): + assert cs_avail_species == self.bte_solver._avail_species[i] n0 = np.sum(species_densities, axis=0) - species_densities[TPSINDEX.ELE_IDX] ns_by_n0 = np.concatenate([species_densities[TPSINDEX.MOLE_FRAC_IDX[i]]/n0 for i in range(len(cs_avail_species))]).reshape((len(cs_avail_species), tps_npts)) @@ -1240,16 +1267,13 @@ async def solve_async(self): self.ff = [None for grid_idx in range(self.param.n_grids)] num_gpus = len(gpu) assert num_gpus == self.num_gpus_per_node, "CuPy and Parla number of GPUs per node does not match %d vs. 
%d"%(num_gpus, self.num_gpus_per_node) - coll_list = self.bte_solver.get_collision_list() - coll_names = self.bte_solver.get_collision_names() if (use_gpu==1): parla_placement = [gpu(gidx_to_device_map(grid_idx,n_grids)) for grid_idx in range(n_grids)] else: parla_placement = [cpu for grid_idx in range(n_grids)] - if csv_write: - data_csv = np.empty((self.tps_npts, 8 + len(coll_list))) + ts = TaskSpace("T") for grid_idx in self.active_grid_idx: @@ -1340,9 +1364,10 @@ def io_output_data(self, grid_idx, u0, plot_data:bool, export_csv:bool, fname:st h_curr = self.bte_solver.normalized_distribution(grid_idx, h_curr) ff = h_curr qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) - coll_list = self.bte_solver.get_collision_list() - coll_names = self.bte_solver.get_collision_names() - cs_data = self.bte_solver.get_cross_section_data() + + coll_list = self.bte_solver.get_collision_list()[grid_idx] + coll_names = self.bte_solver.get_collision_names()[grid_idx] + cs_data = self.bte_solver.get_cross_section_data()[grid_idx] cs_species = list() for col_idx, (k,v) in enumerate(cs_data.items()): @@ -1780,6 +1805,19 @@ def __main__(): interface = libtps.Tps2Boltzmann(tps) tps.initInterface(interface) tps.solveBegin() + + ## Hack to set-up the correct initial condition + tps.push(interface) + ini_name = pytps.resolve_runFile(sys.argv) + + print(ini_name) + config = configparser.ConfigParser() + config.read(ini_name) + tabulatedSolver = TabulatedSolver(comm, config) + tabulatedSolver.fetch(interface) + tabulatedSolver.solve() + tabulatedSolver.push(interface) + tps.fetch(interface) # --- first TPS step is needed to initialize the EM fields tps.solveStep() tps.push(interface) @@ -1946,6 +1984,9 @@ def t1(): profile_tt[pp.TPS_FETCH].start() tps.fetch(interface) profile_tt[pp.TPS_FETCH].stop() + + if (iter%cycle_freq==0): + interface.saveDataCollection(cycle=(iter//cycle_freq), time=iter) tps_u = 0 tps_v = 0 diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index af2efc6c8..1aa62cd75 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -3,57 +3,20 @@ import os import numpy as np -from mpi4py import MPI - -class ArrheniusSolver: - def __init__(self): - self.UNIVERSALGASCONSTANT = 8.3144598; # J * mol^(-1) * K^(-1) - self.species_densities = None - self.efield = None - self.heavy_temperature = None - self.reaction_rates = [None, None] - #Reaction 1: 'Ar + E => Ar.+1 + 2 E', - #Reaction 2: 'Ar.+1 + 2 E => Ar + E' - self.A = [74072.331348, 5.66683445516e-20] - self.b = [1.511, 0.368] - self.E = [1176329.772504, -377725.908714] # [J/mol] - - def fetch(self, interface): - n_reactions =interface.nComponents(libtps.t2bIndex.ReactionRates) - for r in range(n_reactions): - print("Reaction ", r+1, ": ", interface.getReactionEquation(r)) - self.species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False) - self.efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) - self.heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) - - efieldAngularFreq = interface.EfieldAngularFreq() - print("Electric field angular frequency: ", efieldAngularFreq) - - - - def solve(self): - #A_ * pow(temp, b_) * exp(-E_ / UNIVERSALGASCONSTANT / temp); - self.reaction_rates = [A * np.power(self.heavy_temperature, b) * - np.exp(-E/(self.UNIVERSALGASCONSTANT * self.heavy_temperature)) - for A,b,E in zip(self.A, self.b, self.E) ] - - def push(self, interface): - n_reactions 
=interface.nComponents(libtps.t2bIndex.ReactionRates) - if n_reactions >= 2: - rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False) - rates[0:self.heavy_temperature.shape[0]] = self.reaction_rates[0] - rates[self.heavy_temperature.shape[0]:] = self.reaction_rates[1] +import configparser +from mpi4py import MPI +# set path to pyTPS library +path = os.path.dirname( os.path.abspath(sys.argv[0]) ) +print(path) +sys.path.append(path) +import pytps -# set path to C++ TPS library -path = os.path.abspath(os.path.dirname(sys.argv[0])) -sys.path.append(path + "/.libs") -import libtps comm = MPI.COMM_WORLD # TPS solver -tps = libtps.Tps(comm) +tps = pytps.libtps.Tps(comm) tps.parseCommandLineArgs(sys.argv) tps.parseInput() @@ -61,27 +24,32 @@ def push(self, interface): tps.chooseSolver() tps.initialize() -boltzmann = ArrheniusSolver() +ini_name = pytps.resolve_runFile(sys.argv) +print(ini_name) +config = configparser.ConfigParser() +config.read(ini_name) -interface = libtps.Tps2Boltzmann(tps) +boltzmann = pytps.TabulatedSolver(comm, config) + +interface = pytps.libtps.Tps2Boltzmann(tps) tps.initInterface(interface) it = 0 max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") -print("Max Iters: ", max_iters) +pytps.master_print(comm,"Max Iters: ", max_iters) tps.solveBegin() while it < max_iters: - tps.solveStep() tps.push(interface) boltzmann.fetch(interface) boltzmann.solve() boltzmann.push(interface) interface.saveDataCollection(cycle=it, time=it) tps.fetch(interface) + tps.solveStep() it = it+1 - print("it, ", it) + pytps.master_print(comm, "it, ", it) tps.solveEnd() diff --git a/src/tps.py b/src/tps.py index c92f658ef..4ca9763e9 100755 --- a/src/tps.py +++ b/src/tps.py @@ -1,23 +1,26 @@ #!/usr/bin/env python3 -import sys import os +import sys +from time import perf_counter as time from mpi4py import MPI -# set path to C++ TPS library -path = os.path.abspath(os.path.dirname(sys.argv[0])) -sys.path.append(path + "/.libs") -import libtps as tps +# set path to pyTPS library +path = os.path.dirname( os.path.abspath(sys.argv[0]) ) +sys.path.append(os.path.join(path, ".libs")) +import libtps + comm = MPI.COMM_WORLD # TPS solver -tps = tps.Tps(comm) +tps = libtps.Tps(comm) tps.parseCommandLineArgs(sys.argv) tps.parseInput() tps.chooseDevices() tps.chooseSolver() tps.initialize() -tps.solveStep() tps.solve() + + sys.exit (tps.getStatus()) diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index 689fded06..c646ee75e 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -79,7 +79,7 @@ void idenity_fun(const Vector &x, Vector &out) { } Tps2Boltzmann::Tps2Boltzmann(Tps *tps) - : NIndexes(7), tps_(tps), all_fes_(nullptr), save_to_paraview_dc(false), paraview_dc(nullptr) { + : NIndexes(7), tps_(tps), all_fes_(nullptr), use_h1fec_(false), save_to_paraview_dc(false), paraview_dc(nullptr) { // Assert we have a couple solver; assert(tps->isFlowEMCoupled()); @@ -92,6 +92,7 @@ Tps2Boltzmann::Tps2Boltzmann(Tps *tps) tps->getRequiredInput("em/current_frequency", EfieldAngularFreq_); EfieldAngularFreq_ *= 2. 
* M_PI; + use_h1fec_ = tps->getInput("boltzmannInterface/h1fec", false); save_to_paraview_dc = tps->getInput("boltzmannInterface/save_to_paraview", false); offsets.SetSize(NIndexes + 1); @@ -117,9 +118,12 @@ int Tps2Boltzmann::_countBTEReactions() { } void Tps2Boltzmann::init(TPS::PlasmaSolver *flowSolver) { - std::cout << "Tps2Boltzmann::init is called" << std::endl; mfem::ParMesh *pmesh(flowSolver->getMesh()); - fec_ = new mfem::L2_FECollection(order_, pmesh->Dimension(), basis_type_); + if (use_h1fec_) + fec_ = new mfem::H1_FECollection(order_, pmesh->Dimension(), basis_type_); + else + fec_ = new mfem::L2_FECollection(order_, pmesh->Dimension(), basis_type_); + switch (pmesh->Dimension()) { case 2: nEfieldComps_ = 2; diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index 5dc39adb6..2d0c73bc0 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -170,6 +170,7 @@ class Tps2Boltzmann { double timestep_; double currentTime_; + bool use_h1fec_; bool save_to_paraview_dc; mfem::ParaViewDataCollection *paraview_dc; std::vector reaction_eqs_; diff --git a/test/cyl3d.python.test b/test/cyl3d.python.test index 8036c1a96..23be6c507 100755 --- a/test/cyl3d.python.test +++ b/test/cyl3d.python.test @@ -9,6 +9,8 @@ EXE="../src/tps.py" setup() { SOLN_FILE=restart_output.sol.h5 REF_FILE=ref_solns/cyl3d_coarse.4iters.h5 + rm -rf planeData + mkdir planeData } @test "[$TEST] check for input file $RUNFILE" { @@ -109,7 +111,7 @@ setup() { ./soln_differ $SOLN_FILE $REF_FILE } -@test "[$TEST] verify serial restart consistency using 2 mpi tasks" { +@test "[$TEST] verify singleFileWrite restart consistency using 2 mpi tasks" { RUNFILE=inputs/input.4iters.cyl.ini RUNFILE_PART=${RUNFILE}.part SOLN_FILE=restart_output.sol.h5 @@ -136,6 +138,33 @@ setup() { rm -f restart_output.sol.*.h5 } +@test "[$TEST] verify singleFileReadWrite restart consistency using 2 mpi tasks" { + RUNFILE=inputs/input.4iters.cyl.ini + RUNFILE_PART=${RUNFILE}.part + SOLN_FILE=restart_output.sol.h5 + + rm -f $RUNFILE_PART + rm -f $SOLN_FILE + + # copy the run file, b/c I want to edit it + test -s $RUNFILE + cp $RUNFILE $RUNFILE_PART + + # request single file restart + echo "[io]" >> $RUNFILE_PART + echo "restartMode = singleFileReadWrite" >> $RUNFILE_PART + + # run 2 tasks + mpirun -np 2 $EXE --runFile $RUNFILE_PART + test -s $SOLN_FILE + + ./soln_differ $SOLN_FILE $REF_FILE + + # clean up if we got this far + rm -f $RUNFILE_PART + rm -f restart_output.sol.*.h5 +} + @test "[$TEST] verify serial restart consistency using 4 mpi tasks" { RUNFILE=inputs/input.4iters.cyl.ini RUNFILE_PART=${RUNFILE}.part @@ -194,7 +223,7 @@ setup() { cp $RUNFILE $RUNFILE_MOD echo "[io]" >> $RUNFILE_MOD echo "restartMode = singleFileRead" >> $RUNFILE_MOD - echo "enableRestart = true" >> $RUNFILE_MOD + echo "enableRestart = True" >> $RUNFILE_MOD mpirun -np 4 $EXE --runFile $RUNFILE_MOD test -s partition.4p.h5 @@ -256,7 +285,7 @@ setup() { cp $RUNFILE $RUNFILE_MOD echo "[io]" >> $RUNFILE_MOD echo "restartMode = singleFileWrite" >> $RUNFILE_MOD - echo "enableRestart = true" >> $RUNFILE_MOD + echo "enableRestart = True" >> $RUNFILE_MOD mpirun -np 2 $EXE --runFile $RUNFILE_MOD test -s partition.2p.h5 diff --git a/test/vpath.sh b/test/vpath.sh index af1e5eead..1f323d5c2 100755 --- a/test/vpath.sh +++ b/test/vpath.sh @@ -29,6 +29,11 @@ if [ ! -d ref_solns ];then ln -s $testDir/ref_solns . fi +# pytps +if [ ! 
-d ../src/pytps ];then + ln -s $testDir/../src/pytps ../src/pytps +fi + # necessary binaries binaries="bats die.sh soln_differ count_gpus.sh sniff_mpirun.sh " binaries+="../src/tps.py ../src/tps-time-loop.py ../src/tps-bte_0d3v.py ../test/test_tps_splitcomm.py"
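
Usage note on the tabulated ("bte" model) reaction rates introduced above: `TabulatedSolver._read_tables` expects each reaction configured with `model = bte` to point, via its `tabulated/filename` entry (see `_findPythonReactions`), at an HDF5 file whose `table` dataset is a two-column array; each table is wrapped in a linearly interpolating, extrapolating `scipy.interpolate.interp1d` and later evaluated at the heavy-species temperature. The sketch below is a minimal, hypothetical illustration of producing and querying such a table; the file name, temperature grid, and rate values are invented for the example, and reading column 0 as temperature in K and column 1 as the rate coefficient is an assumption inferred from how `fetch`/`solve` use the table rather than something stated in this patch.

```python
import h5py
import numpy as np
import scipy.interpolate

# Hypothetical example: write a small rate table in the layout TabulatedSolver reads.
# Column 0: heavy-species temperature [K] (assumed), column 1: rate coefficient.
T = np.linspace(300.0, 12000.0, 64)
k = 1.0e-16 * np.sqrt(T) * np.exp(-1.2e5 / T)  # made-up Arrhenius-like shape
with h5py.File("example_rate_table.h5", "w") as fid:
    fid.create_dataset("table", data=np.column_stack([T, k]))

# Read it back the same way TabulatedSolver._read_tables does and evaluate a few temperatures.
with h5py.File("example_rate_table.h5", "r") as fid:
    Tcoeff = fid["table"][:]
rate = scipy.interpolate.interp1d(Tcoeff[:, 0], Tcoeff[:, 1], kind="linear",
                                  bounds_error=False, fill_value="extrapolate")
print(rate(np.array([500.0, 5000.0, 15000.0])))  # last point relies on extrapolation
```

In a run driven by `tps-time-loop.py` or `tps-bte_0d3v.py`, such files are selected per reaction through the `[reactions/reactionN]` sections of the run file, which `TabulatedSolver` parses with `configparser`.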