From 5b813b9255fcdb7d146e2732ccede1f2ae43ad09 Mon Sep 17 00:00:00 2001
From: Raymond Pan <raymondpan12345@gmail.com>
Date: Fri, 16 May 2025 19:43:30 -0400
Subject: [PATCH 1/7] =?UTF-8?q?Bump=20version:=200.0.5.dev9=20=E2=86=92=20?=
 =?UTF-8?q?0.0.5.dev10?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 setup.cfg             | 2 +-
 setup.py              | 2 +-
 zephyr_ml/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/setup.cfg b/setup.cfg
index 05eddc8..b5212e3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.5.dev9
+current_version = 0.0.5.dev10
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py
index 9fc7774..2ac9a80 100644
--- a/setup.py
+++ b/setup.py
@@ -124,6 +124,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/sintel-dev/zephyr',
-    version='0.0.5.dev9',
+    version='0.0.5.dev10',
     zip_safe=False,
 )
diff --git a/zephyr_ml/__init__.py b/zephyr_ml/__init__.py
index 99e651b..6213460 100644
--- a/zephyr_ml/__init__.py
+++ b/zephyr_ml/__init__.py
@@ -4,7 +4,7 @@
 
 __author__ = 'MIT Data To AI Lab'
 __email__ = 'dai-lab@mit.edu'
-__version__ = '0.0.5.dev9'
+__version__ = '0.0.5.dev10'
 
 import os
 

From 5bb1e70db60c45906044bb64dc784a44a58176ce Mon Sep 17 00:00:00 2001
From: Raymond Pan <raymondpan12345@gmail.com>
Date: Sat, 24 May 2025 19:50:53 -0700
Subject: [PATCH 2/7] moved evaluation metrics to separate file

---
 zephyr_ml/core.py                             |  4 +-
 zephyr_ml/primitives/evaluation.py            | 64 +++++++++++++++++++
 ...imitives.evaluation.confusion_matrix.json} |  4 +-
 ...s.evaluation.roc_auc_score_and_curve.json} |  4 +-
 zephyr_ml/primitives/postprocessing.py        | 60 +----------------
 5 files changed, 71 insertions(+), 65 deletions(-)
 create mode 100644 zephyr_ml/primitives/evaluation.py
 rename zephyr_ml/primitives/jsons/{zephyr_ml.primitives.postprocessing.confusion_matrix.json => zephyr_ml.primitives.evaluation.confusion_matrix.json} (85%)
 rename zephyr_ml/primitives/jsons/{zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json => zephyr_ml.primitives.evaluation.roc_auc_score_and_curve.json} (84%)

diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py
index 2e5bb8d..8417207 100644
--- a/zephyr_ml/core.py
+++ b/zephyr_ml/core.py
@@ -21,8 +21,8 @@
     "sklearn.metrics.precision_score",
     "sklearn.metrics.f1_score",
     "sklearn.metrics.recall_score",
-    "zephyr_ml.primitives.postprocessing.confusion_matrix",
-    "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve",
+    "zephyr_ml.primitives.evaluation.confusion_matrix",
+    "zephyr_ml.primitives.evaluation.roc_auc_score_and_curve",
 ]
 
 LOGGER = logging.getLogger(__name__)
diff --git a/zephyr_ml/primitives/evaluation.py b/zephyr_ml/primitives/evaluation.py
new file mode 100644
index 0000000..0f51a59
--- /dev/null
+++ b/zephyr_ml/primitives/evaluation.py
@@ -0,0 +1,64 @@
+"""
+Evaluation metrics
+"""
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn import metrics
+
+
+def confusion_matrix(
+        y_true,
+        y_pred,
+        labels=None,
+        sample_weight=None,
+        normalize=None):
+    conf_matrix = metrics.confusion_matrix(
+        y_true, y_pred, labels=labels, sample_weight=sample_weight, normalize=normalize
+    )
+    fig = plt.figure()
+    ax = fig.add_axes(sns.heatmap(conf_matrix, annot=True, cmap="Blues"))
+
+    ax.set_title("Confusion Matrix\n")
+    ax.set_xlabel("\nPredicted Values")
+    ax.set_ylabel("Actual Values")
+
+    ax.xaxis.set_ticklabels(["False", "True"])
+    ax.yaxis.set_ticklabels(["False", "True"])
+
+    return conf_matrix, fig
+
+
+def roc_auc_score_and_curve(
+    y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True
+):
+    if y_proba.ndim > 1:
+        y_proba = y_proba[:, 1]
+    fpr, tpr, _ = metrics.roc_curve(
+        y_true,
+        y_proba,
+        pos_label=pos_label,
+        sample_weight=sample_weight,
+        drop_intermediate=drop_intermediate,
+    )
+    ns_probs = [0 for _ in range(len(y_true))]
+    ns_fpr, ns_tpr, _ = metrics.roc_curve(
+        y_true,
+        ns_probs,
+        pos_label=pos_label,
+        sample_weight=sample_weight,
+        drop_intermediate=drop_intermediate,
+    )
+
+    auc = metrics.roc_auc_score(y_true, y_proba)
+    fig, ax = plt.subplots(1, 1)
+
+    ax.plot(fpr, tpr, "ro")
+    ax.plot(fpr, tpr)
+    ax.plot(ns_fpr, ns_tpr, linestyle="--", color="green")
+
+    ax.set_ylabel("True Positive Rate")
+    ax.set_xlabel("False Positive Rate")
+    ax.set_title("AUC: %.3f" % auc)
+
+    return auc, fig
diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.evaluation.confusion_matrix.json
similarity index 85%
rename from zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json
rename to zephyr_ml/primitives/jsons/zephyr_ml.primitives.evaluation.confusion_matrix.json
index 766ca5f..670357c 100644
--- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.confusion_matrix.json
+++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.evaluation.confusion_matrix.json
@@ -1,12 +1,12 @@
 {
-  "name": "zephyr_ml.primitives.postprocessing.confusion_matrix",
+  "name": "zephyr_ml.primitives.evaluation.confusion_matrix",
   "contributors": ["Raymond Pan <rpan@mit.edu>"],
   "description": "Create and plot confusion matrix.",
   "classifiers": {
     "type": "helper"
   },
   "modalities": [],
-  "primitive": "zephyr_ml.primitives.postprocessing.confusion_matrix",
+  "primitive": "zephyr_ml.primitives.evaluation.confusion_matrix",
   "produce": {
     "args": [
       {
diff --git a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.evaluation.roc_auc_score_and_curve.json
similarity index 84%
rename from zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json
rename to zephyr_ml/primitives/jsons/zephyr_ml.primitives.evaluation.roc_auc_score_and_curve.json
index 778bde9..2f18120 100644
--- a/zephyr_ml/primitives/jsons/zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve.json
+++ b/zephyr_ml/primitives/jsons/zephyr_ml.primitives.evaluation.roc_auc_score_and_curve.json
@@ -1,12 +1,12 @@
 {
-  "name": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve",
+  "name": "zephyr_ml.primitives.evaluation.roc_auc_score_and_curve",
   "contributors": ["Raymond Pan <rpan@mit.edu>"],
   "description": "Calculate ROC AUC score and plot curve.",
   "classifiers": {
     "type": "helper"
   },
   "modalities": [],
-  "primitive": "zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve",
+  "primitive": "zephyr_ml.primitives.evaluation.roc_auc_score_and_curve",
   "produce": {
     "args": [
       {
diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py
index 2ae0af1..b9621ff 100644
--- a/zephyr_ml/primitives/postprocessing.py
+++ b/zephyr_ml/primitives/postprocessing.py
@@ -4,14 +4,12 @@
 
 import logging
 
-import matplotlib.pyplot as plt
 import numpy as np
-import seaborn as sns
 import sklearn
-from sklearn import metrics
 
 LOGGER = logging.getLogger(__name__)
 
+
 METRICS = {
     "accuracy": sklearn.metrics.accuracy_score,
     "precision": sklearn.metrics.precision_score,
@@ -85,59 +83,3 @@ def apply_threshold(self, y_proba):
         binary = [1 if x else 0 for x in y_proba > self._threshold]
         return binary, self._threshold, self._scores
 
-
-def confusion_matrix(
-        y_true,
-        y_pred,
-        labels=None,
-        sample_weight=None,
-        normalize=None):
-    conf_matrix = metrics.confusion_matrix(
-        y_true, y_pred, labels=labels, sample_weight=sample_weight, normalize=normalize
-    )
-    fig = plt.figure()
-    ax = fig.add_axes(sns.heatmap(conf_matrix, annot=True, cmap="Blues"))
-
-    ax.set_title("Confusion Matrix\n")
-    ax.set_xlabel("\nPredicted Values")
-    ax.set_ylabel("Actual Values")
-
-    ax.xaxis.set_ticklabels(["False", "True"])
-    ax.yaxis.set_ticklabels(["False", "True"])
-
-    return conf_matrix, fig
-
-
-def roc_auc_score_and_curve(
-    y_true, y_proba, pos_label=None, sample_weight=None, drop_intermediate=True
-):
-    if y_proba.ndim > 1:
-        y_proba = y_proba[:, 1]
-    fpr, tpr, _ = metrics.roc_curve(
-        y_true,
-        y_proba,
-        pos_label=pos_label,
-        sample_weight=sample_weight,
-        drop_intermediate=drop_intermediate,
-    )
-    ns_probs = [0 for _ in range(len(y_true))]
-    ns_fpr, ns_tpr, _ = metrics.roc_curve(
-        y_true,
-        ns_probs,
-        pos_label=pos_label,
-        sample_weight=sample_weight,
-        drop_intermediate=drop_intermediate,
-    )
-
-    auc = metrics.roc_auc_score(y_true, y_proba)
-    fig, ax = plt.subplots(1, 1)
-
-    ax.plot(fpr, tpr, "ro")
-    ax.plot(fpr, tpr)
-    ax.plot(ns_fpr, ns_tpr, linestyle="--", color="green")
-
-    ax.set_ylabel("True Positive Rate")
-    ax.set_xlabel("False Positive Rate")
-    ax.set_title("AUC: %.3f" % auc)
-
-    return auc, fig

From a68fbabae6083c26b085fc8c40e737b1dedc1ab3 Mon Sep 17 00:00:00 2001
From: Raymond Pan <raymondpan12345@gmail.com>
Date: Sun, 25 May 2025 13:43:36 -0700
Subject: [PATCH 3/7] lint

---
 zephyr_ml/primitives/postprocessing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/zephyr_ml/primitives/postprocessing.py b/zephyr_ml/primitives/postprocessing.py
index b9621ff..f1ce832 100644
--- a/zephyr_ml/primitives/postprocessing.py
+++ b/zephyr_ml/primitives/postprocessing.py
@@ -82,4 +82,3 @@ def apply_threshold(self, y_proba):
 
         binary = [1 if x else 0 for x in y_proba > self._threshold]
         return binary, self._threshold, self._scores
-

From a14469b5127be6bdf5af9b1831bda773b1f8b844 Mon Sep 17 00:00:00 2001
From: Raymond Pan <raymondpan12345@gmail.com>
Date: Mon, 26 May 2025 23:22:12 -0700
Subject: [PATCH 4/7] error handling

---
 zephyr_ml/core.py | 87 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 16 deletions(-)

diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py
index 8417207..1c70fc8 100644
--- a/zephyr_ml/core.py
+++ b/zephyr_ml/core.py
@@ -125,7 +125,7 @@ def try_log_backwards_set_method_warning(self, name, next_step):
                         f"\tAll other steps' results will be considered stale."))
 
     def try_log_backwards_key_method_warning(self, name, next_step):
-        steps_in_between = self.get_steps_in_between(next_step, self.current_step+1)
+        steps_in_between = self.get_steps_in_between(next_step, self.current_step + 1)
         if len(steps_in_between) > 0:
             steps_in_between_str = (f"\tAny results produced by the following steps "
                                     f"will be considered stale:\n"
@@ -353,7 +353,7 @@ def __init__(self):
                 [self.get_train_test_split]),
             ([self.fit_pipeline], [self.set_fitted_pipeline], [self.get_fitted_pipeline]),
             ([self.predict, self.evaluate], [], [])
-            ]
+        ]
         self._guide_handler = GuideHandler(step_order)
 
     def GET_ENTITYSET_TYPES(self):
@@ -553,7 +553,9 @@ def generate_label_times(
             AssertionError: If entityset has not been generated or set or labeling_fn is
                 not a string and not callable.
         """
-        assert self._entityset is not None, "entityset has not been set"
+
+        if self._entityset is None:
+            raise ValueError("entityset has not been set")
 
         if isinstance(labeling_fn, str):  # get predefined labeling function
             labeling_fn_map = get_labeling_functions_map()
@@ -630,6 +632,9 @@ def get_label_times(self, visualize=False):
         Returns:
             tuple: (composeml.LabelTimes, dict) The label times and metadata.
         """
+        if self._label_times is None:
+            raise ValueError("Label times have not been set"
+                             "Call generate_label_times or set_label_times first.")
         if visualize:
             cp.label_times.plots.LabelPlots(self._label_times).distribution()
         return self._label_times, self._label_times_meta
@@ -724,7 +729,20 @@ def generate_feature_matrix(
         Returns:
             tuple: (pd.DataFrame, list, featuretools.EntitySet)
                 Feature matrix, feature definitions, and the processed entityset.
+
+        Raises:
+            ValueError: If required attributes are missing.
         """
+        if self._entityset is None:
+            raise ValueError(
+                "Entityset has not been set. Call generate_entityset or "
+                "set_entityset first.")
+
+        if self._label_times is None:
+            raise ValueError(
+                "Label times have not been set. Call generate_label_times or "
+                "set_label_times first.")
+
         entityset_copy = copy.deepcopy(self._entityset)
         # perform signal processing
         if signal_dataframe_name is not None and signal_column is not None:
@@ -784,6 +802,9 @@ def get_feature_matrix(self):
             tuple: (pd.DataFrame, str, list) The feature matrix, label column name,
                 and feature definitions.
         """
+        if self._feature_matrix is None:
+            raise ValueError("Feature matrix has not been generated. "
+                             "Call generate_feature_matrix or set_feature_matrix first.")
         return self._feature_matrix, self._label_col_name, self._features
 
     @guide
@@ -830,6 +851,11 @@ def generate_train_test_split(
         Returns:
             tuple: (X_train, X_test, y_train, y_test) The split feature matrices and labels.
         """
+        if self._feature_matrix is None:
+            raise ValueError(
+                "Feature matrix has not been generated. Call generate_feature_matrix "
+                "or set_feature_matrix first.")
+
         feature_matrix = self._feature_matrix.copy()
         labels = feature_matrix.pop(self._label_col_name)
 
@@ -880,7 +906,9 @@ def get_train_test_split(self):
         """
         if (self._X_train is None or self._X_test is None or
                 self._y_train is None or self._y_test is None):
-            return None
+            raise ValueError(
+                "Train-test split has not been generated. "
+                "Call generate_train_test_split or set_train_test_split first.")
         return self._X_train, self._X_test, self._y_train, self._y_test
 
     @guide
@@ -894,8 +922,8 @@ def set_fitted_pipeline(self, pipeline):
 
     @guide
     def fit_pipeline(
-            self, pipeline="xgb_classifier", pipeline_hyperparameters=None,
-            X=None, y=None, visual=False, **kwargs):
+            self, pipeline="xgb_classifier",
+            pipeline_hyperparameters=None, visual=False, **kwargs):
         """Fit a machine learning pipeline.
 
         Args:
@@ -905,28 +933,29 @@ def fit_pipeline(
                 - Dictionary with pipeline specification
                 - MLPipeline instance
             pipeline_hyperparameters (dict, optional): Hyperparameters for the pipeline.
-            X (pd.DataFrame, optional): Training features. If None, uses stored training set.
-            y (array-like, optional): Training labels. If None, uses stored training labels.
             visual (bool, optional): Whether to return visualization data. Defaults to False.
             **kwargs: Additional arguments passed to the pipeline's fit method.
 
         Returns:
             dict or None: If visual=True, returns visualization data dictionary.
+
+        Raises:
+            ValueError: If required attributes are missing.
         """
-        self._pipeline = self._get_mlpipeline(
-            pipeline, pipeline_hyperparameters)
+        if self._X_train is None or self._y_train is None:
+            raise ValueError(
+                "No training data provided. Call generate_train_test_split "
+                "or set_train_test_split first.")
 
-        if X is None:
-            X = self._X_train
-        if y is None:
-            y = self._y_train
+        self._pipeline = self._get_mlpipeline(pipeline, pipeline_hyperparameters)
 
         if visual:
             outputs_spec, visual_names = self._get_outputs_spec(False)
         else:
             outputs_spec = None
 
-        outputs = self._pipeline.fit(X, y, output_=outputs_spec, **kwargs)
+        outputs = self._pipeline.fit(X=self._X_train, y=self._y_train,
+                                     output_=outputs_spec, **kwargs)
 
         if visual and outputs is not None:
             return dict(zip(visual_names, outputs))
@@ -951,9 +980,22 @@ def predict(self, X=None, visual=False, **kwargs):
 
         Returns:
             array-like or tuple: Predictions, and if visual=True, also returns visualization data.
+
+        Raises:
+            ValueError: If required attributes or parameters are missing.
         """
-        if X is None:
+        if self._pipeline is None:
+            raise ValueError(
+                "No pipeline has been fitted. Call fit_pipeline or set_fitted_pipeline first.")
+
+        if X is None and self._X_test is None:
+            raise ValueError(
+                "No test data provided. Pass in test data or "
+                "call generate_train_test_split or set_train_test_split first.")
+
+        elif X is None:
             X = self._X_test
+
         if visual:
             outputs_spec, visual_names = self._get_outputs_spec()
         else:
@@ -984,9 +1026,22 @@ def evaluate(
 
         Returns:
             dict: A dictionary mapping metric names to their computed values.
+
+        Raises:
+            ValueError: If required attributes are missing.
         """
+        if self._pipeline is None:
+            raise ValueError(
+                "No pipeline has been fitted. Call fit_pipeline or set_fitted_pipeline first.")
+
+        if (X is None and self._X_test is None) or (y is None and self._y_test is None):
+            raise ValueError(
+                "No test data provided. Pass in test data or "
+                "call generate_train_test_split or set_train_test_split first.")
+
         if X is None:
             X = self._X_test
+
         if y is None:
             y = self._y_test
 

From ad9a1173319e07bde1f5a78fc76ac2b83c42e226 Mon Sep 17 00:00:00 2001
From: Raymond Pan <raymondpan12345@gmail.com>
Date: Sun, 1 Jun 2025 15:07:07 -0700
Subject: [PATCH 5/7] privatize other code

---
 tests/labeling/test_data_labeler.py           |   2 +-
 tests/labeling/test_helpers.py                |   2 +-
 tests/test_entityset.py                       |   6 +-
 tests/test_feature_engineering.py             |  24 +-
 tests/test_guide.py                           |   2 +-
 tests/test_metadata.py                        |   2 +-
 zephyr_ml/__init__.py                         |   2 -
 zephyr_ml/{entityset.py => _entityset.py}     |   4 +-
 ...engineering.py => _feature_engineering.py} |   0
 zephyr_ml/_guide_handler.py                   | 296 +++++++++++++++++
 zephyr_ml/{labeling => _labeling}/__init__.py |   6 +-
 .../{labeling => _labeling}/data_labeler.py   |   0
 .../_labeling/labeling_functions/__init__.py  |   5 +
 .../labeling_functions/brake_pad_presence.py  |   2 +-
 .../converter_replacement_presence.py         |   2 +-
 .../labeling_functions/planet_bearing.py      |   2 +-
 .../labeling_functions/total_power_loss.py    |   2 +-
 zephyr_ml/{labeling => _labeling}/utils.py    |   0
 zephyr_ml/{metadata.py => _metadata.py}       |   0
 zephyr_ml/core.py                             | 301 +-----------------
 .../labeling/labeling_functions/__init__.py   |   5 -
 21 files changed, 334 insertions(+), 331 deletions(-)
 rename zephyr_ml/{entityset.py => _entityset.py} (98%)
 rename zephyr_ml/{feature_engineering.py => _feature_engineering.py} (100%)
 create mode 100644 zephyr_ml/_guide_handler.py
 rename zephyr_ml/{labeling => _labeling}/__init__.py (88%)
 rename zephyr_ml/{labeling => _labeling}/data_labeler.py (100%)
 create mode 100644 zephyr_ml/_labeling/labeling_functions/__init__.py
 rename zephyr_ml/{labeling => _labeling}/labeling_functions/brake_pad_presence.py (97%)
 rename zephyr_ml/{labeling => _labeling}/labeling_functions/converter_replacement_presence.py (97%)
 rename zephyr_ml/{labeling => _labeling}/labeling_functions/planet_bearing.py (97%)
 rename zephyr_ml/{labeling => _labeling}/labeling_functions/total_power_loss.py (96%)
 rename zephyr_ml/{labeling => _labeling}/utils.py (100%)
 rename zephyr_ml/{metadata.py => _metadata.py} (100%)
 delete mode 100644 zephyr_ml/labeling/labeling_functions/__init__.py

diff --git a/tests/labeling/test_data_labeler.py b/tests/labeling/test_data_labeler.py
index a0176ff..ed36772 100644
--- a/tests/labeling/test_data_labeler.py
+++ b/tests/labeling/test_data_labeler.py
@@ -2,7 +2,7 @@
 
 import featuretools as ft
 
-from zephyr_ml.labeling import DataLabeler
+from zephyr_ml._labeling.data_labeler import DataLabeler
 
 
 class TestDataLabeler:
diff --git a/tests/labeling/test_helpers.py b/tests/labeling/test_helpers.py
index a43ca9b..95fdd38 100644
--- a/tests/labeling/test_helpers.py
+++ b/tests/labeling/test_helpers.py
@@ -2,7 +2,7 @@
 import numpy as np
 import pandas as pd
 
-from zephyr_ml.labeling.utils import (
+from zephyr_ml._labeling.utils import (
     aggregate_by_column, categorical_presence, denormalize, greater_than, keyword_in_text,
     merge_binary_labeling_functions, total_duration)
 
diff --git a/tests/test_entityset.py b/tests/test_entityset.py
index ab3ec76..580fb03 100644
--- a/tests/test_entityset.py
+++ b/tests/test_entityset.py
@@ -1,7 +1,7 @@
 import pandas as pd
 import pytest
 
-from zephyr_ml import _create_entityset
+from zephyr_ml._entityset import create_entityset
 
 
 @pytest.fixture
@@ -120,11 +120,11 @@ def scada_dfs(base_dfs):
 
 
 def create_pidata_entityset(pidata_dfs):
-    return _create_entityset(pidata_dfs, es_type="pidata")
+    return create_entityset(pidata_dfs, es_type="pidata")
 
 
 def create_scada_entityset(scada_dfs):
-    return _create_entityset(scada_dfs, es_type="scada")
+    return create_entityset(scada_dfs, es_type="scada")
 
 
 def test_create_pidata_missing_entities(pidata_dfs):
diff --git a/tests/test_feature_engineering.py b/tests/test_feature_engineering.py
index e4c08ba..fbc6e54 100644
--- a/tests/test_feature_engineering.py
+++ b/tests/test_feature_engineering.py
@@ -1,8 +1,8 @@
 import pandas as pd
 import pytest
 
-from zephyr_ml import _create_entityset
-from zephyr_ml.feature_engineering import process_signals
+from zephyr_ml._entityset import create_entityset
+from zephyr_ml._feature_engineering import process_signals
 
 
 @pytest.fixture
@@ -122,12 +122,12 @@ def scada_dfs(base_dfs):
 
 @pytest.fixture
 def pidata_es(pidata_dfs):
-    return _create_entityset(pidata_dfs, "pidata")
+    return create_entityset(pidata_dfs, "pidata")
 
 
 @pytest.fixture
 def scada_es(scada_dfs):
-    return _create_entityset(scada_dfs, "scada")
+    return create_entityset(scada_dfs, "scada")
 
 
 @pytest.fixture
@@ -153,8 +153,8 @@ def test_process_signals_pidata(pidata_es, transformations, aggregations):
     replace_dataframe = False
     before = pidata_es['pidata'].copy()
 
-    process_signals(pidata_es, signal_dataframe_name, signal_column, transformations, aggregations,
-                    window_size, replace_dataframe)
+    process_signals(pidata_es, signal_dataframe_name, signal_column, transformations,
+                    aggregations, window_size, replace_dataframe)
 
     processed = pidata_es['pidata_processed'].copy()
     after = pidata_es['pidata'].copy()
@@ -189,8 +189,8 @@ def test_process_signals_pidata_replace(
     window_size = '1m'
     replace_dataframe = True
 
-    process_signals(pidata_es, signal_dataframe_name, signal_column, transformations, aggregations,
-                    window_size, replace_dataframe)
+    process_signals(pidata_es, signal_dataframe_name, signal_column, transformations,
+                    aggregations, window_size, replace_dataframe)
 
     processed = pidata_es['pidata'].copy()
 
@@ -224,8 +224,8 @@ def test_process_signals_scada(scada_es, transformations, aggregations):
     replace_dataframe = False
     before = scada_es['scada'].copy()
 
-    process_signals(scada_es, signal_dataframe_name, signal_column, transformations, aggregations,
-                    window_size, replace_dataframe)
+    process_signals(scada_es, signal_dataframe_name, signal_column, transformations,
+                    aggregations, window_size, replace_dataframe)
 
     expected = pd.DataFrame({
         "_index": [0, 1, 2],
@@ -256,8 +256,8 @@ def test_process_signals_scada_replace(
     window_size = '1m'
     replace_dataframe = True
 
-    process_signals(scada_es, signal_dataframe_name, signal_column, transformations, aggregations,
-                    window_size, replace_dataframe)
+    process_signals(scada_es, signal_dataframe_name, signal_column, transformations,
+                    aggregations, window_size, replace_dataframe)
 
     expected = pd.DataFrame({
         "_index": [0, 1, 2],
diff --git a/tests/test_guide.py b/tests/test_guide.py
index e0dcabc..db54bef 100644
--- a/tests/test_guide.py
+++ b/tests/test_guide.py
@@ -1,4 +1,4 @@
-from zephyr_ml.core import GuideHandler, guide
+from zephyr_ml._guide_handler import GuideHandler, guide
 
 
 class DummyObject:
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 8d8f923..6dd6f93 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -1,6 +1,6 @@
 import pytest
 
-from zephyr_ml.metadata import DEFAULT_ES_KWARGS, DEFAULT_ES_TYPE_KWARGS, get_mapped_kwargs
+from zephyr_ml._metadata import DEFAULT_ES_KWARGS, DEFAULT_ES_TYPE_KWARGS, get_mapped_kwargs
 
 
 def test_default_scada_mapped_kwargs():
diff --git a/zephyr_ml/__init__.py b/zephyr_ml/__init__.py
index 6213460..3e633d1 100644
--- a/zephyr_ml/__init__.py
+++ b/zephyr_ml/__init__.py
@@ -9,8 +9,6 @@
 import os
 
 from zephyr_ml.core import Zephyr
-from zephyr_ml.entityset import VALIDATE_DATA_FUNCTIONS, _create_entityset
-from zephyr_ml.labeling import DataLabeler
 
 MLBLOCKS_PRIMITIVES = os.path.join(
     os.path.dirname(__file__), "primitives", "jsons")
diff --git a/zephyr_ml/entityset.py b/zephyr_ml/_entityset.py
similarity index 98%
rename from zephyr_ml/entityset.py
rename to zephyr_ml/_entityset.py
index 514c325..8b3e4db 100644
--- a/zephyr_ml/entityset.py
+++ b/zephyr_ml/_entityset.py
@@ -2,7 +2,7 @@
 
 import featuretools as ft
 
-from zephyr_ml.metadata import get_mapped_kwargs
+from zephyr_ml._metadata import get_mapped_kwargs
 
 
 def _validate_data(dfs, es_type, es_kwargs):
@@ -159,7 +159,7 @@ def validate_vibrations_data(dfs, new_kwargs_mapping=None):
 }
 
 
-def _create_entityset(entities, es_type, new_kwargs_mapping=None):
+def create_entityset(entities, es_type, new_kwargs_mapping=None):
 
     validate_func = VALIDATE_DATA_FUNCTIONS[es_type]
     es_kwargs = validate_func(entities, new_kwargs_mapping)
diff --git a/zephyr_ml/feature_engineering.py b/zephyr_ml/_feature_engineering.py
similarity index 100%
rename from zephyr_ml/feature_engineering.py
rename to zephyr_ml/_feature_engineering.py
diff --git a/zephyr_ml/_guide_handler.py b/zephyr_ml/_guide_handler.py
new file mode 100644
index 0000000..995217b
--- /dev/null
+++ b/zephyr_ml/_guide_handler.py
@@ -0,0 +1,296 @@
+import logging
+from functools import wraps
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+class GuideHandler:
+
+    def __init__(self, ordered_steps):
+        self.cur_iteration = 0
+        self.current_step = -1
+        self.start_point = -1
+        self.ordered_steps = ordered_steps
+        self.set_methods = set()
+
+        self.producer_to_step_map = {}
+        self.getter_to_step_map = {}
+
+        self.iterations = []
+        for idx, (keys, sets, gets) in enumerate(self.ordered_steps):
+            self.iterations.append(-1)
+
+            for prod in keys:
+                self.producer_to_step_map[prod.__name__] = idx
+            for prod in sets:
+                self.producer_to_step_map[prod.__name__] = idx
+                self.set_methods.add(prod.__name__)
+
+            for get in gets:
+                self.getter_to_step_map[get.__name__] = idx
+
+    def or_join(self, methods):
+        return " or ".join([method.__name__ for method in methods])
+
+    def get_get_steps_in_between(self, cur_step, next_step):
+        step_strs = []
+        for step in range(cur_step + 1, next_step):
+            step_strs.append(
+                f"{step} {self.or_join(self.ordered_steps[step][2])}")
+        return step_strs
+
+    def get_last_up_to_date(self, next_step):
+        latest_up_to_date = 0
+        for step in range(next_step):
+            if self.iterations[step] == self.cur_iteration:
+                latest_up_to_date = step
+        return latest_up_to_date
+
+    def join_steps(self, step_strs):
+        return "\n\t".join(step_strs)
+
+    def get_steps_in_between(self, cur_step, next_step):
+        step_strs = []
+        for step in range(cur_step + 1, next_step):
+            option_strs = []
+            option_strs.extend(self.ordered_steps[step][0])
+            option_strs.extend(self.ordered_steps[step][1])
+            step_strs.append(f"{step}. {self.or_join(option_strs)}")
+        return step_strs
+
+    def log_next_producer_step(self, name):
+        next_step = self.current_step + 1
+
+        if next_step >= len(self.ordered_steps):
+            cur_step_name = self.or_join(self.ordered_steps[self.current_step][0])
+            LOGGER.warning((f"[GUIDE] DONE: {name}.\n"
+                            f"\tYou have reached the end of the "
+                            f"predictive engineering workflow.\n"
+                            f"\tYou can call {cur_step_name} again or re-perform previous steps "
+                            f"based on results."))
+        else:
+            next_step_name = self.or_join(self.ordered_steps[next_step][0])
+            LOGGER.warning(f"[GUIDE] DONE: {name}.\n"
+                           f"\tYou can perform the next step by calling {next_step_name}.")
+
+    def perform_producer_step(self, zephyr, method,
+                              *method_args, **method_kwargs):
+        step_num = self.producer_to_step_map[method.__name__]
+        res = method(zephyr, *method_args, **method_kwargs)
+        self.current_step = step_num
+        self.iterations[step_num] = self.cur_iteration
+        self.log_next_producer_step(method.__name__)
+        return res
+
+    def try_log_forward_set_method_warning(self, name, next_step):
+        if self.current_step != -1:
+            from_str = (f"Going from step {self.current_step} to "
+                        f"step {next_step} by performing {name}.")
+        else:
+            from_str = (f"Performing step {next_step} with {name}.")
+        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
+                        f"\t{from_str}\n"
+                        f"\tThis is a forward step via a set method.\n"
+                        f"\tAll previous steps' results will be considered stale."))
+
+    def try_log_backwards_set_method_warning(self, name, next_step):
+        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
+                        f"\tGoing from step {self.current_step} to "
+                        f"step {next_step} by performing {name}.\n"
+                        f"\tThis is a backwards step via a set method.\n"
+                        f"\tAll other steps' results will be considered stale."))
+
+    def try_log_backwards_key_method_warning(self, name, next_step):
+        steps_in_between = self.get_steps_in_between(next_step, self.current_step + 1)
+        if len(steps_in_between) > 0:
+            steps_in_between_str = (f"\tAny results produced by the following steps "
+                                    f"will be considered stale:\n"
+                                    f"\t{self.join_steps(steps_in_between)}")
+        else:
+            steps_in_between_str = ""
+
+        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
+                        f"\tGoing from step {self.current_step} to "
+                        f"step {next_step} by performing {name}.\n"
+                        f"\tThis is a backwards step via a key method.\n"
+                        f"{steps_in_between_str}"))
+
+    def log_get_inconsistent_warning(self, name, next_step):
+        prod_steps_str = self.or_join(self.ordered_steps[next_step][0])
+        prod_steps = f"{next_step}.{prod_steps_str}"
+        latest_up_to_date = self.get_last_up_to_date(next_step)
+        LOGGER.warning((f"[GUIDE] INCONSISTENCY WARNING: {name}.\n"
+                        f"Unable to perform {name} because"
+                        f"{prod_steps} has not been run yet.\n"
+                        f"Run steps starting at or before {latest_up_to_date}."))
+
+    def log_get_stale_warning(self, name, next_step):
+        latest_up_to_date = self.get_last_up_to_date(next_step)
+        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
+                        f"This data is potentially stale.\n"
+                        f"Re-run steps starting at or before {latest_up_to_date}"
+                        f"to ensure data is up to date."))
+
+    # tries to perform step if possible -> warns that data might be stale
+
+    def try_perform_forward_producer_step(
+            self, zephyr, method, *method_args, **method_kwargs):
+        name = method.__name__
+        next_step = self.producer_to_step_map[name]
+        if name in self.set_methods:  # set method will update start point and start new iteration
+            self.try_log_forward_set_method_warning(name, next_step)
+            self.start_point = next_step
+            self.cur_iteration += 1
+        # next_step == 0, set method (already warned), or previous step is up
+        # to term
+        res = self.perform_producer_step(
+            zephyr, method, *method_args, **method_kwargs)
+        return res
+
+    def try_perform_backward_producer_step(
+            self, zephyr, method, *method_args, **method_kwargs):
+        name = method.__name__
+        next_step = self.producer_to_step_map[name]
+        # starting new iteration
+        self.cur_iteration += 1
+        if next_step == 0 or name in self.set_methods:
+            self.start_point = next_step
+        else:  # key method
+            # mark everything from start point to next step as current term
+            for i in range(self.start_point, next_step):
+                if self.iterations[i] != -1:
+                    self.iterations[i] = self.cur_iteration
+
+        if name in self.set_methods:
+            self.try_log_backwards_set_method_warning(name, next_step)
+        else:
+            self.try_log_backwards_key_method_warning(name, next_step)
+
+        res = self.perform_producer_step(
+            zephyr, method, *method_args, **method_kwargs)
+
+        return res
+
+    def try_perform_producer_step(
+            self, zephyr, method, *method_args, **method_kwargs):
+        name = method.__name__
+        next_step = self.producer_to_step_map[name]
+        if next_step >= self.current_step:
+            res = self.try_perform_forward_producer_step(
+                zephyr, method, *method_args, **method_kwargs)
+            return res
+        else:
+            res = self.try_perform_backward_producer_step(
+                zephyr, method, *method_args, **method_kwargs)
+            return res
+
+    # dont update current step or terms
+
+    def try_perform_inconsistent_producer_step(  # add using stale and overwriting
+            self, zephyr, method, *method_args, **method_kwargs):
+        name = method.__name__
+        next_step = self.producer_to_step_map[name]
+        # inconsistent forward step: performing key method but previous step is
+        # not up to date
+        if (next_step >= self.current_step and
+                self.iterations[next_step - 1] != self.cur_iteration):
+            prev_step = next_step - 1
+            prev_set_method = self.or_join(self.ordered_steps[prev_step][1])
+            prev_key_method = self.or_join(self.ordered_steps[prev_step][0])
+            if next_step == len(self.ordered_steps) - 1:
+                final_text = (f"\tOtherwise, you can regenerate the data of the previous "
+                              f"step by calling {prev_key_method}, and then call {name} again.")
+            else:
+                corr_set_method = self.or_join(self.ordered_steps[next_step][1])
+                final_text = (f"\tIf you already have the data for THIS step, you can use "
+                              f"{corr_set_method} to set the data.\n"
+                              f"\tOtherwise, you can regenerate the data of the "
+                              f"previous step by calling {prev_key_method}, "
+                              f"and then call {name} again.")
+            LOGGER.warning(f"[GUIDE] INCONSISTENCY WARNING: {name}\n"
+                           f"\tUnable to perform {name} because you are "
+                           f"performing a key method at step {next_step} but the result of the "
+                           f"previous step, step {prev_step}, is stale.\n"
+                           f"\tIf you want to use the stale result or "
+                           f"already have the data for step {prev_step}, you can use "
+                           f"{prev_set_method} to set the data.\n"
+                           f"{final_text}")
+        elif (next_step < self.current_step and
+              self.iterations[next_step - 1] != self.cur_iteration):
+            prev_step = next_step - 1
+            prev_key_method = self.or_join(self.ordered_steps[prev_step][0])
+            prev_set_method = self.or_join(self.ordered_steps[prev_step][1])
+
+            if next_step == len(self.ordered_steps) - 1:
+                final_text = (f"\tOtherwise, you can regenerate the data of the previous "
+                              f"step by calling {prev_key_method}, and then call {name} again.")
+            else:
+                corr_set_method = self.or_join(self.ordered_steps[next_step][1])
+                final_text = (f"\tIf you already have the data for THIS step, you can use "
+                              f"{corr_set_method} to set the data.\n"
+                              f"\tOtherwise, you can regenerate the data of the "
+                              f"previous step by calling {prev_key_method}, "
+                              f"and then call {name} again.")
+            LOGGER.warning(f"[GUIDE] INCONSISTENCY WARNING: {name}\n"
+                           f"\tUnable to perform {name} because "
+                           f"you are going backwards and starting a new iteration by "
+                           f"performing a key method at step {next_step} but the result of the "
+                           f"previous step, step {prev_step}, is STALE.\n"
+                           f"\tIf you want to use the STALE result or "
+                           f"already have the data for step {prev_step}, you can use "
+                           f"{prev_set_method} to set the data.\n"
+                           f"{final_text}")
+
+    def try_perform_getter_step(
+            self, zephyr, method, *method_args, **method_kwargs):
+        name = method.__name__
+        # either inconsistent, stale, or up to date
+        step_num = self.getter_to_step_map[name]
+        step_iteration = self.iterations[step_num]
+        if step_iteration == -1:
+            self.log_get_inconsistent_warning(name, step_num)
+        elif step_iteration == self.cur_iteration:
+            res = method(zephyr, *method_args, **method_kwargs)
+            return res
+        else:
+            self.log_get_stale_warning(name, step_num)
+            res = method(zephyr, *method_args, **method_kwargs)
+            return res
+
+    def guide_step(self, zephyr, method, *method_args, **method_kwargs):
+        method_name = method.__name__
+        if method_name in self.producer_to_step_map:
+            # up-todate
+            next_step = self.producer_to_step_map[method_name]
+            if (next_step == 0 or  # 0 step always valid, starting new iteration
+                # set method always valid, but will update start point and
+                # start new iteration
+                method_name in self.set_methods or
+                    # key method valid if previous step is up to date
+                    self.iterations[next_step - 1] == self.cur_iteration):
+                # forward step only valid if set method or key method w/ no
+                # skips
+                res = self.try_perform_producer_step(
+                    zephyr, method, *method_args, **method_kwargs)
+                return res
+            else:  # stale or inconsistent
+                res = self.try_perform_inconsistent_producer_step(
+                    zephyr, method, *method_args, **method_kwargs)
+                return res
+        elif method_name in self.getter_to_step_map:
+            res = self.try_perform_getter_step(
+                zephyr, method, *method_args, **method_kwargs)
+            return res
+        else:
+            print(f"Method {method_name} does not need to be wrapped")
+
+
+def guide(method):
+
+    @wraps(method)
+    def guided_step(instance, *method_args, **method_kwargs):
+        return instance._guide_handler.guide_step(
+            instance, method, *method_args, **method_kwargs)
+
+    return guided_step
diff --git a/zephyr_ml/labeling/__init__.py b/zephyr_ml/_labeling/__init__.py
similarity index 88%
rename from zephyr_ml/labeling/__init__.py
rename to zephyr_ml/_labeling/__init__.py
index 60cb5d7..4a8c9bb 100644
--- a/zephyr_ml/labeling/__init__.py
+++ b/zephyr_ml/_labeling/__init__.py
@@ -1,6 +1,6 @@
-from zephyr_ml.labeling import utils
-from zephyr_ml.labeling.data_labeler import DataLabeler
-from zephyr_ml.labeling.labeling_functions import (
+from zephyr_ml._labeling import utils
+from zephyr_ml._labeling.data_labeler import DataLabeler
+from zephyr_ml._labeling.labeling_functions import (
     brake_pad_presence, converter_replacement_presence, gearbox_replace_presence, total_power_loss)
 
 LABELING_FUNCTIONS = [
diff --git a/zephyr_ml/labeling/data_labeler.py b/zephyr_ml/_labeling/data_labeler.py
similarity index 100%
rename from zephyr_ml/labeling/data_labeler.py
rename to zephyr_ml/_labeling/data_labeler.py
diff --git a/zephyr_ml/_labeling/labeling_functions/__init__.py b/zephyr_ml/_labeling/labeling_functions/__init__.py
new file mode 100644
index 0000000..241cbe1
--- /dev/null
+++ b/zephyr_ml/_labeling/labeling_functions/__init__.py
@@ -0,0 +1,5 @@
+from zephyr_ml._labeling.labeling_functions.brake_pad_presence import brake_pad_presence
+from zephyr_ml._labeling.labeling_functions.converter_replacement_presence import (
+    converter_replacement_presence)
+from zephyr_ml._labeling.labeling_functions.planet_bearing import gearbox_replace_presence
+from zephyr_ml._labeling.labeling_functions.total_power_loss import total_power_loss
diff --git a/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py b/zephyr_ml/_labeling/labeling_functions/brake_pad_presence.py
similarity index 97%
rename from zephyr_ml/labeling/labeling_functions/brake_pad_presence.py
rename to zephyr_ml/_labeling/labeling_functions/brake_pad_presence.py
index fe32c33..3d73b33 100644
--- a/zephyr_ml/labeling/labeling_functions/brake_pad_presence.py
+++ b/zephyr_ml/_labeling/labeling_functions/brake_pad_presence.py
@@ -1,4 +1,4 @@
-from zephyr_ml.labeling.utils import denormalize
+from zephyr_ml._labeling.utils import denormalize
 
 
 def brake_pad_presence(es, column_map={}):
diff --git a/zephyr_ml/labeling/labeling_functions/converter_replacement_presence.py b/zephyr_ml/_labeling/labeling_functions/converter_replacement_presence.py
similarity index 97%
rename from zephyr_ml/labeling/labeling_functions/converter_replacement_presence.py
rename to zephyr_ml/_labeling/labeling_functions/converter_replacement_presence.py
index eef8bd8..11884ce 100644
--- a/zephyr_ml/labeling/labeling_functions/converter_replacement_presence.py
+++ b/zephyr_ml/_labeling/labeling_functions/converter_replacement_presence.py
@@ -1,4 +1,4 @@
-from zephyr_ml.labeling.utils import denormalize
+from zephyr_ml._labeling.utils import denormalize
 
 
 def converter_replacement_presence(es, column_map={}):
diff --git a/zephyr_ml/labeling/labeling_functions/planet_bearing.py b/zephyr_ml/_labeling/labeling_functions/planet_bearing.py
similarity index 97%
rename from zephyr_ml/labeling/labeling_functions/planet_bearing.py
rename to zephyr_ml/_labeling/labeling_functions/planet_bearing.py
index 36a5412..fdd07af 100644
--- a/zephyr_ml/labeling/labeling_functions/planet_bearing.py
+++ b/zephyr_ml/_labeling/labeling_functions/planet_bearing.py
@@ -1,4 +1,4 @@
-from zephyr_ml.labeling.utils import denormalize
+from zephyr_ml._labeling.utils import denormalize
 
 
 def gearbox_replace_presence(es, column_map={}):
diff --git a/zephyr_ml/labeling/labeling_functions/total_power_loss.py b/zephyr_ml/_labeling/labeling_functions/total_power_loss.py
similarity index 96%
rename from zephyr_ml/labeling/labeling_functions/total_power_loss.py
rename to zephyr_ml/_labeling/labeling_functions/total_power_loss.py
index a866a05..343b17b 100644
--- a/zephyr_ml/labeling/labeling_functions/total_power_loss.py
+++ b/zephyr_ml/_labeling/labeling_functions/total_power_loss.py
@@ -1,5 +1,5 @@
 
-from zephyr_ml.labeling.utils import denormalize
+from zephyr_ml._labeling.utils import denormalize
 
 
 def total_power_loss(es, column_map={}):
diff --git a/zephyr_ml/labeling/utils.py b/zephyr_ml/_labeling/utils.py
similarity index 100%
rename from zephyr_ml/labeling/utils.py
rename to zephyr_ml/_labeling/utils.py
diff --git a/zephyr_ml/metadata.py b/zephyr_ml/_metadata.py
similarity index 100%
rename from zephyr_ml/metadata.py
rename to zephyr_ml/_metadata.py
diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py
index 1c70fc8..ffaff68 100644
--- a/zephyr_ml/core.py
+++ b/zephyr_ml/core.py
@@ -2,7 +2,6 @@
 import json
 import logging
 import os
-from functools import wraps
 from inspect import getfullargspec
 
 import composeml as cp
@@ -12,9 +11,10 @@
 from mlblocks import MLBlock, MLPipeline
 from sklearn.model_selection import train_test_split
 
-from zephyr_ml.entityset import VALIDATE_DATA_FUNCTIONS, _create_entityset
-from zephyr_ml.feature_engineering import process_signals
-from zephyr_ml.labeling import get_labeling_functions, get_labeling_functions_map
+from zephyr_ml._entityset import create_entityset, VALIDATE_DATA_FUNCTIONS
+from zephyr_ml._feature_engineering import process_signals
+from zephyr_ml._labeling import get_labeling_functions, get_labeling_functions_map
+from zephyr_ml._guide_handler import guide, GuideHandler
 
 DEFAULT_METRICS = [
     "sklearn.metrics.accuracy_score",
@@ -28,297 +28,6 @@
 LOGGER = logging.getLogger(__name__)
 
 
-class GuideHandler:
-
-    def __init__(self, ordered_steps):
-        self.cur_iteration = 0
-        self.current_step = -1
-        self.start_point = -1
-        self.ordered_steps = ordered_steps
-        self.set_methods = set()
-
-        self.producer_to_step_map = {}
-        self.getter_to_step_map = {}
-
-        self.iterations = []
-        for idx, (keys, sets, gets) in enumerate(self.ordered_steps):
-            self.iterations.append(-1)
-
-            for prod in keys:
-                self.producer_to_step_map[prod.__name__] = idx
-            for prod in sets:
-                self.producer_to_step_map[prod.__name__] = idx
-                self.set_methods.add(prod.__name__)
-
-            for get in gets:
-                self.getter_to_step_map[get.__name__] = idx
-
-    def or_join(self, methods):
-        return " or ".join([method.__name__ for method in methods])
-
-    def get_get_steps_in_between(self, cur_step, next_step):
-        step_strs = []
-        for step in range(cur_step + 1, next_step):
-            step_strs.append(
-                f"{step} {self.or_join(self.ordered_steps[step][2])}")
-        return step_strs
-
-    def get_last_up_to_date(self, next_step):
-        latest_up_to_date = 0
-        for step in range(next_step):
-            if self.iterations[step] == self.cur_iteration:
-                latest_up_to_date = step
-        return latest_up_to_date
-
-    def join_steps(self, step_strs):
-        return "\n\t".join(step_strs)
-
-    def get_steps_in_between(self, cur_step, next_step):
-        step_strs = []
-        for step in range(cur_step + 1, next_step):
-            option_strs = []
-            option_strs.extend(self.ordered_steps[step][0])
-            option_strs.extend(self.ordered_steps[step][1])
-            step_strs.append(f"{step}. {self.or_join(option_strs)}")
-        return step_strs
-
-    def log_next_producer_step(self, name):
-        next_step = self.current_step + 1
-
-        if next_step >= len(self.ordered_steps):
-            cur_step_name = self.or_join(self.ordered_steps[self.current_step][0])
-            LOGGER.warning((f"[GUIDE] DONE: {name}.\n"
-                            f"\tYou have reached the end of the "
-                            f"predictive engineering workflow.\n"
-                            f"\tYou can call {cur_step_name} again or re-perform previous steps "
-                            f"based on results."))
-        else:
-            next_step_name = self.or_join(self.ordered_steps[next_step][0])
-            LOGGER.warning(f"[GUIDE] DONE: {name}.\n"
-                           f"\tYou can perform the next step by calling {next_step_name}.")
-
-    def perform_producer_step(self, zephyr, method,
-                              *method_args, **method_kwargs):
-        step_num = self.producer_to_step_map[method.__name__]
-        res = method(zephyr, *method_args, **method_kwargs)
-        self.current_step = step_num
-        self.iterations[step_num] = self.cur_iteration
-        self.log_next_producer_step(method.__name__)
-        return res
-
-    def try_log_forward_set_method_warning(self, name, next_step):
-        if self.current_step != -1:
-            from_str = (f"Going from step {self.current_step} to "
-                        f"step {next_step} by performing {name}.")
-        else:
-            from_str = (f"Performing step {next_step} with {name}.")
-        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
-                        f"\t{from_str}\n"
-                        f"\tThis is a forward step via a set method.\n"
-                        f"\tAll previous steps' results will be considered stale."))
-
-    def try_log_backwards_set_method_warning(self, name, next_step):
-        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
-                        f"\tGoing from step {self.current_step} to "
-                        f"step {next_step} by performing {name}.\n"
-                        f"\tThis is a backwards step via a set method.\n"
-                        f"\tAll other steps' results will be considered stale."))
-
-    def try_log_backwards_key_method_warning(self, name, next_step):
-        steps_in_between = self.get_steps_in_between(next_step, self.current_step + 1)
-        if len(steps_in_between) > 0:
-            steps_in_between_str = (f"\tAny results produced by the following steps "
-                                    f"will be considered stale:\n"
-                                    f"\t{self.join_steps(steps_in_between)}")
-        else:
-            steps_in_between_str = ""
-
-        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
-                        f"\tGoing from step {self.current_step} to "
-                        f"step {next_step} by performing {name}.\n"
-                        f"\tThis is a backwards step via a key method.\n"
-                        f"{steps_in_between_str}"))
-
-    def log_get_inconsistent_warning(self, name, next_step):
-        prod_steps_str = self.or_join(self.ordered_steps[next_step][0])
-        prod_steps = f"{next_step}.{prod_steps_str}"
-        latest_up_to_date = self.get_last_up_to_date(next_step)
-        LOGGER.warning((f"[GUIDE] INCONSISTENCY WARNING: {name}.\n"
-                        f"Unable to perform {name} because"
-                        f"{prod_steps} has not been run yet.\n"
-                        f"Run steps starting at or before {latest_up_to_date}."))
-
-    def log_get_stale_warning(self, name, next_step):
-        latest_up_to_date = self.get_last_up_to_date(next_step)
-        LOGGER.warning((f"[GUIDE] STALE WARNING: {name}.\n"
-                        f"This data is potentially stale.\n"
-                        f"Re-run steps starting at or before {latest_up_to_date}"
-                        f"to ensure data is up to date."))
-
-    # tries to perform step if possible -> warns that data might be stale
-
-    def try_perform_forward_producer_step(
-            self, zephyr, method, *method_args, **method_kwargs):
-        name = method.__name__
-        next_step = self.producer_to_step_map[name]
-        if name in self.set_methods:  # set method will update start point and start new iteration
-            self.try_log_forward_set_method_warning(name, next_step)
-            self.start_point = next_step
-            self.cur_iteration += 1
-        # next_step == 0, set method (already warned), or previous step is up
-        # to term
-        res = self.perform_producer_step(
-            zephyr, method, *method_args, **method_kwargs)
-        return res
-
-    def try_perform_backward_producer_step(
-            self, zephyr, method, *method_args, **method_kwargs):
-        name = method.__name__
-        next_step = self.producer_to_step_map[name]
-        # starting new iteration
-        self.cur_iteration += 1
-        if next_step == 0 or name in self.set_methods:
-            self.start_point = next_step
-        else:  # key method
-            # mark everything from start point to next step as current term
-            for i in range(self.start_point, next_step):
-                if self.iterations[i] != -1:
-                    self.iterations[i] = self.cur_iteration
-
-        if name in self.set_methods:
-            self.try_log_backwards_set_method_warning(name, next_step)
-        else:
-            self.try_log_backwards_key_method_warning(name, next_step)
-
-        res = self.perform_producer_step(
-            zephyr, method, *method_args, **method_kwargs)
-
-        return res
-
-    def try_perform_producer_step(
-            self, zephyr, method, *method_args, **method_kwargs):
-        name = method.__name__
-        next_step = self.producer_to_step_map[name]
-        if next_step >= self.current_step:
-            res = self.try_perform_forward_producer_step(
-                zephyr, method, *method_args, **method_kwargs)
-            return res
-        else:
-            res = self.try_perform_backward_producer_step(
-                zephyr, method, *method_args, **method_kwargs)
-            return res
-
-    # dont update current step or terms
-
-    def try_perform_inconsistent_producer_step(  # add using stale and overwriting
-            self, zephyr, method, *method_args, **method_kwargs):
-        name = method.__name__
-        next_step = self.producer_to_step_map[name]
-        # inconsistent forward step: performing key method but previous step is
-        # not up to date
-        if (next_step >= self.current_step and
-                self.iterations[next_step - 1] != self.cur_iteration):
-            prev_step = next_step - 1
-            prev_set_method = self.or_join(self.ordered_steps[prev_step][1])
-            prev_key_method = self.or_join(self.ordered_steps[prev_step][0])
-            if next_step == len(self.ordered_steps) - 1:
-                final_text = (f"\tOtherwise, you can regenerate the data of the previous "
-                              f"step by calling {prev_key_method}, and then call {name} again.")
-            else:
-                corr_set_method = self.or_join(self.ordered_steps[next_step][1])
-                final_text = (f"\tIf you already have the data for THIS step, you can use "
-                              f"{corr_set_method} to set the data.\n"
-                              f"\tOtherwise, you can regenerate the data of the "
-                              f"previous step by calling {prev_key_method}, "
-                              f"and then call {name} again.")
-            LOGGER.warning(f"[GUIDE] INCONSISTENCY WARNING: {name}\n"
-                           f"\tUnable to perform {name} because you are "
-                           f"performing a key method at step {next_step} but the result of the "
-                           f"previous step, step {prev_step}, is stale.\n"
-                           f"\tIf you want to use the stale result or "
-                           f"already have the data for step {prev_step}, you can use "
-                           f"{prev_set_method} to set the data.\n"
-                           f"{final_text}")
-        elif (next_step < self.current_step and
-              self.iterations[next_step - 1] != self.cur_iteration):
-            prev_step = next_step - 1
-            prev_key_method = self.or_join(self.ordered_steps[prev_step][0])
-            prev_set_method = self.or_join(self.ordered_steps[prev_step][1])
-
-            if next_step == len(self.ordered_steps) - 1:
-                final_text = (f"\tOtherwise, you can regenerate the data of the previous "
-                              f"step by calling {prev_key_method}, and then call {name} again.")
-            else:
-                corr_set_method = self.or_join(self.ordered_steps[next_step][1])
-                final_text = (f"\tIf you already have the data for THIS step, you can use "
-                              f"{corr_set_method} to set the data.\n"
-                              f"\tOtherwise, you can regenerate the data of the "
-                              f"previous step by calling {prev_key_method}, "
-                              f"and then call {name} again.")
-            LOGGER.warning(f"[GUIDE] INCONSISTENCY WARNING: {name}\n"
-                           f"\tUnable to perform {name} because "
-                           f"you are going backwards and starting a new iteration by "
-                           f"performing a key method at step {next_step} but the result of the "
-                           f"previous step, step {prev_step}, is STALE.\n"
-                           f"\tIf you want to use the STALE result or "
-                           f"already have the data for step {prev_step}, you can use "
-                           f"{prev_set_method} to set the data.\n"
-                           f"{final_text}")
-
-    def try_perform_getter_step(
-            self, zephyr, method, *method_args, **method_kwargs):
-        name = method.__name__
-        # either inconsistent, stale, or up to date
-        step_num = self.getter_to_step_map[name]
-        step_iteration = self.iterations[step_num]
-        if step_iteration == -1:
-            self.log_get_inconsistent_warning(name, step_num)
-        elif step_iteration == self.cur_iteration:
-            res = method(zephyr, *method_args, **method_kwargs)
-            return res
-        else:
-            self.log_get_stale_warning(name, step_num)
-            res = method(zephyr, *method_args, **method_kwargs)
-            return res
-
-    def guide_step(self, zephyr, method, *method_args, **method_kwargs):
-        method_name = method.__name__
-        if method_name in self.producer_to_step_map:
-            # up-todate
-            next_step = self.producer_to_step_map[method_name]
-            if (next_step == 0 or  # 0 step always valid, starting new iteration
-                # set method always valid, but will update start point and
-                # start new iteration
-                method_name in self.set_methods or
-                    # key method valid if previous step is up to date
-                    self.iterations[next_step - 1] == self.cur_iteration):
-                # forward step only valid if set method or key method w/ no
-                # skips
-                res = self.try_perform_producer_step(
-                    zephyr, method, *method_args, **method_kwargs)
-                return res
-            else:  # stale or inconsistent
-                res = self.try_perform_inconsistent_producer_step(
-                    zephyr, method, *method_args, **method_kwargs)
-                return res
-        elif method_name in self.getter_to_step_map:
-            res = self.try_perform_getter_step(
-                zephyr, method, *method_args, **method_kwargs)
-            return res
-        else:
-            print(f"Method {method_name} does not need to be wrapped")
-
-
-def guide(method):
-
-    @wraps(method)
-    def guided_step(instance, *method_args, **method_kwargs):
-        return instance._guide_handler.guide_step(
-            instance, method, *method_args, **method_kwargs)
-
-    return guided_step
-
-
 class Zephyr:
     """Zephyr Class.
 
@@ -435,7 +144,7 @@ def generate_entityset(
             raise ValueError(
                 f"Invalid entityset type: {es_type}. Please use one of the following types:\
                     {VALIDATE_DATA_FUNCTIONS.keys()}")
-        entityset = _create_entityset(dfs, es_type, custom_kwargs_mapping)
+        entityset = create_entityset(dfs, es_type, custom_kwargs_mapping)
 
         # perform signal processing
         if signal_dataframe_name is not None and signal_column is not None:
diff --git a/zephyr_ml/labeling/labeling_functions/__init__.py b/zephyr_ml/labeling/labeling_functions/__init__.py
deleted file mode 100644
index b756735..0000000
--- a/zephyr_ml/labeling/labeling_functions/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from zephyr_ml.labeling.labeling_functions.brake_pad_presence import brake_pad_presence
-from zephyr_ml.labeling.labeling_functions.converter_replacement_presence import (
-    converter_replacement_presence)
-from zephyr_ml.labeling.labeling_functions.planet_bearing import gearbox_replace_presence
-from zephyr_ml.labeling.labeling_functions.total_power_loss import total_power_loss

From 0e77f46153340ce418eed8dd7513d985265dd3e3 Mon Sep 17 00:00:00 2001
From: Raymond Pan <raymondpan12345@gmail.com>
Date: Sun, 29 Jun 2025 15:13:19 -0700
Subject: [PATCH 6/7] fix ga

---
 notebooks/modeling.ipynb | 68 ++++++++++++++++++++++++++++++++++------
 1 file changed, 58 insertions(+), 10 deletions(-)

diff --git a/notebooks/modeling.ipynb b/notebooks/modeling.ipynb
index 377a6e2..e3c4017 100644
--- a/notebooks/modeling.ipynb
+++ b/notebooks/modeling.ipynb
@@ -545,9 +545,13 @@
           "text": [
             "/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/sklearn/impute/_base.py:555: UserWarning: Skipping features without any observed values: [ 1  2  6  7  9 10 15 16 17 18]. At least one non-missing value is needed for imputation with strategy='mean'.\n",
             "  warnings.warn(\n",
-            "[GUIDE] Successfully performed set_feature_matrix.\n",
+            "[GUIDE] STALE WARNING: set_feature_matrix.\n",
+            "\tPerforming step 2 with set_feature_matrix.\n",
+            "\tThis is a forward step via a set method.\n",
+            "\tAll previous steps' results will be considered stale.\n",
+            "[GUIDE] DONE: set_feature_matrix.\n",
             "\tYou can perform the next step by calling generate_train_test_split.\n",
-            "[GUIDE] Successfully performed generate_train_test_split.\n",
+            "[GUIDE] DONE: generate_train_test_split.\n",
             "\tYou can perform the next step by calling fit_pipeline.\n"
           ]
         },
@@ -604,7 +608,16 @@
       "execution_count": 3,
       "id": "edffee03",
       "metadata": {},
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "[GUIDE] DONE: fit_pipeline.\n",
+            "\tYou can perform the next step by calling predict or evaluate.\n"
+          ]
+        }
+      ],
       "source": [
         "hyperparameters = {\n",
         "    \"xgboost.XGBClassifier#1\": {\n",
@@ -629,6 +642,15 @@
       "id": "78187756",
       "metadata": {},
       "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "[GUIDE] DONE: predict.\n",
+            "\tYou have reached the end of the predictive engineering workflow.\n",
+            "\tYou can call predict or evaluate again or re-perform previous steps based on results.\n"
+          ]
+        },
         {
           "data": {
             "text/plain": [
@@ -644,6 +666,14 @@
         "zephyr.predict()"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "0c8440ee",
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    },
     {
       "cell_type": "markdown",
       "id": "24cda971",
@@ -657,7 +687,17 @@
       "execution_count": 5,
       "id": "cd097853",
       "metadata": {},
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "[GUIDE] DONE: evaluate.\n",
+            "\tYou have reached the end of the predictive engineering workflow.\n",
+            "\tYou can call predict or evaluate again or re-perform previous steps based on results.\n"
+          ]
+        }
+      ],
       "source": [
         "res = zephyr.evaluate()"
       ]
@@ -675,10 +715,10 @@
               " 'sklearn.metrics.precision_score': 0.5,\n",
               " 'sklearn.metrics.f1_score': 0.6666666666666666,\n",
               " 'sklearn.metrics.recall_score': 1.0,\n",
-              " 'zephyr_ml.primitives.postprocessing.confusion_matrix': (array([[1, 1],\n",
+              " 'zephyr_ml.primitives.evaluation.confusion_matrix': (array([[1, 1],\n",
               "         [0, 1]]),\n",
               "  <Figure size 640x480 with 2 Axes>),\n",
-              " 'zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve': (0.5,\n",
+              " 'zephyr_ml.primitives.evaluation.roc_auc_score_and_curve': (0.5,\n",
               "  <Figure size 640x480 with 1 Axes>)}"
             ]
           },
@@ -719,13 +759,13 @@
       ],
       "source": [
         "%matplotlib inline\n",
-        "_, conf_matrix_fig = res[\"zephyr_ml.primitives.postprocessing.confusion_matrix\"]\n",
+        "_, conf_matrix_fig = res[\"zephyr_ml.primitives.evaluation.confusion_matrix\"]\n",
         "conf_matrix_fig"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 8,
+      "execution_count": 9,
       "id": "d59e86b1",
       "metadata": {},
       "outputs": [
@@ -736,17 +776,25 @@
               "<Figure size 640x480 with 1 Axes>"
             ]
           },
-          "execution_count": 8,
+          "execution_count": 9,
           "metadata": {},
           "output_type": "execute_result"
         }
       ],
       "source": [
         "\n",
-        "_, roc_fig = res[\"zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve\"]\n",
+        "_, roc_fig = res[\"zephyr_ml.primitives.evaluation.roc_auc_score_and_curve\"]\n",
         "\n",
         "roc_fig\n"
       ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "38d109a1",
+      "metadata": {},
+      "outputs": [],
+      "source": []
     }
   ],
   "metadata": {

From 59b0be706d9adc2abf30322994c0324dcb0ac2ce Mon Sep 17 00:00:00 2001
From: Raymond Pan <raymondpan12345@gmail.com>
Date: Sun, 29 Jun 2025 15:48:28 -0700
Subject: [PATCH 7/7] lint

---
 zephyr_ml/_guide_handler.py | 1 -
 zephyr_ml/core.py           | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/zephyr_ml/_guide_handler.py b/zephyr_ml/_guide_handler.py
index 995217b..e2725ae 100644
--- a/zephyr_ml/_guide_handler.py
+++ b/zephyr_ml/_guide_handler.py
@@ -1,7 +1,6 @@
 import logging
 from functools import wraps
 
-
 LOGGER = logging.getLogger(__name__)
 
 
diff --git a/zephyr_ml/core.py b/zephyr_ml/core.py
index ffaff68..87f3762 100644
--- a/zephyr_ml/core.py
+++ b/zephyr_ml/core.py
@@ -11,10 +11,10 @@
 from mlblocks import MLBlock, MLPipeline
 from sklearn.model_selection import train_test_split
 
-from zephyr_ml._entityset import create_entityset, VALIDATE_DATA_FUNCTIONS
+from zephyr_ml._entityset import VALIDATE_DATA_FUNCTIONS, create_entityset
 from zephyr_ml._feature_engineering import process_signals
+from zephyr_ml._guide_handler import GuideHandler, guide
 from zephyr_ml._labeling import get_labeling_functions, get_labeling_functions_map
-from zephyr_ml._guide_handler import guide, GuideHandler
 
 DEFAULT_METRICS = [
     "sklearn.metrics.accuracy_score",