Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 58 additions & 10 deletions notebooks/modeling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,13 @@
"text": [
"/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/sklearn/impute/_base.py:555: UserWarning: Skipping features without any observed values: [ 1 2 6 7 9 10 15 16 17 18]. At least one non-missing value is needed for imputation with strategy='mean'.\n",
" warnings.warn(\n",
"[GUIDE] Successfully performed set_feature_matrix.\n",
"[GUIDE] STALE WARNING: set_feature_matrix.\n",
"\tPerforming step 2 with set_feature_matrix.\n",
"\tThis is a forward step via a set method.\n",
"\tAll previous steps' results will be considered stale.\n",
"[GUIDE] DONE: set_feature_matrix.\n",
"\tYou can perform the next step by calling generate_train_test_split.\n",
"[GUIDE] Successfully performed generate_train_test_split.\n",
"[GUIDE] DONE: generate_train_test_split.\n",
"\tYou can perform the next step by calling fit_pipeline.\n"
]
},
Expand Down Expand Up @@ -604,7 +608,16 @@
"execution_count": 3,
"id": "edffee03",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[GUIDE] DONE: fit_pipeline.\n",
"\tYou can perform the next step by calling predict or evaluate.\n"
]
}
],
"source": [
"hyperparameters = {\n",
" \"xgboost.XGBClassifier#1\": {\n",
Expand All @@ -629,6 +642,15 @@
"id": "78187756",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[GUIDE] DONE: predict.\n",
"\tYou have reached the end of the predictive engineering workflow.\n",
"\tYou can call predict or evaluate again or re-perform previous steps based on results.\n"
]
},
{
"data": {
"text/plain": [
Expand All @@ -644,6 +666,14 @@
"zephyr.predict()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c8440ee",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "24cda971",
Expand All @@ -657,7 +687,17 @@
"execution_count": 5,
"id": "cd097853",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[GUIDE] DONE: evaluate.\n",
"\tYou have reached the end of the predictive engineering workflow.\n",
"\tYou can call predict or evaluate again or re-perform previous steps based on results.\n"
]
}
],
"source": [
"res = zephyr.evaluate()"
]
Expand All @@ -675,10 +715,10 @@
" 'sklearn.metrics.precision_score': 0.5,\n",
" 'sklearn.metrics.f1_score': 0.6666666666666666,\n",
" 'sklearn.metrics.recall_score': 1.0,\n",
" 'zephyr_ml.primitives.postprocessing.confusion_matrix': (array([[1, 1],\n",
" 'zephyr_ml.primitives.evaluation.confusion_matrix': (array([[1, 1],\n",
" [0, 1]]),\n",
" <Figure size 640x480 with 2 Axes>),\n",
" 'zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve': (0.5,\n",
" 'zephyr_ml.primitives.evaluation.roc_auc_score_and_curve': (0.5,\n",
" <Figure size 640x480 with 1 Axes>)}"
]
},
Expand Down Expand Up @@ -719,13 +759,13 @@
],
"source": [
"%matplotlib inline\n",
"_, conf_matrix_fig = res[\"zephyr_ml.primitives.postprocessing.confusion_matrix\"]\n",
"_, conf_matrix_fig = res[\"zephyr_ml.primitives.evaluation.confusion_matrix\"]\n",
"conf_matrix_fig"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "d59e86b1",
"metadata": {},
"outputs": [
Expand All @@ -736,17 +776,25 @@
"<Figure size 640x480 with 1 Axes>"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"_, roc_fig = res[\"zephyr_ml.primitives.postprocessing.roc_auc_score_and_curve\"]\n",
"_, roc_fig = res[\"zephyr_ml.primitives.evaluation.roc_auc_score_and_curve\"]\n",
"\n",
"roc_fig\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38d109a1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.0.5.dev9
current_version = 0.0.5.dev10
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,6 @@
test_suite='tests',
tests_require=tests_require,
url='https://github.com/sintel-dev/zephyr',
version='0.0.5.dev9',
version='0.0.5.dev10',
zip_safe=False,
)
2 changes: 1 addition & 1 deletion tests/labeling/test_data_labeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import featuretools as ft

from zephyr_ml.labeling import DataLabeler
from zephyr_ml._labeling.data_labeler import DataLabeler


class TestDataLabeler:
Expand Down
2 changes: 1 addition & 1 deletion tests/labeling/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import pandas as pd

from zephyr_ml.labeling.utils import (
from zephyr_ml._labeling.utils import (
aggregate_by_column, categorical_presence, denormalize, greater_than, keyword_in_text,
merge_binary_labeling_functions, total_duration)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_entityset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd
import pytest

from zephyr_ml import _create_entityset
from zephyr_ml._entityset import create_entityset


@pytest.fixture
Expand Down Expand Up @@ -120,11 +120,11 @@ def scada_dfs(base_dfs):


def create_pidata_entityset(pidata_dfs):
return _create_entityset(pidata_dfs, es_type="pidata")
return create_entityset(pidata_dfs, es_type="pidata")


def create_scada_entityset(scada_dfs):
return _create_entityset(scada_dfs, es_type="scada")
return create_entityset(scada_dfs, es_type="scada")


def test_create_pidata_missing_entities(pidata_dfs):
Expand Down
24 changes: 12 additions & 12 deletions tests/test_feature_engineering.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pandas as pd
import pytest

from zephyr_ml import _create_entityset
from zephyr_ml.feature_engineering import process_signals
from zephyr_ml._entityset import create_entityset
from zephyr_ml._feature_engineering import process_signals


@pytest.fixture
Expand Down Expand Up @@ -122,12 +122,12 @@ def scada_dfs(base_dfs):

@pytest.fixture
def pidata_es(pidata_dfs):
return _create_entityset(pidata_dfs, "pidata")
return create_entityset(pidata_dfs, "pidata")


@pytest.fixture
def scada_es(scada_dfs):
return _create_entityset(scada_dfs, "scada")
return create_entityset(scada_dfs, "scada")


@pytest.fixture
Expand All @@ -153,8 +153,8 @@ def test_process_signals_pidata(pidata_es, transformations, aggregations):
replace_dataframe = False
before = pidata_es['pidata'].copy()

process_signals(pidata_es, signal_dataframe_name, signal_column, transformations, aggregations,
window_size, replace_dataframe)
process_signals(pidata_es, signal_dataframe_name, signal_column, transformations,
aggregations, window_size, replace_dataframe)

processed = pidata_es['pidata_processed'].copy()
after = pidata_es['pidata'].copy()
Expand Down Expand Up @@ -189,8 +189,8 @@ def test_process_signals_pidata_replace(
window_size = '1m'
replace_dataframe = True

process_signals(pidata_es, signal_dataframe_name, signal_column, transformations, aggregations,
window_size, replace_dataframe)
process_signals(pidata_es, signal_dataframe_name, signal_column, transformations,
aggregations, window_size, replace_dataframe)

processed = pidata_es['pidata'].copy()

Expand Down Expand Up @@ -224,8 +224,8 @@ def test_process_signals_scada(scada_es, transformations, aggregations):
replace_dataframe = False
before = scada_es['scada'].copy()

process_signals(scada_es, signal_dataframe_name, signal_column, transformations, aggregations,
window_size, replace_dataframe)
process_signals(scada_es, signal_dataframe_name, signal_column, transformations,
aggregations, window_size, replace_dataframe)

expected = pd.DataFrame({
"_index": [0, 1, 2],
Expand Down Expand Up @@ -256,8 +256,8 @@ def test_process_signals_scada_replace(
window_size = '1m'
replace_dataframe = True

process_signals(scada_es, signal_dataframe_name, signal_column, transformations, aggregations,
window_size, replace_dataframe)
process_signals(scada_es, signal_dataframe_name, signal_column, transformations,
aggregations, window_size, replace_dataframe)

expected = pd.DataFrame({
"_index": [0, 1, 2],
Expand Down
2 changes: 1 addition & 1 deletion tests/test_guide.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from zephyr_ml.core import GuideHandler, guide
from zephyr_ml._guide_handler import GuideHandler, guide


class DummyObject:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from zephyr_ml.metadata import DEFAULT_ES_KWARGS, DEFAULT_ES_TYPE_KWARGS, get_mapped_kwargs
from zephyr_ml._metadata import DEFAULT_ES_KWARGS, DEFAULT_ES_TYPE_KWARGS, get_mapped_kwargs


def test_default_scada_mapped_kwargs():
Expand Down
4 changes: 1 addition & 3 deletions zephyr_ml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@

__author__ = 'MIT Data To AI Lab'
__email__ = 'dai-lab@mit.edu'
__version__ = '0.0.5.dev9'
__version__ = '0.0.5.dev10'

import os

from zephyr_ml.core import Zephyr
from zephyr_ml.entityset import VALIDATE_DATA_FUNCTIONS, _create_entityset
from zephyr_ml.labeling import DataLabeler

MLBLOCKS_PRIMITIVES = os.path.join(
os.path.dirname(__file__), "primitives", "jsons")
Expand Down
4 changes: 2 additions & 2 deletions zephyr_ml/entityset.py → zephyr_ml/_entityset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import featuretools as ft

from zephyr_ml.metadata import get_mapped_kwargs
from zephyr_ml._metadata import get_mapped_kwargs


def _validate_data(dfs, es_type, es_kwargs):
Expand Down Expand Up @@ -159,7 +159,7 @@ def validate_vibrations_data(dfs, new_kwargs_mapping=None):
}


def _create_entityset(entities, es_type, new_kwargs_mapping=None):
def create_entityset(entities, es_type, new_kwargs_mapping=None):

validate_func = VALIDATE_DATA_FUNCTIONS[es_type]
es_kwargs = validate_func(entities, new_kwargs_mapping)
Expand Down
File renamed without changes.
Loading
Loading