diff --git a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py
index 41eed9234..e5e0ee94b 100644
--- a/examples/Basics/simple_flows_and_runs_tutorial.py
+++ b/examples/Basics/simple_flows_and_runs_tutorial.py
@@ -1,12 +1,10 @@
# %% [markdown]
# A simple tutorial on how to upload results from a machine learning experiment to OpenML.
-
# %%
import sklearn
from sklearn.neighbors import KNeighborsClassifier
-
import openml
-
+from openml_sklearn import SklearnExtension
# %% [markdown]
#
# Warning
@@ -20,68 +18,39 @@
# OpenML-Python API.
#
#
-
# %%
openml.config.start_using_configuration_for_example()
-
# %% [markdown]
# ## Train a machine learning model and evaluate it
# NOTE: We are using task 119 from the test server: https://test.openml.org/d/20
-
# %%
task = openml.tasks.get_task(119)
-
# Get the data
dataset = task.get_dataset()
X, y, categorical_indicator, attribute_names = dataset.get_data(
target=dataset.default_target_attribute
)
-
# Get the holdout split from the task
train_indices, test_indices = task.get_train_test_split_indices(fold=0, repeat=0)
X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]
-
-knn_parameters = {
- "n_neighbors": 3,
-}
-clf = KNeighborsClassifier(**knn_parameters)
+clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X_train, y_train)
-
# Get experiment results
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)
-
# %% [markdown]
# ## Upload the machine learning experiments to OpenML
-# First, create a fow and fill it with metadata about the machine learning model.
-
+# First, create a flow from the trained model using the scikit-learn extension.
+# The extension automatically extracts the model's metadata and hyperparameters.
# %%
-knn_flow = openml.flows.OpenMLFlow(
- # Metadata
- model=clf, # or None, if you do not want to upload the model object.
- name="CustomKNeighborsClassifier",
- description="A custom KNeighborsClassifier flow for OpenML.",
- external_version=f"{sklearn.__version__}",
- language="English",
- tags=["openml_tutorial_knn"],
- dependencies=f"{sklearn.__version__}",
- # Hyperparameters
- parameters={k: str(v) for k, v in knn_parameters.items()},
- parameters_meta_info={
- "n_neighbors": {"description": "number of neighbors to use", "data_type": "int"}
- },
- # If you have a pipeline with subcomponents, such as preprocessing, add them here.
- components={},
-)
+extension = SklearnExtension()
+knn_flow = extension.model_to_flow(clf)
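+# The extracted hyperparameters are stored on the flow and can be inspected,
+# e.g. with print(knn_flow.parameters).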
knn_flow.publish()
print(f"knn_flow was published with the ID {knn_flow.flow_id}")
-
# %% [markdown]
# Second, we create a run to store the results associated with the flow.
-
# %%
-
# Format the predictions for OpenML
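+# Note: predict_proba returns one column per class in the order of clf.classes_;
+# this order is assumed to match task.class_labels when building proba below.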
predictions = []
for test_index, y_true_i, y_pred_i, y_pred_proba_i in zip(
@@ -98,13 +67,11 @@
proba=dict(zip(task.class_labels, y_pred_proba_i)),
)
)
-
-# Format the parameters for OpenML
+# Get parameters from the flow
oml_knn_parameters = [
{"oml:name": k, "oml:value": v, "oml:component": knn_flow.flow_id}
- for k, v in knn_parameters.items()
+ for k, v in knn_flow.parameters.items()
]
-
knn_run = openml.runs.OpenMLRun(
task_id=task.task_id,
flow_id=knn_flow.flow_id,
@@ -117,6 +84,5 @@
knn_run = knn_run.publish()
print(f"Run was uploaded to {knn_run.openml_url}")
print(f"The flow can be found at {knn_run.flow.openml_url}")
-
# %%
openml.config.stop_using_configuration_for_example()