diff --git a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py
index 41eed9234..e5e0ee94b 100644
--- a/examples/Basics/simple_flows_and_runs_tutorial.py
+++ b/examples/Basics/simple_flows_and_runs_tutorial.py
@@ -1,12 +1,10 @@
 # %% [markdown]
 # A simple tutorial on how to upload results from a machine learning experiment to OpenML.
-
 # %%
 import sklearn
 from sklearn.neighbors import KNeighborsClassifier
-
 import openml
-
+from openml_sklearn import SklearnExtension

 # %% [markdown]
 #
@@ -20,68 +18,39 @@
 # OpenML-Python API.
 #

-
 # %%
 openml.config.start_using_configuration_for_example()
-
 # %% [markdown]
 # ## Train a machine learning model and evaluate it
 # NOTE: We are using task 119 from the test server: https://test.openml.org/d/20
-
 # %%
 task = openml.tasks.get_task(119)
-
 # Get the data
 dataset = task.get_dataset()
 X, y, categorical_indicator, attribute_names = dataset.get_data(
     target=dataset.default_target_attribute
 )
-
 # Get the holdout split from the task
 train_indices, test_indices = task.get_train_test_split_indices(fold=0, repeat=0)
 X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
 y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]
-
-knn_parameters = {
-    "n_neighbors": 3,
-}
-clf = KNeighborsClassifier(**knn_parameters)
+clf = KNeighborsClassifier(n_neighbors=3)
 clf.fit(X_train, y_train)
-
 # Get experiment results
 y_pred = clf.predict(X_test)
 y_pred_proba = clf.predict_proba(X_test)
-
 # %% [markdown]
 # ## Upload the machine learning experiments to OpenML
-# First, create a fow and fill it with metadata about the machine learning model.
-
+# Create a flow from the trained model using the sklearn extension.
+# This automatically extracts all metadata and hyperparameters.
 # %%
-knn_flow = openml.flows.OpenMLFlow(
-    # Metadata
-    model=clf,  # or None, if you do not want to upload the model object.
-    name="CustomKNeighborsClassifier",
-    description="A custom KNeighborsClassifier flow for OpenML.",
-    external_version=f"{sklearn.__version__}",
-    language="English",
-    tags=["openml_tutorial_knn"],
-    dependencies=f"{sklearn.__version__}",
-    # Hyperparameters
-    parameters={k: str(v) for k, v in knn_parameters.items()},
-    parameters_meta_info={
-        "n_neighbors": {"description": "number of neighbors to use", "data_type": "int"}
-    },
-    # If you have a pipeline with subcomponents, such as preprocessing, add them here.
-    components={},
-)
+extension = SklearnExtension()
+knn_flow = extension.model_to_flow(clf)
 knn_flow.publish()
 print(f"knn_flow was published with the ID {knn_flow.flow_id}")
-
 # %% [markdown]
 # Second, we create a run to store the results associated with the flow.
-
 # %%
-
 # Format the predictions for OpenML
 predictions = []
 for test_index, y_true_i, y_pred_i, y_pred_proba_i in zip(
@@ -98,13 +67,11 @@
             proba=dict(zip(task.class_labels, y_pred_proba_i)),
         )
     )
-
-# Format the parameters for OpenML
+# Get parameters from the flow
 oml_knn_parameters = [
     {"oml:name": k, "oml:value": v, "oml:component": knn_flow.flow_id}
-    for k, v in knn_parameters.items()
+    for k, v in knn_flow.parameters.items()
 ]
-
 knn_run = openml.runs.OpenMLRun(
     task_id=task.task_id,
     flow_id=knn_flow.flow_id,
@@ -117,6 +84,5 @@
 knn_run = knn_run.publish()
 print(f"Run was uploaded to {knn_run.openml_url}")
 print(f"The flow can be found at {knn_run.flow.openml_url}")
-
 # %%
-openml.config.stop_using_configuration_for_example()
+openml.config.stop_using_configuration_for_example()
\ No newline at end of file
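
For reviewers who want to try the new flow-creation path locally, a minimal sketch of how the extension-based code in this patch fits together. It assumes only what the diff itself introduces: the `openml_sklearn` package exposing `SklearnExtension` and its `model_to_flow()` method; `name` and `parameters` are standard `OpenMLFlow` attributes, and everything runs against the test-server configuration used by the tutorial.

```python
# Minimal sketch of the extension-based flow creation used in the updated
# tutorial (assumes the openml-sklearn package is installed).
import openml
from openml_sklearn import SklearnExtension
from sklearn.neighbors import KNeighborsClassifier

openml.config.start_using_configuration_for_example()  # test.openml.org only

clf = KNeighborsClassifier(n_neighbors=3)

# model_to_flow() builds the OpenMLFlow locally, deriving the name,
# dependencies, and hyperparameter values from the estimator itself,
# so the manual OpenMLFlow(...) construction is no longer needed.
extension = SklearnExtension()
knn_flow = extension.model_to_flow(clf)

print(knn_flow.name)        # e.g. the fully qualified sklearn class name
print(knn_flow.parameters)  # hyperparameters extracted from the estimator

openml.config.stop_using_configuration_for_example()
```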
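The same idea motivates the change in the run-upload section: the run's parameter settings are now built from `knn_flow.parameters`, which `model_to_flow()` filled in, so the uploaded `oml:name`/`oml:value` entries stay in sync with whatever the flow actually records instead of a hand-written dict. A short sketch mirroring the updated tutorial code, assuming `knn_flow` was created and published as above:

```python
# Build the run's parameter settings from the flow itself, so the uploaded
# values always match the flow's recorded hyperparameters.
oml_knn_parameters = [
    {"oml:name": k, "oml:value": v, "oml:component": knn_flow.flow_id}
    for k, v in knn_flow.parameters.items()
]
print(oml_knn_parameters)
```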