18 changes: 15 additions & 3 deletions examples/Basics/simple_flows_and_runs_tutorial.py
@@ -2,9 +2,8 @@
# A simple tutorial on how to upload results from a machine learning experiment to OpenML.

# %%
import sklearn
from sklearn.neighbors import KNeighborsClassifier

import sklearn
import openml

# %% [markdown]
@@ -54,7 +53,17 @@

# %% [markdown]
# ## Upload the machine learning experiments to OpenML
# First, create a flow and fill it with metadata about the machine learning model.
#
# ### Option A: Automatic publishing (simplified)
# The publish function automatically detects the model type and creates the flow:

# %%
knn_flow = openml.publish(clf, tags=["openml_tutorial_knn"])
print(f"Flow was auto-published with ID {knn_flow.flow_id}")

# %% [markdown]
# ### Option B: Manual flow construction (full control)
# For advanced use cases, you can manually construct the flow:

# %%
knn_flow = openml.flows.OpenMLFlow(
@@ -77,6 +86,9 @@
knn_flow.publish()
print(f"knn_flow was published with the ID {knn_flow.flow_id}")

# %% [markdown]
# Now we'll use the flow we just published (from either option above) to create and upload a run.

# %% [markdown]
# Second, we create a run to store the results associated with the flow.

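The cells that actually create and upload the run are collapsed in this diff. For orientation only, here is a minimal sketch of what that step typically looks like in this tutorial, assuming the existing openml.runs.run_model_on_task API and a `task` object loaded in one of the collapsed cells:

# %%
# Hypothetical sketch, not part of this diff: evaluate the classifier on the
# task locally and upload the resulting run. Assumes `task` was fetched
# earlier, e.g. via openml.tasks.get_task(...).
run = openml.runs.run_model_on_task(clf, task)  # runs clf and collects predictions
run.publish()  # uploads the run to the OpenML server
print(f"Run was uploaded with ID {run.run_id}")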
121 changes: 121 additions & 0 deletions openml/__init__.py
@@ -18,6 +18,9 @@
# License: BSD 3-Clause
from __future__ import annotations

import contextlib
from typing import Any, Sequence

from . import (
    _api_calls,
    config,
@@ -33,6 +36,7 @@
    utils,
)
from .__version__ import __version__
from .base import OpenMLBase
from .datasets import OpenMLDataFeature, OpenMLDataset
from .evaluations import OpenMLEvaluation
from .flows import OpenMLFlow
@@ -50,6 +54,122 @@
)


def publish(obj: Any, *, name: str | None = None, tags: Sequence[str] | None = None) -> Any:
"""Publish a common object (flow/model/run/dataset) with minimal friction.

This function provides a unified entry point for publishing various OpenML objects.
It automatically detects the object type and routes to the appropriate publishing
mechanism:

- For OpenML objects (``OpenMLDataset``, ``OpenMLFlow``, ``OpenMLRun``, etc.),
it directly calls their ``publish()`` method.
- For external models (e.g., scikit-learn estimators), it uses registered
extensions to convert them to ``OpenMLFlow`` objects before publishing.

Parameters
----------
obj : Any
The object to publish. Can be:
- An OpenML object (OpenMLDataset, OpenMLFlow, OpenMLRun, OpenMLTask)
- A machine learning model from a supported framework (e.g., scikit-learn)
name : str, optional
Override the default name for the published object.
If not provided, uses the object's default naming convention.
tags : Sequence[str], optional
Additional tags to attach to the published object.
Will be merged with any existing tags, removing duplicates while
preserving order.

Returns
-------
Any
The published object (typically with updated ID and metadata).

Raises
------
ValueError
If no extension is registered to handle the provided model type.

Examples
--------
Publishing an OpenML dataset:

>>> dataset = openml.datasets.get_dataset(61)
>>> openml.publish(dataset, tags=["example"])

Publishing a scikit-learn model:

>>> from sklearn.tree import DecisionTreeClassifier
>>> clf = DecisionTreeClassifier(max_depth=5)
>>> openml.publish(clf, name="MyDecisionTree", tags=["tutorial"])

Publishing an OpenML flow directly:

>>> flow = openml.flows.OpenMLFlow(...)
>>> openml.publish(flow)

Publishing an OpenML run (after execution with predictions):

>>> run = openml.runs.OpenMLRun(
... task_id=1, flow_id=100, dataset_id=61,
... data_content=predictions # predictions from model evaluation
... )
>>> openml.publish(run, tags=["experiment"])

Notes
-----
For external models (e.g., scikit-learn), the corresponding extension must be
installed (e.g., ``openml-sklearn``). The extension will be automatically imported
if available.
"""
# Case 1: Object is already an OpenML entity
if isinstance(obj, OpenMLBase):
if tags is not None and hasattr(obj, "tags"):
existing = list(getattr(obj, "tags", []) or [])
merged = list(dict.fromkeys([*existing, *tags]))
obj.tags = merged
if name is not None and hasattr(obj, "name"):
obj.name = name
return obj.publish()

# Case 2: Object is an external model - use extension registry
# Attempt to auto-import common extensions
_ensure_extension_imported(obj)

extension = extensions.functions.get_extension_by_model(obj, raise_if_no_extension=True)
if extension is None: # Defensive check (should not occur with raise_if_no_extension=True)
raise ValueError("No extension registered to handle the provided object.")
flow = extension.model_to_flow(obj)

if name is not None:
flow.name = name

if tags is not None:
existing_tags = list(getattr(flow, "tags", []) or [])
flow.tags = list(dict.fromkeys([*existing_tags, *tags]))

return flow.publish()


def _ensure_extension_imported(obj: Any) -> None:
"""Attempt to import the appropriate extension for common frameworks.

This is a convenience helper to automatically import extensions for
well-known frameworks, reducing friction for users.

Parameters
----------
obj : Any
The object to check.
"""
obj_module = type(obj).__module__

# Check for scikit-learn models
if obj_module.startswith("sklearn"):
with contextlib.suppress(ImportError):
import openml_sklearn # noqa: F401


def populate_cache(
    task_ids: list[int] | None = None,
    dataset_ids: list[int | str] | None = None,
@@ -120,4 +240,5 @@ def populate_cache(
"utils",
"_api_calls",
"__version__",
"publish",
]
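The tag handling in the publish function above relies on an order-preserving deduplication via dict.fromkeys, which the new test below also asserts. A standalone sketch of that idiom, independent of any OpenML objects:

# Merge existing and user-supplied tags, dropping duplicates while keeping
# first-seen order (dicts preserve insertion order in Python 3.7+).
existing = ["a"]
new_tags = ["b", "a"]
merged = list(dict.fromkeys([*existing, *new_tags]))
assert merged == ["a", "b"]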
51 changes: 51 additions & 0 deletions tests/test_openml/test_openml.py
@@ -41,3 +41,54 @@ def test_populate_cache(
        assert task_mock.call_count == 2
        for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]):
            assert argument[0] == fixture

    def test_publish_with_openml_object_merges_tags_and_name(self):
        class Dummy(openml.base.OpenMLBase):
            def __init__(self) -> None:
                self.tags = ["a"]
                self.name = "orig"
                self.published = False

            @property
            def id(self):
                return None

            def _get_repr_body_fields(self):
                return []

            def _to_dict(self):
                return {}

            def _parse_publish_response(self, xml_response):
                return None

            def publish(self):
                self.published = True
                return self

        obj = Dummy()
        result = openml.publish(obj, name="new", tags=["b", "a"])
        assert result is obj
        assert obj.published is True
        assert obj.name == "new"
        assert obj.tags == ["a", "b"]  # dedup and preserve order from original

    @mock.patch("openml.extensions.functions.get_extension_by_model")
    def test_publish_with_extension(self, get_ext_mock):
        flow_mock = mock.MagicMock()
        flow_mock.tags = []
        flow_mock.publish.return_value = "flow-id"

        ext_instance = mock.MagicMock()
        ext_instance.model_to_flow.return_value = flow_mock
        get_ext_mock.return_value = ext_instance

        model = object()
        flow_id = openml.publish(model, name="n", tags=["x"])

        get_ext_mock.assert_called_once_with(model, raise_if_no_extension=True)
        ext_instance.model_to_flow.assert_called_once_with(model)
        assert flow_mock.name == "n"
        assert flow_mock.tags == ["x"]
        flow_mock.publish.assert_called_once_with()
        assert flow_id == "flow-id"