Merge branch 'flow-migration-stacked' of https://github.com/Omswastik-11/openml-python into flow-migration-stacked

Omswastik-11 · Omswastik-11 · commit 5ab20c3e8d64 · 2026-06-03T16:41:18.000+05:30
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -107,6 +107,32 @@ $env:OPENML_TEST_SERVER_ADMIN_KEY = "admin-key"
 export OPENML_TEST_SERVER_ADMIN_KEY="admin-key"
 ```
 
+#### Diagnosing Slow Tests
+
+If you suspect a test (or the suite as a whole) is running too slowly, `pytest` already exposes everything you need to investigate it. A few invocations that are useful when looking into test runtimes:
+
+```bash
+# Show the 20 slowest tests (use 0 to list every test; add -vv to include durations below 0.005s)
+pytest tests --durations=20
+
+# Fail any test that runs longer than 600 seconds (requires the pytest-timeout plugin)
+pytest tests --timeout=600
+
+# Investigate only fixture/setup costs without actually running the tests
+pytest tests --setup-only
+
+# Profile a specific module, class, or test
+pytest tests/test_datasets/test_dataset.py --durations=0
+
+# Skip the slow live-server tests while profiling locally
+pytest tests --durations=0 -m "not production_server and not test_server"
+
+# Run the suite in parallel to reproduce CI behaviour (requires pytest-xdist)
+pytest tests -n 4 --dist=load --durations=0
+```
+
+Combining these with the marker filters (`production_server`, `test_server`, `sklearn`) makes it straightforward to narrow the investigation down to the slow tests without changing project configuration.
+
 ### Pull Request Checklist
 
 You can go to the `openml-python` GitHub repository to create the pull request by [comparing the branch](https://github.com/openml/openml-python/compare) from your fork with the `main` branch of the `openml-python` repository. When creating a pull request, make sure to follow the comments and structured provided by the template on GitHub.
@@ -214,4 +240,4 @@ When dependencies are installed, run
 ```bash
 mkdocs serve
 ```
-This will open a preview of the website.
+This will open a preview of the website.
diff --git a/examples/Advanced/tasks_tutorial.py b/examples/Advanced/tasks_tutorial.py
@@ -24,13 +24,15 @@
 #
 # We will start by simply listing only *supervised classification* tasks.
 #
-# **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, but we
-# request a
+# **openml.list_tasks()** (or **openml.tasks.list_tasks()**) historically returned a dictionary of dictionaries; it now returns a
 # [pandas dataframe](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)
 # instead to have better visualization capabilities and easier access:
 
 # %%
-tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
+# New: top-level convenience alias
+tasks = openml.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
+# Old path still works:
+# tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
 print(tasks.columns)
 print(f"First 5 of {len(tasks)} tasks:")
 print(tasks.head())
@@ -66,23 +68,29 @@
 # Similar to listing tasks by task type, we can list tasks by tags:
 
 # %%
-tasks = openml.tasks.list_tasks(tag="OpenML100")
+tasks = openml.list_tasks(tag="OpenML100")
+# Old path still works:
+# tasks = openml.tasks.list_tasks(tag="OpenML100")
 print(f"First 5 of {len(tasks)} tasks:")
 print(tasks.head())
 
 # %% [markdown]
 # Furthermore, we can list tasks based on the dataset id:
 
 # %%
-tasks = openml.tasks.list_tasks(data_id=1471)
+tasks = openml.list_tasks(data_id=1471)
+# Old path still works:
+# tasks = openml.tasks.list_tasks(data_id=1471)
 print(f"First 5 of {len(tasks)} tasks:")
 print(tasks.head())
 
 # %% [markdown]
 # In addition, a size limit and an offset can be applied both separately and simultaneously:
 
 # %%
-tasks = openml.tasks.list_tasks(size=10, offset=50)
+tasks = openml.list_tasks(size=10, offset=50)
+# Old path still works:
+# tasks = openml.tasks.list_tasks(size=10, offset=50)
 print(tasks)
 
 # %% [markdown]
@@ -98,7 +106,9 @@
 # Finally, it is also possible to list all tasks on OpenML with:
 
 # %%
-tasks = openml.tasks.list_tasks()
+tasks = openml.list_tasks()
+# Old path still works:
+# tasks = openml.tasks.list_tasks()
 print(len(tasks))
 
 # %% [markdown]
@@ -118,7 +128,10 @@
 
 # %%
 task_id = 31
-task = openml.tasks.get_task(task_id)
+# New: top-level convenience alias
+task = openml.get_task(task_id)
+# Old path still works:
+# task = openml.tasks.get_task(task_id)
 
 # %%
 # Properties of the task are stored as member variables:
diff --git a/examples/Basics/simple_datasets_tutorial.py b/examples/Basics/simple_datasets_tutorial.py
@@ -14,15 +14,21 @@
 # ## List datasets stored on OpenML
 
 # %%
-datasets_df = openml.datasets.list_datasets()
+# New: top-level convenience alias
+datasets_df = openml.list_datasets()
+# Old path still works for backwards compatibility:
+# datasets_df = openml.datasets.list_datasets()
 print(datasets_df.head(n=10))
 
 # %% [markdown]
 # ## Download a dataset
 
 # %%
 # Iris dataset https://www.openml.org/d/61
-dataset = openml.datasets.get_dataset(dataset_id=61)
+# New: top-level convenience alias
+dataset = openml.get_dataset(dataset_id=61)
+# Old path still works:
+# dataset = openml.datasets.get_dataset(dataset_id=61)
 
 # Print a summary
 print(
diff --git a/examples/Basics/simple_flows_and_runs_tutorial.py b/examples/Basics/simple_flows_and_runs_tutorial.py
@@ -29,7 +29,10 @@
 # NOTE: We are using task 119 from the test server: https://test.openml.org/d/20
 
 # %%
-task = openml.tasks.get_task(119)
+# New: top-level convenience alias
+task = openml.get_task(119)
+# Old path still works:
+# task = openml.tasks.get_task(119)
 
 # Get the data
 dataset = task.get_dataset()
@@ -54,7 +57,7 @@
 
 # %% [markdown]
 # ## Upload the machine learning experiments to OpenML
-# First, create a fow and fill it with metadata about the machine learning model.
+# First, create a flow and fill it with metadata about the machine learning model.
 
 # %%
 knn_flow = openml.flows.OpenMLFlow(
diff --git a/examples/Basics/simple_tasks_tutorial.py b/examples/Basics/simple_tasks_tutorial.py
@@ -10,7 +10,10 @@
 # [supervised classification on credit-g](https://www.openml.org/search?type=task&id=31&source_data.data_id=31):
 
 # %%
-task = openml.tasks.get_task(31)
+# New: top-level convenience alias
+task = openml.get_task(31)
+# Old path still works:
+# task = openml.tasks.get_task(31)
 
 # %% [markdown]
 # Get the dataset and its data from the task.
diff --git a/openml/__init__.py b/openml/__init__.py
@@ -37,9 +37,12 @@
 from .__version__ import __version__
 from ._api import _backend
 from .datasets import OpenMLDataFeature, OpenMLDataset
+from .datasets.functions import get_dataset, list_datasets
 from .evaluations import OpenMLEvaluation
 from .flows import OpenMLFlow
+from .flows.functions import get_flow, list_flows
 from .runs import OpenMLRun
+from .runs.functions import get_run, list_runs
 from .setups import OpenMLParameter, OpenMLSetup
 from .study import OpenMLBenchmarkSuite, OpenMLStudy
 from .tasks import (
@@ -51,6 +54,7 @@
     OpenMLSupervisedTask,
     OpenMLTask,
 )
+from .tasks.functions import get_task, list_tasks
 
 if TYPE_CHECKING:
     from ._config import OpenMLConfigManager
@@ -124,6 +128,14 @@ def populate_cache(
     "exceptions",
     "extensions",
     "flows",
+    "get_dataset",
+    "get_flow",
+    "get_run",
+    "get_task",
+    "list_datasets",
+    "list_flows",
+    "list_runs",
+    "list_tasks",
     "runs",
     "setups",
     "study",
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -364,6 +364,11 @@ def get_datasets(
     -------
     datasets : list of datasets
         A list of dataset objects.
+
+    Examples
+    --------
+    >>> import openml
+    >>> datasets = openml.datasets.get_datasets([1, 2, 3])  # doctest: +SKIP
     """
     datasets = []
     for dataset_id in dataset_ids:
@@ -446,6 +451,13 @@ def get_dataset(  # noqa: C901, PLR0912
     -------
     dataset : :class:`openml.OpenMLDataset`
         The downloaded dataset.
+
+    Examples
+    --------
+    >>> import openml
+    >>> dataset = openml.datasets.get_dataset(1)  # doctest: +SKIP
+    >>> dataset = openml.datasets.get_dataset("iris", version=1)  # doctest: +SKIP
+    >>> dataset = openml.datasets.get_dataset(1, download_data=True)  # doctest: +SKIP
     """
     if download_all_files:
         warnings.warn(
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -103,6 +103,15 @@ def run_model_on_task(  # noqa: PLR0913
         Result of the run.
     flow : OpenMLFlow (optional, only if `return_flow` is True).
         Flow generated from the model.
+
+    Examples
+    --------
+    >>> import openml
+    >>> import openml_sklearn  # doctest: +SKIP
+    >>> from sklearn.tree import DecisionTreeClassifier  # doctest: +SKIP
+    >>> clf = DecisionTreeClassifier()  # doctest: +SKIP
+    >>> task = openml.tasks.get_task(6)  # doctest: +SKIP
+    >>> run = openml.runs.run_model_on_task(clf, task)  # doctest: +SKIP
     """
     if avoid_duplicate_runs is None:
         avoid_duplicate_runs = openml.config.avoid_duplicate_runs
@@ -558,9 +567,14 @@ def _run_task_get_arffcontent(  # noqa: PLR0915, PLR0912, C901
     )  # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs`
 
     for n_fit, rep_no, fold_no, sample_no in jobs:
-        pred_y, proba_y, test_indices, test_y, inner_trace, user_defined_measures_fold = job_rvals[
-            n_fit - 1
-        ]
+        (
+            pred_y,
+            proba_y,
+            test_indices,
+            test_y,
+            inner_trace,
+            user_defined_measures_fold,
+        ) = job_rvals[n_fit - 1]
 
         if inner_trace is not None:
             traces.append(inner_trace)
@@ -845,7 +859,10 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     return _create_run_from_xml(run_xml)
 
 
-def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun:  # noqa: PLR0915, PLR0912, C901, FBT002
+def _create_run_from_xml(  # noqa: PLR0915, PLR0912, C901
+    xml: str,
+    from_server: bool = True,  # noqa: FBT002
+) -> OpenMLRun:
     """Create a run object from xml returned from server.
 
     Parameters
diff --git a/openml/study/functions.py b/openml/study/functions.py
@@ -29,6 +29,12 @@ def get_suite(suite_id: int | str) -> OpenMLBenchmarkSuite:
     -------
     OpenMLSuite
         The OpenML suite object
+
+    Examples
+    --------
+    >>> import openml
+    >>> suite = openml.study.get_suite(99)  # doctest: +SKIP
+    >>> suite = openml.study.get_suite("OpenML-CC18")  # doctest: +SKIP
     """
     study = _get_study(suite_id, entity_type="task")
     assert isinstance(study, OpenMLBenchmarkSuite)
@@ -58,6 +64,11 @@ def get_study(
     -------
     OpenMLStudy
         The OpenML study object
+
+    Examples
+    --------
+    >>> import openml
+    >>> study = openml.study.get_study(1)  # doctest: +SKIP
     """
     if study_id == "OpenML100":
         message = (
@@ -108,7 +119,10 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
     tags = []
     if "oml:tag" in result_dict:
         for tag in result_dict["oml:tag"]:
-            current_tag = {"name": tag["oml:name"], "write_access": tag["oml:write_access"]}
+            current_tag = {
+                "name": tag["oml:name"],
+                "write_access": tag["oml:write_access"],
+            }
             if "oml:window_start" in tag:
                 current_tag["window_start"] = tag["oml:window_start"]
             tags.append(current_tag)
@@ -209,6 +223,15 @@ def create_study(
     -------
     OpenMLStudy
         A local OpenML study object (call publish method to upload to server)
+
+    Examples
+    --------
+    >>> import openml
+    >>> study = openml.study.create_study(  # doctest: +SKIP
+    ...     name="My Study",
+    ...     description="A study on decision trees",
+    ...     run_ids=[1, 2, 3],
+    ... )
     """
     return OpenMLStudy(
         study_id=None,
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -338,7 +338,11 @@ def get_tasks(
     tasks = []
     for task_id in task_ids:
         tasks.append(
-            get_task(task_id, download_data=download_data, download_qualities=download_qualities)
+            get_task(
+                task_id,
+                download_data=download_data,
+                download_qualities=download_qualities,
+            )
         )
     return tasks
 
@@ -369,6 +373,12 @@ def get_task(
     Returns
     -------
     task: OpenMLTask
+
+    Examples
+    --------
+    >>> import openml
+    >>> task = openml.tasks.get_task(1)  # doctest: +SKIP
+    >>> task = openml.tasks.get_task(1, download_splits=True)  # doctest: +SKIP
     """
     if not isinstance(task_id, int):
         raise TypeError(f"Task id should be integer, is {type(task_id)}")
diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py
@@ -41,3 +41,17 @@ def test_populate_cache(
         assert task_mock.call_count == 2
         for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]):
             assert argument[0] == fixture
+
+    def test_top_level_getters_aliases(self):
+        # Ensure top-level convenience aliases point to existing implementations.
+        assert openml.list_datasets is openml.datasets.list_datasets
+        assert openml.get_dataset is openml.datasets.get_dataset
+
+        assert openml.list_flows is openml.flows.list_flows
+        assert openml.get_flow is openml.flows.get_flow
+
+        assert openml.list_runs is openml.runs.list_runs
+        assert openml.get_run is openml.runs.get_run
+
+        assert openml.list_tasks is openml.tasks.list_tasks
+        assert openml.get_task is openml.tasks.get_task