Merge branch 'main' into runs-migration-stacked

Omswastik-11 · web-flow · commit 65ee8646e30d · 2026-05-11T18:05:03.000+05:30
diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py
@@ -84,6 +84,17 @@ class StudyAPI(ResourceAPI):
 
     resource_type: ResourceType = ResourceType.STUDY
 
+    @abstractmethod
+    def list(  # noqa: PLR0913
+        self,
+        limit: int | None = None,
+        offset: int | None = None,
+        status: str | None = None,
+        main_entity_type: str | None = None,
+        uploader: list[int] | None = None,
+        benchmark_suite: int | None = None,
+    ) -> pd.DataFrame: ...
+
 
 class RunAPI(ResourceAPI):
     """Abstract API interface for run resources."""
diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py
@@ -25,6 +25,7 @@
     ResourceType.DATASET,
     ResourceType.TASK,
     ResourceType.FLOW,
+    ResourceType.STUDY,
     ResourceType.SETUP,
     ResourceType.RUN,
 ]
diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py
@@ -1,11 +1,162 @@
 from __future__ import annotations
 
-from .base import ResourceV1API, ResourceV2API, StudyAPI
+import builtins
+
+import pandas as pd
+import xmltodict
+
+from openml._api.resources.base import ResourceV1API, ResourceV2API, StudyAPI
 
 
 class StudyV1API(ResourceV1API, StudyAPI):
-    """Version 1 API implementation for study resources."""
+    def list(  # noqa: PLR0913
+        self,
+        limit: int | None = None,
+        offset: int | None = None,
+        status: str | None = None,
+        main_entity_type: str | None = None,
+        uploader: builtins.list[int] | None = None,
+        benchmark_suite: int | None = None,
+    ) -> pd.DataFrame:
+        """List studies using the V1 API.
+
+        Parameters
+        ----------
+        limit : int, optional
+            Maximum number of studies to return.
+        offset : int, optional
+            Number of studies to skip.
+        status : str, optional
+            Filter by status (active, in_preparation, deactivated, all).
+        main_entity_type : str, optional
+            Filter by main entity type (run, task).
+        uploader : list[int], optional
+            Filter by uploader IDs.
+        benchmark_suite : int, optional
+            Filter by benchmark suite ID.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame containing study information.
+        """
+        api_call = self._build_url(
+            limit=limit,
+            offset=offset,
+            status=status,
+            main_entity_type=main_entity_type,
+            uploader=uploader,
+            benchmark_suite=benchmark_suite,
+        )
+        response = self._http.get(api_call)
+        xml_string = response.content.decode("utf-8")
+        return self._parse_list_xml(xml_string)
+
+    @staticmethod
+    def _build_url(  # noqa: PLR0913
+        limit: int | None = None,
+        offset: int | None = None,
+        status: str | None = None,
+        main_entity_type: str | None = None,
+        uploader: builtins.list[int] | None = None,
+        benchmark_suite: int | None = None,
+    ) -> str:
+        """Build the V1 API URL for listing studies.
+
+        Parameters
+        ----------
+        limit : int, optional
+            Maximum number of studies to return.
+        offset : int, optional
+            Number of studies to skip.
+        status : str, optional
+            Filter by status (active, in_preparation, deactivated, all).
+        main_entity_type : str, optional
+            Filter by main entity type (run, task).
+        uploader : list[int], optional
+            Filter by uploader IDs.
+        benchmark_suite : int, optional
+            Filter by benchmark suite ID.
+
+        Returns
+        -------
+        str
+            The API call string with all filters applied.
+        """
+        api_call = "study/list"
+
+        if limit is not None:
+            api_call += f"/limit/{limit}"
+        if offset is not None:
+            api_call += f"/offset/{offset}"
+        if status is not None:
+            api_call += f"/status/{status}"
+        if main_entity_type is not None:
+            api_call += f"/main_entity_type/{main_entity_type}"
+        if uploader is not None:
+            api_call += f"/uploader/{','.join(str(u) for u in uploader)}"
+        if benchmark_suite is not None:
+            api_call += f"/benchmark_suite/{benchmark_suite}"
+
+        return api_call
+
+    @staticmethod
+    def _parse_list_xml(xml_string: str) -> pd.DataFrame:
+        """Parse the XML response from the study list API into a DataFrame.
+
+        Parameters
+        ----------
+        xml_string : str
+            The XML response from the API.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame containing study information.
+        """
+        study_dict = xmltodict.parse(xml_string, force_list=("oml:study",))
+
+        # Minimal sanity check that the XML has the expected structure and namespace
+        assert isinstance(study_dict["oml:study_list"]["oml:study"], list), type(
+            study_dict["oml:study_list"],
+        )
+        assert study_dict["oml:study_list"]["@xmlns:oml"] == "http://openml.org/openml", study_dict[
+            "oml:study_list"
+        ]["@xmlns:oml"]
+
+        studies = {}
+        for study_ in study_dict["oml:study_list"]["oml:study"]:
+            # Maps each XML field name to a tuple of (output column name, casting function)
+            expected_fields = {
+                "oml:id": ("id", int),
+                "oml:alias": ("alias", str),
+                "oml:main_entity_type": ("main_entity_type", str),
+                "oml:benchmark_suite": ("benchmark_suite", int),
+                "oml:name": ("name", str),
+                "oml:status": ("status", str),
+                "oml:creation_date": ("creation_date", str),
+                "oml:creator": ("creator", int),
+            }
+            study_id = int(study_["oml:id"])
+            current_study = {}
+            for oml_field_name, (real_field_name, cast_fn) in expected_fields.items():
+                if oml_field_name in study_:
+                    current_study[real_field_name] = cast_fn(study_[oml_field_name])
+            current_study["id"] = int(current_study["id"])
+            studies[study_id] = current_study
+
+        return pd.DataFrame.from_dict(studies, orient="index")
 
 
 class StudyV2API(ResourceV2API, StudyAPI):
-    """Version 2 API implementation for study resources."""
+    def list(  # noqa: PLR0913
+        self,
+        limit: int | None = None,  # noqa: ARG002
+        offset: int | None = None,  # noqa: ARG002
+        status: str | None = None,  # noqa: ARG002
+        main_entity_type: str | None = None,  # noqa: ARG002
+        uploader: builtins.list[int] | None = None,  # noqa: ARG002
+        benchmark_suite: int | None = None,  # noqa: ARG002
+    ) -> pd.DataFrame:
+        """V2 API for listing studies is not yet available."""
+        self._not_supported(method="list")
diff --git a/openml/study/functions.py b/openml/study/functions.py
@@ -3,7 +3,7 @@
 
 import warnings
 from functools import partial
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 import pandas as pd
 import xmltodict
@@ -336,7 +336,8 @@ def delete_study(study_id: int) -> bool:
     bool
         True iff the deletion was successful. False otherwise
     """
-    return openml.utils._delete_entity("study", study_id)
+    result: bool = openml._backend.study.delete(study_id)
+    return result
 
 
 def attach_to_suite(suite_id: int, task_ids: list[int]) -> int:
@@ -465,7 +466,7 @@ def list_suites(
         - creation_date
     """
     listing_call = partial(
-        _list_studies,
+        openml._backend.study.list,
         main_entity_type="task",
         status=status,
         uploader=uploader,
@@ -481,7 +482,7 @@ def list_studies(
     offset: int | None = None,
     size: int | None = None,
     status: str | None = None,
-    uploader: list[str] | None = None,
+    uploader: list[int] | None = None,
     benchmark_suite: int | None = None,
 ) -> pd.DataFrame:
     """
@@ -516,7 +517,7 @@ def list_studies(
         these are also returned.
     """
     listing_call = partial(
-        _list_studies,
+        openml._backend.study.list,
         main_entity_type="run",
         status=status,
         uploader=uploader,
@@ -527,81 +528,3 @@ def list_studies(
         return pd.DataFrame()
 
     return pd.concat(batches)
-
-
-def _list_studies(limit: int, offset: int, **kwargs: Any) -> pd.DataFrame:
-    """Perform api call to return a list of studies.
-
-    Parameters
-    ----------
-    limit: int
-        The maximum number of studies to return.
-    offset: int
-        The number of studies to skip, starting from the first.
-    kwargs : dict, optional
-        Legal filter operators (keys in the dict):
-        status, main_entity_type, uploader, benchmark_suite
-
-    Returns
-    -------
-    studies : dataframe
-    """
-    api_call = "study/list"
-    if limit is not None:
-        api_call += f"/limit/{limit}"
-    if offset is not None:
-        api_call += f"/offset/{offset}"
-    if kwargs is not None:
-        for operator, value in kwargs.items():
-            if value is not None:
-                api_call += f"/{operator}/{value}"
-    return __list_studies(api_call=api_call)
-
-
-def __list_studies(api_call: str) -> pd.DataFrame:
-    """Retrieves the list of OpenML studies and
-    returns it in a dictionary or a Pandas DataFrame.
-
-    Parameters
-    ----------
-    api_call : str
-        The API call for retrieving the list of OpenML studies.
-
-    Returns
-    -------
-    pd.DataFrame
-        A Pandas DataFrame of OpenML studies
-    """
-    xml_string = openml._api_calls._perform_api_call(api_call, "get")
-    study_dict = xmltodict.parse(xml_string, force_list=("oml:study",))
-
-    # Minimalistic check if the XML is useful
-    assert isinstance(study_dict["oml:study_list"]["oml:study"], list), type(
-        study_dict["oml:study_list"],
-    )
-    assert study_dict["oml:study_list"]["@xmlns:oml"] == "http://openml.org/openml", study_dict[
-        "oml:study_list"
-    ]["@xmlns:oml"]
-
-    studies = {}
-    for study_ in study_dict["oml:study_list"]["oml:study"]:
-        # maps from xml name to a tuple of (dict name, casting fn)
-        expected_fields = {
-            "oml:id": ("id", int),
-            "oml:alias": ("alias", str),
-            "oml:main_entity_type": ("main_entity_type", str),
-            "oml:benchmark_suite": ("benchmark_suite", int),
-            "oml:name": ("name", str),
-            "oml:status": ("status", str),
-            "oml:creation_date": ("creation_date", str),
-            "oml:creator": ("creator", int),
-        }
-        study_id = int(study_["oml:id"])
-        current_study = {}
-        for oml_field_name, (real_field_name, cast_fn) in expected_fields.items():
-            if oml_field_name in study_:
-                current_study[real_field_name] = cast_fn(study_[oml_field_name])
-        current_study["id"] = int(current_study["id"])
-        studies[study_id] = current_study
-
-    return pd.DataFrame.from_dict(studies, orient="index")
diff --git a/tests/test_api/test_study.py b/tests/test_api/test_study.py

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,7 @@`
`25`	`25`	`ResourceType.DATASET,`
`26`	`26`	`ResourceType.TASK,`
`27`	`27`	`ResourceType.FLOW,`
	`28`	`+ ResourceType.STUDY,`
`28`	`29`	`ResourceType.SETUP,`
`29`	`30`	`ResourceType.RUN,`
`30`	`31`	`]`