Skip to content

Commit 65ee864

Browse files
authored
Merge branch 'main' into runs-migration-stacked
2 parents 74eb7ae + c62bf51 commit 65ee864

5 files changed

Lines changed: 393 additions & 86 deletions

File tree

openml/_api/resources/base/resources.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,17 @@ class StudyAPI(ResourceAPI):
8484

8585
resource_type: ResourceType = ResourceType.STUDY
8686

87+
@abstractmethod
def list(  # noqa: PLR0913
    self,
    limit: int | None = None,
    offset: int | None = None,
    status: str | None = None,
    main_entity_type: str | None = None,
    uploader: list[int] | None = None,
    benchmark_suite: int | None = None,
) -> pd.DataFrame:
    """List studies, optionally narrowed by the given filters.

    Abstract hook: each API-version class supplies its own implementation.
    Every filter defaults to ``None``.

    Parameters
    ----------
    limit : int, optional
        Maximum number of studies to return.
    offset : int, optional
        Number of studies to skip.
    status : str, optional
        Filter by status.
    main_entity_type : str, optional
        Filter by main entity type.
    uploader : list[int], optional
        Filter by uploader IDs.
    benchmark_suite : int, optional
        Filter by benchmark suite ID.

    Returns
    -------
    pd.DataFrame
        DataFrame containing study information.
    """
    ...
97+
8798

8899
class RunAPI(ResourceAPI):
89100
"""Abstract API interface for run resources."""

openml/_api/resources/base/versions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
ResourceType.DATASET,
2626
ResourceType.TASK,
2727
ResourceType.FLOW,
28+
ResourceType.STUDY,
2829
ResourceType.SETUP,
2930
ResourceType.RUN,
3031
]

openml/_api/resources/study.py

Lines changed: 154 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,162 @@
11
from __future__ import annotations
22

3-
from .base import ResourceV1API, ResourceV2API, StudyAPI
3+
import builtins
4+
5+
import pandas as pd
6+
import xmltodict
7+
8+
from openml._api.resources.base import ResourceV1API, ResourceV2API, StudyAPI
49

510

611
class StudyV1API(ResourceV1API, StudyAPI):
7-
"""Version 1 API implementation for study resources."""
12+
def list(  # noqa: PLR0913
    self,
    limit: int | None = None,
    offset: int | None = None,
    status: str | None = None,
    main_entity_type: str | None = None,
    uploader: builtins.list[int] | None = None,
    benchmark_suite: int | None = None,
) -> pd.DataFrame:
    """Fetch the study listing through the V1 API and return it as a DataFrame.

    The filters are turned into a ``study/list/...`` call by ``_build_url``,
    the call is issued through ``self._http``, and the UTF-8 decoded XML body
    is converted by ``_parse_list_xml``.

    Parameters
    ----------
    limit : int, optional
        Maximum number of studies to return.
    offset : int, optional
        Number of studies to skip.
    status : str, optional
        Filter by status (active, in_preparation, deactivated, all).
    main_entity_type : str, optional
        Filter by main entity type (run, task).
    uploader : list[int], optional
        Filter by uploader IDs.
    benchmark_suite : int, optional
        Filter by benchmark suite ID.

    Returns
    -------
    pd.DataFrame
        DataFrame containing study information.
    """
    filters = {
        "limit": limit,
        "offset": offset,
        "status": status,
        "main_entity_type": main_entity_type,
        "uploader": uploader,
        "benchmark_suite": benchmark_suite,
    }
    reply = self._http.get(self._build_url(**filters))
    return self._parse_list_xml(reply.content.decode("utf-8"))
54+
55+
@staticmethod
56+
def _build_url( # noqa: PLR0913
57+
limit: int | None = None,
58+
offset: int | None = None,
59+
status: str | None = None,
60+
main_entity_type: str | None = None,
61+
uploader: builtins.list[int] | None = None,
62+
benchmark_suite: int | None = None,
63+
) -> str:
64+
"""Build the V1 API URL for listing studies.
65+
66+
Parameters
67+
----------
68+
limit : int, optional
69+
Maximum number of studies to return.
70+
offset : int, optional
71+
Number of studies to skip.
72+
status : str, optional
73+
Filter by status (active, in_preparation, deactivated, all).
74+
main_entity_type : str, optional
75+
Filter by main entity type (run, task).
76+
uploader : list[int], optional
77+
Filter by uploader IDs.
78+
benchmark_suite : int, optional
79+
Filter by benchmark suite ID.
80+
81+
Returns
82+
-------
83+
str
84+
The API call string with all filters applied.
85+
"""
86+
api_call = "study/list"
87+
88+
if limit is not None:
89+
api_call += f"/limit/{limit}"
90+
if offset is not None:
91+
api_call += f"/offset/{offset}"
92+
if status is not None:
93+
api_call += f"/status/{status}"
94+
if main_entity_type is not None:
95+
api_call += f"/main_entity_type/{main_entity_type}"
96+
if uploader is not None:
97+
api_call += f"/uploader/{','.join(str(u) for u in uploader)}"
98+
if benchmark_suite is not None:
99+
api_call += f"/benchmark_suite/{benchmark_suite}"
100+
101+
return api_call
102+
103+
@staticmethod
def _parse_list_xml(xml_string: str) -> pd.DataFrame:
    """Parse the XML response from study list API.

    Parameters
    ----------
    xml_string : str
        The XML response from the API.

    Returns
    -------
    pd.DataFrame
        DataFrame containing study information, indexed by study id. Only
        the fields present in each ``oml:study`` element become columns.

    Raises
    ------
    KeyError
        If ``oml:study_list``, ``oml:study`` or a study's ``oml:id`` is missing.
    AssertionError
        If the study entries are not a list or the namespace is unexpected.
    """
    # maps from xml name to a tuple of (dict name, casting fn);
    # loop-invariant, so built once rather than per study
    expected_fields = {
        "oml:id": ("id", int),
        "oml:alias": ("alias", str),
        "oml:main_entity_type": ("main_entity_type", str),
        "oml:benchmark_suite": ("benchmark_suite", int),
        "oml:name": ("name", str),
        "oml:status": ("status", str),
        "oml:creation_date": ("creation_date", str),
        "oml:creator": ("creator", int),
    }

    # force_list keeps a single <oml:study> element from collapsing to a dict
    study_list = xmltodict.parse(xml_string, force_list=("oml:study",))["oml:study_list"]
    study_entries = study_list["oml:study"]

    # Minimalistic check if the XML is useful; the message reports the type
    # of the object that was actually checked.
    assert isinstance(study_entries, list), type(study_entries)
    assert study_list["@xmlns:oml"] == "http://openml.org/openml", study_list["@xmlns:oml"]

    studies = {}
    for study_ in study_entries:
        study_id = int(study_["oml:id"])
        # "id" is already cast to int through expected_fields; no second cast needed
        studies[study_id] = {
            real_field_name: cast_fn(study_[oml_field_name])
            for oml_field_name, (real_field_name, cast_fn) in expected_fields.items()
            if oml_field_name in study_
        }

    return pd.DataFrame.from_dict(studies, orient="index")
8149

9150

10151
class StudyV2API(ResourceV2API, StudyAPI):
11-
"""Version 2 API implementation for study resources."""
152+
def list(  # noqa: PLR0913
    self,
    limit: int | None = None,  # noqa: ARG002
    offset: int | None = None,  # noqa: ARG002
    status: str | None = None,  # noqa: ARG002
    main_entity_type: str | None = None,  # noqa: ARG002
    uploader: builtins.list[int] | None = None,  # noqa: ARG002
    benchmark_suite: int | None = None,  # noqa: ARG002
) -> pd.DataFrame:
    """V2 API for listing studies is not yet available.

    The parameters mirror the V1 ``list`` signature but none of them is
    read; the call is handed straight to ``self._not_supported``.

    NOTE(review): presumably ``_not_supported`` always raises; if it can
    return, this method returns ``None`` despite the ``pd.DataFrame``
    annotation. Confirm against the base class.
    """
    self._not_supported(method="list")

openml/study/functions.py

Lines changed: 6 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import warnings
55
from functools import partial
6-
from typing import TYPE_CHECKING, Any
6+
from typing import TYPE_CHECKING
77

88
import pandas as pd
99
import xmltodict
@@ -336,7 +336,8 @@ def delete_study(study_id: int) -> bool:
336336
bool
337337
True iff the deletion was successful. False otherwise
338338
"""
339-
return openml.utils._delete_entity("study", study_id)
339+
result: bool = openml._backend.study.delete(study_id)
340+
return result
340341

341342

342343
def attach_to_suite(suite_id: int, task_ids: list[int]) -> int:
@@ -465,7 +466,7 @@ def list_suites(
465466
- creation_date
466467
"""
467468
listing_call = partial(
468-
_list_studies,
469+
openml._backend.study.list,
469470
main_entity_type="task",
470471
status=status,
471472
uploader=uploader,
@@ -481,7 +482,7 @@ def list_studies(
481482
offset: int | None = None,
482483
size: int | None = None,
483484
status: str | None = None,
484-
uploader: list[str] | None = None,
485+
uploader: list[int] | None = None,
485486
benchmark_suite: int | None = None,
486487
) -> pd.DataFrame:
487488
"""
@@ -516,7 +517,7 @@ def list_studies(
516517
these are also returned.
517518
"""
518519
listing_call = partial(
519-
_list_studies,
520+
openml._backend.study.list,
520521
main_entity_type="run",
521522
status=status,
522523
uploader=uploader,
@@ -527,81 +528,3 @@ def list_studies(
527528
return pd.DataFrame()
528529

529530
return pd.concat(batches)
530-
531-
532-
def _list_studies(limit: int, offset: int, **kwargs: Any) -> pd.DataFrame:
    """Perform api call to return a list of studies.

    Parameters
    ----------
    limit: int
        The maximum number of studies to return.
    offset: int
        The number of studies to skip, starting from the first.
    kwargs : dict, optional
        Legal filter operators (keys in the dict):
        status, main_entity_type, uploader, benchmark_suite.
        Filters whose value is ``None`` are skipped; list or tuple values
        are sent as one comma-separated path segment.

    Returns
    -------
    studies : dataframe
    """
    api_call = "study/list"
    if limit is not None:
        api_call += f"/limit/{limit}"
    if offset is not None:
        api_call += f"/offset/{offset}"
    # **kwargs is always a dict, so no None check is needed on it.
    for operator, value in kwargs.items():
        if value is None:
            continue
        if isinstance(value, (list, tuple)):
            # Interpolating the sequence directly would put its Python repr
            # ("[1, 2]") into the URL; join the items instead.
            rendered = ",".join(str(item) for item in value)
        else:
            rendered = value
        api_call += f"/{operator}/{rendered}"
    return __list_studies(api_call=api_call)
559-
560-
561-
def __list_studies(api_call: str) -> pd.DataFrame:
    """Retrieve the list of OpenML studies as a Pandas DataFrame.

    Parameters
    ----------
    api_call : str
        The API call for retrieving the list of OpenML studies.

    Returns
    -------
    pd.DataFrame
        A Pandas DataFrame of OpenML studies, indexed by study id. Only the
        fields present in each ``oml:study`` element become columns.
    """
    # maps from xml name to a tuple of (dict name, casting fn);
    # loop-invariant, so built once rather than per study
    expected_fields = {
        "oml:id": ("id", int),
        "oml:alias": ("alias", str),
        "oml:main_entity_type": ("main_entity_type", str),
        "oml:benchmark_suite": ("benchmark_suite", int),
        "oml:name": ("name", str),
        "oml:status": ("status", str),
        "oml:creation_date": ("creation_date", str),
        "oml:creator": ("creator", int),
    }

    xml_string = openml._api_calls._perform_api_call(api_call, "get")
    # force_list keeps a single <oml:study> element from collapsing to a dict
    study_list = xmltodict.parse(xml_string, force_list=("oml:study",))["oml:study_list"]
    study_entries = study_list["oml:study"]

    # Minimalistic check if the XML is useful; the message reports the type
    # of the object that was actually checked.
    assert isinstance(study_entries, list), type(study_entries)
    assert study_list["@xmlns:oml"] == "http://openml.org/openml", study_list["@xmlns:oml"]

    studies = {}
    for study_ in study_entries:
        study_id = int(study_["oml:id"])
        # "id" is already cast to int through expected_fields; no second cast needed
        studies[study_id] = {
            real_field_name: cast_fn(study_[oml_field_name])
            for oml_field_name, (real_field_name, cast_fn) in expected_fields.items()
            if oml_field_name in study_
        }

    return pd.DataFrame.from_dict(studies, orient="index")

0 commit comments

Comments
 (0)