|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -from .base import ResourceV1API, ResourceV2API, StudyAPI |
| 3 | +import builtins |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | +import xmltodict |
| 7 | + |
| 8 | +from openml._api.resources.base import ResourceV1API, ResourceV2API, StudyAPI |
4 | 9 |
|
5 | 10 |
|
class StudyV1API(ResourceV1API, StudyAPI):
    """Version 1 API implementation for study resources."""

    def list(  # noqa: PLR0913
        self,
        limit: int | None = None,
        offset: int | None = None,
        status: str | None = None,
        main_entity_type: str | None = None,
        uploader: builtins.list[int] | None = None,
        benchmark_suite: int | None = None,
    ) -> pd.DataFrame:
        """List studies using V1 API.

        Builds the ``study/list`` call from the given filters, fetches it through
        ``self._http`` and parses the UTF-8 decoded XML body into a DataFrame.

        Parameters
        ----------
        limit : int, optional
            Maximum number of studies to return.
        offset : int, optional
            Number of studies to skip.
        status : str, optional
            Filter by status (active, in_preparation, deactivated, all).
        main_entity_type : str, optional
            Filter by main entity type (run, task).
        uploader : list[int], optional
            Filter by uploader IDs.
        benchmark_suite : int, optional
            Filter by benchmark suite ID.

        Returns
        -------
        pd.DataFrame
            DataFrame containing study information.
        """
        api_call = self._build_url(
            limit=limit,
            offset=offset,
            status=status,
            main_entity_type=main_entity_type,
            uploader=uploader,
            benchmark_suite=benchmark_suite,
        )
        response = self._http.get(api_call)
        xml_string = response.content.decode("utf-8")
        return self._parse_list_xml(xml_string)

    @staticmethod
    def _build_url(  # noqa: PLR0913
        limit: int | None = None,
        offset: int | None = None,
        status: str | None = None,
        main_entity_type: str | None = None,
        uploader: builtins.list[int] | None = None,
        benchmark_suite: int | None = None,
    ) -> str:
        """Build the V1 API URL for listing studies.

        Every filter that is not ``None`` is appended as a ``/<name>/<value>``
        path segment, in the fixed order limit, offset, status,
        main_entity_type, uploader, benchmark_suite.

        Parameters
        ----------
        limit : int, optional
            Maximum number of studies to return.
        offset : int, optional
            Number of studies to skip.
        status : str, optional
            Filter by status (active, in_preparation, deactivated, all).
        main_entity_type : str, optional
            Filter by main entity type (run, task).
        uploader : list[int], optional
            Filter by uploader IDs; the IDs are joined with commas.
        benchmark_suite : int, optional
            Filter by benchmark suite ID.

        Returns
        -------
        str
            The API call string with all filters applied.
        """
        api_call = "study/list"

        if limit is not None:
            api_call += f"/limit/{limit}"
        if offset is not None:
            api_call += f"/offset/{offset}"
        if status is not None:
            api_call += f"/status/{status}"
        if main_entity_type is not None:
            api_call += f"/main_entity_type/{main_entity_type}"
        if uploader is not None:
            api_call += f"/uploader/{','.join(str(u) for u in uploader)}"
        if benchmark_suite is not None:
            api_call += f"/benchmark_suite/{benchmark_suite}"

        return api_call

    @staticmethod
    def _parse_list_xml(xml_string: str) -> pd.DataFrame:
        """Parse the XML response from study list API.

        Parameters
        ----------
        xml_string : str
            The XML response from the API.

        Returns
        -------
        pd.DataFrame
            DataFrame containing study information, indexed by study id. Only
            the fields present in each ``oml:study`` element are filled in.

        Raises
        ------
        KeyError
            If the document has no ``oml:study_list`` / ``oml:study`` element,
            or a study has no ``oml:id``.
        AssertionError
            If the sanity checks on the parsed document fail.
        """
        # Maps from xml name to a tuple of (dict name, casting fn). Built once,
        # outside the per-study loop, because it never changes between studies.
        expected_fields = {
            "oml:id": ("id", int),
            "oml:alias": ("alias", str),
            "oml:main_entity_type": ("main_entity_type", str),
            "oml:benchmark_suite": ("benchmark_suite", int),
            "oml:name": ("name", str),
            "oml:status": ("status", str),
            "oml:creation_date": ("creation_date", str),
            "oml:creator": ("creator", int),
        }

        # force_list makes "oml:study" a list even when only one study is returned.
        study_dict = xmltodict.parse(xml_string, force_list=("oml:study",))
        study_list = study_dict["oml:study_list"]
        study_entries = study_list["oml:study"]

        # Minimalistic check if the XML is useful. The failure message reports
        # the type of the object that was actually checked.
        assert isinstance(study_entries, list), type(study_entries)
        assert study_list["@xmlns:oml"] == "http://openml.org/openml", study_list["@xmlns:oml"]

        studies = {}
        for study_ in study_entries:
            current_study = {
                real_field_name: cast_fn(study_[oml_field_name])
                for oml_field_name, (real_field_name, cast_fn) in expected_fields.items()
                if oml_field_name in study_
            }
            # "id" has already been cast to int by the mapping above; a study
            # without an id raises KeyError here.
            studies[current_study["id"]] = current_study

        return pd.DataFrame.from_dict(studies, orient="index")
8 | 149 |
|
9 | 150 |
|
class StudyV2API(ResourceV2API, StudyAPI):
    """Version 2 API implementation for study resources."""

    def list(  # noqa: PLR0913
        self,
        limit: int | None = None,  # noqa: ARG002
        offset: int | None = None,  # noqa: ARG002
        status: str | None = None,  # noqa: ARG002
        main_entity_type: str | None = None,  # noqa: ARG002
        uploader: builtins.list[int] | None = None,  # noqa: ARG002
        benchmark_suite: int | None = None,  # noqa: ARG002
    ) -> pd.DataFrame:
        """Report that listing studies is not yet available in the V2 API.

        All filter arguments are accepted only so the signature matches the
        V1 implementation; none of them is used. The call is delegated to
        ``self._not_supported(method="list")``.

        NOTE(review): ``_not_supported`` is defined outside this file and
        presumably raises; confirm against the base class.
        """
        self._not_supported(method="list")
0 commit comments