Skip to content

Commit 707e1f1

Browse files
committed
publish,tag methods need testing
1 parent be29dc9 commit 707e1f1

3 files changed

Lines changed: 54 additions & 32 deletions

File tree

openml/_api/resources/dataset.py

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from openml.datasets.dataset import OpenMLDataset
1919
from openml.exceptions import (
2020
OpenMLHashException,
21-
OpenMLMinioRequiredError,
2221
OpenMLPrivateDatasetError,
2322
OpenMLServerException,
2423
)
@@ -466,9 +465,7 @@ def parse_features_file(
466465
) -> dict[int, OpenMLDataFeature]:
467466
if features_pickle_file is None:
468467
features_pickle_file = features_file.with_suffix(features_file.suffix + ".pkl")
469-
if features_file.suffix != ".xml":
470-
# TODO (Shrivaths) can only parse xml warn/ raise exception
471-
raise NotImplementedError()
468+
assert features_file.suffix == ".xml"
472469

473470
with Path(features_file).open("r", encoding="utf8") as fh:
474471
features_xml = fh.read()
@@ -485,9 +482,7 @@ def parse_qualities_file(
485482
) -> dict[str, float]:
486483
if qualities_pickle_file is None:
487484
qualities_pickle_file = qualities_file.with_suffix(qualities_file.suffix + ".pkl")
488-
if qualities_file.suffix != ".xml":
489-
# TODO (Shrivaths) can only parse xml warn/ raise exception
490-
raise NotImplementedError()
485+
assert qualities_file.suffix == ".xml"
491486

492487
with Path(qualities_file).open("r", encoding="utf8") as fh:
493488
qualities_xml = fh.read()
@@ -582,25 +577,19 @@ def __handler(response: Response, path: Path, encoding: str) -> Path:
582577

583578
def download_features_file(self, dataset_id: int) -> Path:
584579
path = f"data/features/{dataset_id}"
585-
file = self._download_file(path, "features.xml")
586-
_ = self.parse_features_file(file)
587-
return file
580+
return self._download_file(path, "features.xml")
588581

589582
def download_qualities_file(self, dataset_id: int) -> Path:
590583
path = f"data/qualities/{dataset_id}"
591-
file = self._download_file(path, "qualities.xml")
592-
_ = self.parse_qualities_file(file)
593-
return file
584+
return self._download_file(path, "qualities.xml")
594585

595586
def download_dataset_parquet(
596587
self,
597588
description: dict | OpenMLDataset,
598589
download_all_files: bool = False, # noqa: FBT002
599590
) -> Path | None:
600-
if self._minio is None:
601-
raise OpenMLMinioRequiredError(
602-
"A minio object is required for Dataset, but none was provided"
603-
)
591+
assert self._minio is not None # for mypy
592+
604593
if isinstance(description, dict):
605594
url = str(description.get("oml:parquet_url"))
606595
elif isinstance(description, OpenMLDataset):
@@ -1026,25 +1015,19 @@ def __handler(response: Response, path: Path, encoding: str) -> Path:
10261015

10271016
def download_features_file(self, dataset_id: int) -> Path:
10281017
path = f"datasets/features/{dataset_id}"
1029-
file = self._download_file(path, "features.json")
1030-
_ = self.parse_features_file(file)
1031-
return file
1018+
return self._download_file(path, "features.json")
10321019

10331020
def download_qualities_file(self, dataset_id: int) -> Path:
10341021
path = f"datasets/qualities/{dataset_id}"
1035-
file = self._download_file(path, "qualities.json")
1036-
_ = self.parse_qualities_file(file)
1037-
return file
1022+
return self._download_file(path, "qualities.json")
10381023

10391024
def download_dataset_parquet(
10401025
self,
10411026
description: dict | OpenMLDataset,
10421027
download_all_files: bool = False, # noqa: FBT002
10431028
) -> Path | None:
1044-
if self._minio is None:
1045-
raise OpenMLMinioRequiredError(
1046-
"A minio object is required for Dataset, but none was provided"
1047-
)
1029+
assert self._minio is not None # for mypy
1030+
10481031
if isinstance(description, dict):
10491032
url = str(description.get("parquet_url"))
10501033
elif isinstance(description, OpenMLDataset):
@@ -1095,7 +1078,7 @@ def delete_topic(self, data_id: int, topic: str) -> int:
10951078
raise NotImplementedError(self._not_supported(method="delete_topic"))
10961079

10971080
def get_online_dataset_format(self, dataset_id: int) -> str:
1098-
dataset_json = self._http.get(f"datasets/{dataset_id}").text
1081+
dataset_json = self._http.get(f"datasets/{dataset_id}").json()
10991082
# parse the JSON response and get the format from the dataset description
11001083
return dataset_json["data_set_description"]["format"].lower() # type: ignore
11011084

openml/datasets/dataset.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,49 @@ def _parse_publish_response(self, xml_response: dict) -> None:
949949
"""Parse the id from the xml_response and assign it to self."""
950950
self.dataset_id = int(xml_response["oml:upload_data_set"]["oml:id"])
951951

952+
def publish(self) -> OpenMLDataset:
953+
"""Publish this dataset to the OpenML server.
954+
955+
Returns
956+
-------
957+
self : OpenMLDataset
958+
"""
959+
file_elements = self._get_file_elements()
960+
if "description" not in file_elements:
961+
file_elements["description"] = self._to_xml()
962+
963+
dataset_id = openml._backend.dataset.publish(path="data", files=file_elements)
964+
self.dataset_id = dataset_id
965+
return self
966+
967+
def push_tag(self, tag: str) -> None:
968+
"""Annotates this dataset with a tag on the server.
969+
970+
Parameters
971+
----------
972+
tag : str
973+
Tag to attach to the dataset.
974+
"""
975+
if self.dataset_id is None:
976+
raise ValueError(
977+
"Dataset does not have an ID. Please publish the dataset before tagging."
978+
)
979+
openml._backend.dataset.tag(self.dataset_id, tag)
980+
981+
def remove_tag(self, tag: str) -> None:
982+
"""Removes a tag from this dataset on the server.
983+
984+
Parameters
985+
----------
986+
tag : str
987+
Tag to remove from the dataset.
988+
"""
989+
if self.dataset_id is None:
990+
raise ValueError(
991+
"Dataset does not have an ID. Please publish the dataset before untagging."
992+
)
993+
openml._backend.dataset.untag(self.dataset_id, tag)
994+
952995
def _to_dict(self) -> dict[str, dict]:
953996
"""Creates a dictionary representation of self."""
954997
props = [

openml/exceptions.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,3 @@ class OpenMLNotSupportedError(PyOpenMLError):
7373

7474
class OpenMLCacheRequiredError(PyOpenMLError):
7575
"""Raised when a cache object is required but not provided."""
76-
77-
78-
class OpenMLMinioRequiredError(PyOpenMLError):
79-
"""Raised when a minio object is required but not provided"""

0 commit comments

Comments
 (0)