1818from openml .datasets .dataset import OpenMLDataset
1919from openml .exceptions import (
2020 OpenMLHashException ,
21- OpenMLMinioRequiredError ,
2221 OpenMLPrivateDatasetError ,
2322 OpenMLServerException ,
2423)
@@ -466,9 +465,7 @@ def parse_features_file(
466465 ) -> dict [int , OpenMLDataFeature ]:
467466 if features_pickle_file is None :
468467 features_pickle_file = features_file .with_suffix (features_file .suffix + ".pkl" )
469- if features_file .suffix != ".xml" :
470- # TODO (Shrivaths) can only parse xml warn/ raise exception
471- raise NotImplementedError ()
468+ assert features_file .suffix == ".xml"
472469
473470 with Path (features_file ).open ("r" , encoding = "utf8" ) as fh :
474471 features_xml = fh .read ()
@@ -485,9 +482,7 @@ def parse_qualities_file(
485482 ) -> dict [str , float ]:
486483 if qualities_pickle_file is None :
487484 qualities_pickle_file = qualities_file .with_suffix (qualities_file .suffix + ".pkl" )
488- if qualities_file .suffix != ".xml" :
489- # TODO (Shrivaths) can only parse xml warn/ raise exception
490- raise NotImplementedError ()
485+ assert qualities_file .suffix == ".xml"
491486
492487 with Path (qualities_file ).open ("r" , encoding = "utf8" ) as fh :
493488 qualities_xml = fh .read ()
@@ -582,25 +577,19 @@ def __handler(response: Response, path: Path, encoding: str) -> Path:
582577
583578 def download_features_file (self , dataset_id : int ) -> Path :
584579 path = f"data/features/{ dataset_id } "
585- file = self ._download_file (path , "features.xml" )
586- _ = self .parse_features_file (file )
587- return file
580+ return self ._download_file (path , "features.xml" )
588581
589582 def download_qualities_file (self , dataset_id : int ) -> Path :
590583 path = f"data/qualities/{ dataset_id } "
591- file = self ._download_file (path , "qualities.xml" )
592- _ = self .parse_qualities_file (file )
593- return file
584+ return self ._download_file (path , "qualities.xml" )
594585
595586 def download_dataset_parquet (
596587 self ,
597588 description : dict | OpenMLDataset ,
598589 download_all_files : bool = False , # noqa: FBT002
599590 ) -> Path | None :
600- if self ._minio is None :
601- raise OpenMLMinioRequiredError (
602- "A minio object is required for Dataset, but none was provided"
603- )
591+ assert self ._minio is not None # for mypy
592+
604593 if isinstance (description , dict ):
605594 url = str (description .get ("oml:parquet_url" ))
606595 elif isinstance (description , OpenMLDataset ):
@@ -1026,25 +1015,19 @@ def __handler(response: Response, path: Path, encoding: str) -> Path:
10261015
10271016 def download_features_file (self , dataset_id : int ) -> Path :
10281017 path = f"datasets/features/{ dataset_id } "
1029- file = self ._download_file (path , "features.json" )
1030- _ = self .parse_features_file (file )
1031- return file
1018+ return self ._download_file (path , "features.json" )
10321019
10331020 def download_qualities_file (self , dataset_id : int ) -> Path :
10341021 path = f"datasets/qualities/{ dataset_id } "
1035- file = self ._download_file (path , "qualities.json" )
1036- _ = self .parse_qualities_file (file )
1037- return file
1022+ return self ._download_file (path , "qualities.json" )
10381023
10391024 def download_dataset_parquet (
10401025 self ,
10411026 description : dict | OpenMLDataset ,
10421027 download_all_files : bool = False , # noqa: FBT002
10431028 ) -> Path | None :
1044- if self ._minio is None :
1045- raise OpenMLMinioRequiredError (
1046- "A minio object is required for Dataset, but none was provided"
1047- )
1029+ assert self ._minio is not None # for mypy
1030+
10481031 if isinstance (description , dict ):
10491032 url = str (description .get ("parquet_url" ))
10501033 elif isinstance (description , OpenMLDataset ):
@@ -1095,7 +1078,7 @@ def delete_topic(self, data_id: int, topic: str) -> int:
10951078 raise NotImplementedError (self ._not_supported (method = "delete_topic" ))
10961079
10971080 def get_online_dataset_format (self , dataset_id : int ) -> str :
1098- dataset_json = self ._http .get (f"datasets/{ dataset_id } " ).text
1081+ dataset_json = self ._http .get (f"datasets/{ dataset_id } " ).json ()
10991082 # parse the JSON response into a dict and get the format from the dataset description
11001083 return dataset_json ["data_set_description" ]["format" ].lower () # type: ignore
11011084
0 commit comments