@@ -531,27 +531,20 @@ def test_deletion_of_cache_dir(self):
531531 def test_deletion_of_cache_dir_faulty_download (self , patch ):
532532 patch .side_effect = Exception ("Boom!" )
533533 self .assertRaisesRegex (Exception , "Boom!" , openml .datasets .get_dataset , dataset_id = 1 )
534- datasets_cache_dir = os .path .join (self . workdir , "org" , " openml" , "test" , "datasets" )
534+ datasets_cache_dir = os .path .join (openml . config . get_cache_directory () , "datasets" )
535535 assert len (os .listdir (datasets_cache_dir )) == 0
536536
537537 @pytest .mark .test_server ()
538538 def test_publish_dataset (self ):
539- # lazy loading not possible as we need the arff-file.
540- openml .datasets .get_dataset (3 , download_data = True )
541- file_path = os .path .join (
542- openml .config .get_cache_directory (),
543- "datasets" ,
544- "3" ,
545- "dataset.arff" ,
546- )
539+ arff_file_path = self .static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
547540 dataset = OpenMLDataset (
548541 "anneal" ,
549542 "test" ,
550543 data_format = "arff" ,
551544 version = 1 ,
552545 licence = "public" ,
553546 default_target_attribute = "class" ,
554- data_file = file_path ,
547+ data_file = arff_file_path ,
555548 )
556549 dataset .publish ()
557550 TestBase ._mark_entity_for_removal ("data" , dataset .dataset_id )
@@ -895,7 +888,7 @@ def test_create_invalid_dataset(self):
895888
896889 @pytest .mark .test_server ()
897890 def test_get_online_dataset_arff (self ):
898- dataset_id = 100 # Australian
891+ dataset_id = 128 # iris -- one of the few datasets without parquet file
899892 # lazy loading not used as arff file is checked.
900893 dataset = openml .datasets .get_dataset (dataset_id , download_data = True )
901894 decoder = arff .ArffDecoder ()
@@ -1473,8 +1466,9 @@ def test_data_edit_critical_field(self):
14731466 raise e
14741467 time .sleep (10 )
14751468 # Delete the cache dir to get the newer version of the dataset
1469+
14761470 shutil .rmtree (
1477- os .path .join (self . workdir , "org" , " openml" , "test" , "datasets" , str (did )),
1471+ os .path .join (openml . config . get_cache_directory () , "datasets" , str (did )),
14781472 )
14791473
14801474 @pytest .mark .test_server ()
@@ -1740,7 +1734,6 @@ def test_delete_dataset(self):
17401734
17411735@mock .patch .object (requests .Session , "delete" )
17421736def test_delete_dataset_not_owned (mock_delete , test_files_directory , test_api_key ):
1743- openml .config .start_using_configuration_for_example ()
17441737 content_file = (
17451738 test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
17461739 )
@@ -1755,14 +1748,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
17551748 ):
17561749 openml .datasets .delete_dataset (40_000 )
17571750
1758- dataset_url = "https://test. openml.org /api/v1/xml/data/40000"
1751+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/40000"
17591752 assert dataset_url == mock_delete .call_args .args [0 ]
17601753 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
17611754
17621755
17631756@mock .patch .object (requests .Session , "delete" )
17641757def test_delete_dataset_with_run (mock_delete , test_files_directory , test_api_key ):
1765- openml .config .start_using_configuration_for_example ()
17661758 content_file = (
17671759 test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
17681760 )
@@ -1777,14 +1769,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
17771769 ):
17781770 openml .datasets .delete_dataset (40_000 )
17791771
1780- dataset_url = "https://test. openml.org /api/v1/xml/data/40000"
1772+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/40000"
17811773 assert dataset_url == mock_delete .call_args .args [0 ]
17821774 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
17831775
17841776
17851777@mock .patch .object (requests .Session , "delete" )
17861778def test_delete_dataset_success (mock_delete , test_files_directory , test_api_key ):
1787- openml .config .start_using_configuration_for_example ()
17881779 content_file = (
17891780 test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
17901781 )
@@ -1796,14 +1787,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
17961787 success = openml .datasets .delete_dataset (40000 )
17971788 assert success
17981789
1799- dataset_url = "https://test. openml.org /api/v1/xml/data/40000"
1790+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/40000"
18001791 assert dataset_url == mock_delete .call_args .args [0 ]
18011792 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
18021793
18031794
18041795@mock .patch .object (requests .Session , "delete" )
18051796def test_delete_unknown_dataset (mock_delete , test_files_directory , test_api_key ):
1806- openml .config .start_using_configuration_for_example ()
18071797 content_file = (
18081798 test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
18091799 )
@@ -1818,7 +1808,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
18181808 ):
18191809 openml .datasets .delete_dataset (9_999_999 )
18201810
1821- dataset_url = "https://test. openml.org /api/v1/xml/data/9999999"
1811+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/9999999"
18221812 assert dataset_url == mock_delete .call_args .args [0 ]
18231813 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
18241814
@@ -1913,9 +1903,8 @@ def _dataset_features_is_downloaded(did: int):
19131903
19141904
def _dataset_data_file_is_downloaded(did: int) -> bool:
    """Return whether a data file for dataset ``did`` is present in the cache.

    The dataset's cache directory is ``<cache root>/datasets/<did>``, where the
    cache root comes from ``openml.config.get_cache_directory()``. Any file in
    that directory whose suffix is ``.pq`` or ``.arff`` counts as a data file;
    the file name itself is not checked.

    Parameters
    ----------
    did : int
        The dataset id whose cache directory is inspected.

    Returns
    -------
    bool
        ``True`` if at least one ``.pq`` or ``.arff`` file is in the dataset's
        cache directory, ``False`` otherwise -- including when the directory
        does not exist.
    """
    cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
    # ``Path.iterdir`` raises FileNotFoundError for a missing directory; a
    # dataset that was never cached simply has no data file downloaded.
    if not cache_directory.is_dir():
        return False
    return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
19191908
19201909
19211910def _assert_datasets_retrieved_successfully (
@@ -2020,7 +2009,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
20202009 test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
20212010 )
20222011 # While the mocked example is from production, unit tests by default connect to the test server.
2023- requests_mock .get ("https://test. openml.org /api/v1/xml/data/61" , text = content_file .read_text ())
2012+ requests_mock .get (f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/61" , text = content_file .read_text ())
20242013 dataset = openml .datasets .get_dataset (61 , download_data = True )
20252014 assert dataset ._parquet_url is not None
20262015 assert dataset .parquet_file is not None
0 commit comments