Skip to content

Commit ffa9ce9

Browse files
authored
Merge branch 'main' into migration
2 parents 1c922af + 1bc9f15 commit ffa9ce9

14 files changed

Lines changed: 83 additions & 76 deletions

File tree

openml/_api_calls.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,8 @@
2222
from . import config
2323
from .__version__ import __version__
2424
from .exceptions import (
25+
OpenMLAuthenticationError,
2526
OpenMLHashException,
26-
OpenMLNotAuthorizedError,
2727
OpenMLServerError,
2828
OpenMLServerException,
2929
OpenMLServerNoResult,
@@ -515,11 +515,7 @@ def __parse_server_exception(
515515
400, # run/42 delete
516516
460, # task/42 delete
517517
]:
518-
msg = (
519-
f"The API call {url} requires authentication via an API key.\nPlease configure "
520-
"OpenML-Python to use your API as described in this example:"
521-
"\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
522-
)
523-
return OpenMLNotAuthorizedError(message=msg)
518+
msg = f"The API call {url} requires authentication via an API key."
519+
return OpenMLAuthenticationError(message=msg)
524520

525521
return OpenMLServerException(code=code, message=full_message, url=url)

openml/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ def check_server(server: str) -> str:
109109

110110
def replace_shorthand(server: str) -> str:
111111
if server == "test":
112-
return "https://test.openml.org/api/v1/xml"
112+
return f"{config.TEST_SERVER_URL}/api/v1/xml"
113113
if server == "production_server":
114114
return "https://www.openml.org/api/v1/xml"
115115
return server

openml/config.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@
3030
OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY"
3131
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
3232

33+
TEST_SERVER_URL = "https://test.openml.org"
34+
3335

3436
class _Config(TypedDict):
3537
apikey: str
@@ -218,7 +220,7 @@ class ConfigurationForExamples:
218220
_last_used_server = None
219221
_last_used_key = None
220222
_start_last_called = False
221-
_test_server = "https://test.openml.org/api/v1/xml"
223+
_test_server = f"{TEST_SERVER_URL}/api/v1/xml"
222224
_test_apikey = _TEST_SERVER_NORMAL_USER_KEY
223225

224226
@classmethod
@@ -482,7 +484,8 @@ def get_cache_directory() -> str:
482484
483485
"""
484486
url_suffix = urlparse(server).netloc
485-
reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118
487+
url_parts = url_suffix.replace(":", "_").split(".")[::-1]
488+
reversed_url_suffix = os.sep.join(url_parts) # noqa: PTH118
486489
return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118
487490

488491

openml/exceptions.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,29 @@ class OpenMLNotAuthorizedError(OpenMLServerError):
6363
"""Indicates an authenticated user is not authorized to execute the requested action."""
6464

6565

66+
class OpenMLAuthenticationError(OpenMLServerError):
67+
"""Exception raised when API authentication fails.
68+
69+
This typically occurs when:
70+
- No API key is configured
71+
- The API key is invalid or expired
72+
- The API key format is incorrect
73+
74+
This is different from authorization (OpenMLNotAuthorizedError), which occurs
75+
when a valid API key lacks permissions for the requested operation.
76+
"""
77+
78+
def __init__(self, message: str):
79+
help_text = (
80+
"\n\nTo fix this:\n"
81+
"1. Get your API key from https://www.openml.org/\n"
82+
" (you'll need to register for a free account if you don't have one)\n"
83+
"2. Configure your API key by following the authentication guide:\n"
84+
" https://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
85+
)
86+
super().__init__(message + help_text)
87+
88+
6689
class ObjectNotPublishedError(PyOpenMLError):
6790
"""Indicates an object has not been published yet."""
6891

openml/tasks/functions.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -415,9 +415,10 @@ def get_task(
415415
if not isinstance(task_id, int):
416416
raise TypeError(f"Task id should be integer, is {type(task_id)}")
417417

418-
cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
419-
tid_cache_dir = cache_key_dir / str(task_id)
420-
tid_cache_dir_existed = tid_cache_dir.exists()
418+
task_cache_directory = openml.utils._create_cache_directory_for_id(
419+
TASKS_CACHE_DIR_NAME, task_id
420+
)
421+
task_cache_directory_existed = task_cache_directory.exists()
421422
try:
422423
task = _get_task_description(task_id)
423424
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
@@ -431,8 +432,8 @@ def get_task(
431432
if download_splits and isinstance(task, OpenMLSupervisedTask):
432433
task.download_split()
433434
except Exception as e:
434-
if not tid_cache_dir_existed:
435-
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
435+
if not task_cache_directory_existed:
436+
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory)
436437
raise e
437438

438439
return task

openml/testing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ class TestBase(unittest.TestCase):
4949
"user": [],
5050
}
5151
flow_name_tracker: ClassVar[list[str]] = []
52-
test_server = "https://test.openml.org/api/v1/xml"
52+
test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
5353
admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR)
5454
user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY
5555

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -279,7 +279,7 @@ def with_server(request):
279279
openml.config._sync_api_config()
280280
yield
281281
return
282-
openml.config.server = "https://test.openml.org/api/v1/xml"
282+
openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
283283
openml.config.apikey = TestBase.user_key
284284
openml.config._sync_api_config()
285285
yield

tests/files/localhost_8000

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
org/openml/test

tests/test_datasets/test_dataset_functions.py

Lines changed: 13 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -531,27 +531,20 @@ def test_deletion_of_cache_dir(self):
531531
def test_deletion_of_cache_dir_faulty_download(self, patch):
532532
patch.side_effect = Exception("Boom!")
533533
self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1)
534-
datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets")
534+
datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets")
535535
assert len(os.listdir(datasets_cache_dir)) == 0
536536

537537
@pytest.mark.test_server()
538538
def test_publish_dataset(self):
539-
# lazy loading not possible as we need the arff-file.
540-
openml.datasets.get_dataset(3, download_data=True)
541-
file_path = os.path.join(
542-
openml.config.get_cache_directory(),
543-
"datasets",
544-
"3",
545-
"dataset.arff",
546-
)
539+
arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
547540
dataset = OpenMLDataset(
548541
"anneal",
549542
"test",
550543
data_format="arff",
551544
version=1,
552545
licence="public",
553546
default_target_attribute="class",
554-
data_file=file_path,
547+
data_file=arff_file_path,
555548
)
556549
dataset.publish()
557550
TestBase._mark_entity_for_removal("data", dataset.dataset_id)
@@ -895,7 +888,7 @@ def test_create_invalid_dataset(self):
895888

896889
@pytest.mark.test_server()
897890
def test_get_online_dataset_arff(self):
898-
dataset_id = 100 # Australian
891+
dataset_id = 128 # iris -- one of the few datasets without parquet file
899892
# lazy loading not used as arff file is checked.
900893
dataset = openml.datasets.get_dataset(dataset_id, download_data=True)
901894
decoder = arff.ArffDecoder()
@@ -1473,8 +1466,9 @@ def test_data_edit_critical_field(self):
14731466
raise e
14741467
time.sleep(10)
14751468
# Delete the cache dir to get the newer version of the dataset
1469+
14761470
shutil.rmtree(
1477-
os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)),
1471+
os.path.join(openml.config.get_cache_directory(), "datasets", str(did)),
14781472
)
14791473

14801474
@pytest.mark.test_server()
@@ -1740,7 +1734,6 @@ def test_delete_dataset(self):
17401734

17411735
@mock.patch.object(requests.Session, "delete")
17421736
def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
1743-
openml.config.start_using_configuration_for_example()
17441737
content_file = (
17451738
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
17461739
)
@@ -1755,14 +1748,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
17551748
):
17561749
openml.datasets.delete_dataset(40_000)
17571750

1758-
dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
1751+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
17591752
assert dataset_url == mock_delete.call_args.args[0]
17601753
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
17611754

17621755

17631756
@mock.patch.object(requests.Session, "delete")
17641757
def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
1765-
openml.config.start_using_configuration_for_example()
17661758
content_file = (
17671759
test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
17681760
)
@@ -1777,14 +1769,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
17771769
):
17781770
openml.datasets.delete_dataset(40_000)
17791771

1780-
dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
1772+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
17811773
assert dataset_url == mock_delete.call_args.args[0]
17821774
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
17831775

17841776

17851777
@mock.patch.object(requests.Session, "delete")
17861778
def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
1787-
openml.config.start_using_configuration_for_example()
17881779
content_file = (
17891780
test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
17901781
)
@@ -1796,14 +1787,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
17961787
success = openml.datasets.delete_dataset(40000)
17971788
assert success
17981789

1799-
dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
1790+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
18001791
assert dataset_url == mock_delete.call_args.args[0]
18011792
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
18021793

18031794

18041795
@mock.patch.object(requests.Session, "delete")
18051796
def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
1806-
openml.config.start_using_configuration_for_example()
18071797
content_file = (
18081798
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
18091799
)
@@ -1818,7 +1808,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
18181808
):
18191809
openml.datasets.delete_dataset(9_999_999)
18201810

1821-
dataset_url = "https://test.openml.org/api/v1/xml/data/9999999"
1811+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999"
18221812
assert dataset_url == mock_delete.call_args.args[0]
18231813
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
18241814

@@ -1913,9 +1903,8 @@ def _dataset_features_is_downloaded(did: int):
19131903

19141904

19151905
def _dataset_data_file_is_downloaded(did: int):
1916-
parquet_present = _dataset_file_is_downloaded(did, "dataset.pq")
1917-
arff_present = _dataset_file_is_downloaded(did, "dataset.arff")
1918-
return parquet_present or arff_present
1906+
cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
1907+
return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
19191908

19201909

19211910
def _assert_datasets_retrieved_successfully(
@@ -2020,7 +2009,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
20202009
test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
20212010
)
20222011
# While the mocked example is from production, unit tests by default connect to the test server.
2023-
requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
2012+
requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text())
20242013
dataset = openml.datasets.get_dataset(61, download_data=True)
20252014
assert dataset._parquet_url is not None
20262015
assert dataset.parquet_file is not None

tests/test_flows/test_flow_functions.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -453,7 +453,6 @@ def test_delete_flow(self):
453453

454454
@mock.patch.object(requests.Session, "delete")
455455
def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
456-
openml.config.start_using_configuration_for_example()
457456
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml"
458457
mock_delete.return_value = create_request_response(
459458
status_code=412,
@@ -466,14 +465,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
466465
):
467466
openml.flows.delete_flow(40_000)
468467

469-
flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
468+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
470469
assert flow_url == mock_delete.call_args.args[0]
471470
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
472471

473472

474473
@mock.patch.object(requests.Session, "delete")
475474
def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
476-
openml.config.start_using_configuration_for_example()
477475
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml"
478476
mock_delete.return_value = create_request_response(
479477
status_code=412,
@@ -486,14 +484,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
486484
):
487485
openml.flows.delete_flow(40_000)
488486

489-
flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
487+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
490488
assert flow_url == mock_delete.call_args.args[0]
491489
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
492490

493491

494492
@mock.patch.object(requests.Session, "delete")
495493
def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
496-
openml.config.start_using_configuration_for_example()
497494
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml"
498495
mock_delete.return_value = create_request_response(
499496
status_code=412,
@@ -506,14 +503,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
506503
):
507504
openml.flows.delete_flow(40_000)
508505

509-
flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
506+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
510507
assert flow_url == mock_delete.call_args.args[0]
511508
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
512509

513510

514511
@mock.patch.object(requests.Session, "delete")
515512
def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
516-
openml.config.start_using_configuration_for_example()
517513
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml"
518514
mock_delete.return_value = create_request_response(
519515
status_code=200,
@@ -523,15 +519,14 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
523519
success = openml.flows.delete_flow(33364)
524520
assert success
525521

526-
flow_url = "https://test.openml.org/api/v1/xml/flow/33364"
522+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364"
527523
assert flow_url == mock_delete.call_args.args[0]
528524
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
529525

530526

531527
@mock.patch.object(requests.Session, "delete")
532528
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
533529
def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
534-
openml.config.start_using_configuration_for_example()
535530
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml"
536531
mock_delete.return_value = create_request_response(
537532
status_code=412,
@@ -544,6 +539,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
544539
):
545540
openml.flows.delete_flow(9_999_999)
546541

547-
flow_url = "https://test.openml.org/api/v1/xml/flow/9999999"
542+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999"
548543
assert flow_url == mock_delete.call_args.args[0]
549544
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")

0 commit comments

Comments
 (0)