Skip to content

Commit c07d2cf

Browse files
authored
Merge branch 'main' into issue-1578
2 parents 14802db + 1bc9f15 commit c07d2cf

14 files changed

Lines changed: 83 additions & 76 deletions

File tree

openml/_api_calls.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
from . import config
2323
from .__version__ import __version__
2424
from .exceptions import (
25+
OpenMLAuthenticationError,
2526
OpenMLHashException,
26-
OpenMLNotAuthorizedError,
2727
OpenMLServerError,
2828
OpenMLServerException,
2929
OpenMLServerNoResult,
@@ -515,11 +515,7 @@ def __parse_server_exception(
515515
400, # run/42 delete
516516
460, # task/42 delete
517517
]:
518-
msg = (
519-
f"The API call {url} requires authentication via an API key.\nPlease configure "
520-
"OpenML-Python to use your API as described in this example:"
521-
"\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
522-
)
523-
return OpenMLNotAuthorizedError(message=msg)
518+
msg = f"The API call {url} requires authentication via an API key."
519+
return OpenMLAuthenticationError(message=msg)
524520

525521
return OpenMLServerException(code=code, message=full_message, url=url)

openml/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def check_server(server: str) -> str:
109109

110110
def replace_shorthand(server: str) -> str:
111111
if server == "test":
112-
return "https://test.openml.org/api/v1/xml"
112+
return f"{config.TEST_SERVER_URL}/api/v1/xml"
113113
if server == "production_server":
114114
return "https://www.openml.org/api/v1/xml"
115115
return server

openml/config.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY"
2929
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
3030

31+
TEST_SERVER_URL = "https://test.openml.org"
32+
3133

3234
class _Config(TypedDict):
3335
apikey: str
@@ -214,7 +216,7 @@ class ConfigurationForExamples:
214216
_last_used_server = None
215217
_last_used_key = None
216218
_start_last_called = False
217-
_test_server = "https://test.openml.org/api/v1/xml"
219+
_test_server = f"{TEST_SERVER_URL}/api/v1/xml"
218220
_test_apikey = _TEST_SERVER_NORMAL_USER_KEY
219221

220222
@classmethod
@@ -470,7 +472,8 @@ def get_cache_directory() -> str:
470472
471473
"""
472474
url_suffix = urlparse(server).netloc
473-
reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118
475+
url_parts = url_suffix.replace(":", "_").split(".")[::-1]
476+
reversed_url_suffix = os.sep.join(url_parts) # noqa: PTH118
474477
return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118
475478

476479

openml/exceptions.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,28 @@ class OpenMLNotAuthorizedError(OpenMLServerError):
6363
"""Indicates an authenticated user is not authorized to execute the requested action."""
6464

6565

66+
class OpenMLAuthenticationError(OpenMLServerError):
67+
"""Exception raised when API authentication fails.
68+
69+
This typically occurs when:
70+
- No API key is configured
71+
- The API key is invalid or expired
72+
- The API key format is incorrect
73+
74+
This is different from authorization (OpenMLNotAuthorizedError), which occurs
75+
when a valid API key lacks permissions for the requested operation.
76+
"""
77+
78+
def __init__(self, message: str):
79+
help_text = (
80+
"\n\nTo fix this:\n"
81+
"1. Get your API key from https://www.openml.org/\n"
82+
" (you'll need to register for a free account if you don't have one)\n"
83+
"2. Configure your API key by following the authentication guide:\n"
84+
" https://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
85+
)
86+
super().__init__(message + help_text)
87+
88+
6689
class ObjectNotPublishedError(PyOpenMLError):
6790
"""Indicates an object has not been published yet."""

openml/tasks/functions.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -415,9 +415,10 @@ def get_task(
415415
if not isinstance(task_id, int):
416416
raise TypeError(f"Task id should be integer, is {type(task_id)}")
417417

418-
cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
419-
tid_cache_dir = cache_key_dir / str(task_id)
420-
tid_cache_dir_existed = tid_cache_dir.exists()
418+
task_cache_directory = openml.utils._create_cache_directory_for_id(
419+
TASKS_CACHE_DIR_NAME, task_id
420+
)
421+
task_cache_directory_existed = task_cache_directory.exists()
421422
try:
422423
task = _get_task_description(task_id)
423424
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
@@ -434,8 +435,8 @@ def get_task(
434435
if download_splits and isinstance(task, OpenMLSupervisedTask):
435436
task.download_split()
436437
except Exception as e:
437-
if not tid_cache_dir_existed:
438-
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
438+
if not task_cache_directory_existed:
439+
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory)
439440
raise e
440441

441442
return task

openml/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class TestBase(unittest.TestCase):
4747
"user": [],
4848
}
4949
flow_name_tracker: ClassVar[list[str]] = []
50-
test_server = "https://test.openml.org/api/v1/xml"
50+
test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
5151
admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR)
5252
user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY
5353

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def with_server(request):
277277
openml.config.apikey = None
278278
yield
279279
return
280-
openml.config.server = "https://test.openml.org/api/v1/xml"
280+
openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
281281
openml.config.apikey = TestBase.user_key
282282
yield
283283

tests/files/localhost_8000

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
org/openml/test

tests/test_datasets/test_dataset_functions.py

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -527,27 +527,20 @@ def test_deletion_of_cache_dir(self):
527527
def test_deletion_of_cache_dir_faulty_download(self, patch):
528528
patch.side_effect = Exception("Boom!")
529529
self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1)
530-
datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets")
530+
datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets")
531531
assert len(os.listdir(datasets_cache_dir)) == 0
532532

533533
@pytest.mark.test_server()
534534
def test_publish_dataset(self):
535-
# lazy loading not possible as we need the arff-file.
536-
openml.datasets.get_dataset(3, download_data=True)
537-
file_path = os.path.join(
538-
openml.config.get_cache_directory(),
539-
"datasets",
540-
"3",
541-
"dataset.arff",
542-
)
535+
arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
543536
dataset = OpenMLDataset(
544537
"anneal",
545538
"test",
546539
data_format="arff",
547540
version=1,
548541
licence="public",
549542
default_target_attribute="class",
550-
data_file=file_path,
543+
data_file=arff_file_path,
551544
)
552545
dataset.publish()
553546
TestBase._mark_entity_for_removal("data", dataset.dataset_id)
@@ -890,7 +883,7 @@ def test_create_invalid_dataset(self):
890883

891884
@pytest.mark.test_server()
892885
def test_get_online_dataset_arff(self):
893-
dataset_id = 100 # Australian
886+
dataset_id = 128 # iris -- one of the few datasets without parquet file
894887
# lazy loading not used as arff file is checked.
895888
dataset = openml.datasets.get_dataset(dataset_id, download_data=True)
896889
decoder = arff.ArffDecoder()
@@ -1468,8 +1461,9 @@ def test_data_edit_critical_field(self):
14681461
raise e
14691462
time.sleep(10)
14701463
# Delete the cache dir to get the newer version of the dataset
1464+
14711465
shutil.rmtree(
1472-
os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)),
1466+
os.path.join(openml.config.get_cache_directory(), "datasets", str(did)),
14731467
)
14741468

14751469
@pytest.mark.test_server()
@@ -1734,7 +1728,6 @@ def test_delete_dataset(self):
17341728

17351729
@mock.patch.object(requests.Session, "delete")
17361730
def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
1737-
openml.config.start_using_configuration_for_example()
17381731
content_file = (
17391732
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
17401733
)
@@ -1749,14 +1742,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
17491742
):
17501743
openml.datasets.delete_dataset(40_000)
17511744

1752-
dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
1745+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
17531746
assert dataset_url == mock_delete.call_args.args[0]
17541747
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
17551748

17561749

17571750
@mock.patch.object(requests.Session, "delete")
17581751
def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
1759-
openml.config.start_using_configuration_for_example()
17601752
content_file = (
17611753
test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
17621754
)
@@ -1771,14 +1763,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
17711763
):
17721764
openml.datasets.delete_dataset(40_000)
17731765

1774-
dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
1766+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
17751767
assert dataset_url == mock_delete.call_args.args[0]
17761768
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
17771769

17781770

17791771
@mock.patch.object(requests.Session, "delete")
17801772
def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
1781-
openml.config.start_using_configuration_for_example()
17821773
content_file = (
17831774
test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
17841775
)
@@ -1790,14 +1781,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
17901781
success = openml.datasets.delete_dataset(40000)
17911782
assert success
17921783

1793-
dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
1784+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
17941785
assert dataset_url == mock_delete.call_args.args[0]
17951786
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
17961787

17971788

17981789
@mock.patch.object(requests.Session, "delete")
17991790
def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
1800-
openml.config.start_using_configuration_for_example()
18011791
content_file = (
18021792
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
18031793
)
@@ -1812,7 +1802,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
18121802
):
18131803
openml.datasets.delete_dataset(9_999_999)
18141804

1815-
dataset_url = "https://test.openml.org/api/v1/xml/data/9999999"
1805+
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999"
18161806
assert dataset_url == mock_delete.call_args.args[0]
18171807
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
18181808

@@ -1907,9 +1897,8 @@ def _dataset_features_is_downloaded(did: int):
19071897

19081898

19091899
def _dataset_data_file_is_downloaded(did: int):
1910-
parquet_present = _dataset_file_is_downloaded(did, "dataset.pq")
1911-
arff_present = _dataset_file_is_downloaded(did, "dataset.arff")
1912-
return parquet_present or arff_present
1900+
cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
1901+
return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
19131902

19141903

19151904
def _assert_datasets_retrieved_successfully(
@@ -2014,7 +2003,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
20142003
test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
20152004
)
20162005
# While the mocked example is from production, unit tests by default connect to the test server.
2017-
requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
2006+
requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text())
20182007
dataset = openml.datasets.get_dataset(61, download_data=True)
20192008
assert dataset._parquet_url is not None
20202009
assert dataset.parquet_file is not None

tests/test_flows/test_flow_functions.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,6 @@ def test_delete_flow(self):
453453

454454
@mock.patch.object(requests.Session, "delete")
455455
def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
456-
openml.config.start_using_configuration_for_example()
457456
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml"
458457
mock_delete.return_value = create_request_response(
459458
status_code=412,
@@ -466,14 +465,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
466465
):
467466
openml.flows.delete_flow(40_000)
468467

469-
flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
468+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
470469
assert flow_url == mock_delete.call_args.args[0]
471470
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
472471

473472

474473
@mock.patch.object(requests.Session, "delete")
475474
def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
476-
openml.config.start_using_configuration_for_example()
477475
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml"
478476
mock_delete.return_value = create_request_response(
479477
status_code=412,
@@ -486,14 +484,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
486484
):
487485
openml.flows.delete_flow(40_000)
488486

489-
flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
487+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
490488
assert flow_url == mock_delete.call_args.args[0]
491489
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
492490

493491

494492
@mock.patch.object(requests.Session, "delete")
495493
def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
496-
openml.config.start_using_configuration_for_example()
497494
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml"
498495
mock_delete.return_value = create_request_response(
499496
status_code=412,
@@ -506,14 +503,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
506503
):
507504
openml.flows.delete_flow(40_000)
508505

509-
flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
506+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
510507
assert flow_url == mock_delete.call_args.args[0]
511508
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
512509

513510

514511
@mock.patch.object(requests.Session, "delete")
515512
def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
516-
openml.config.start_using_configuration_for_example()
517513
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml"
518514
mock_delete.return_value = create_request_response(
519515
status_code=200,
@@ -523,15 +519,14 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
523519
success = openml.flows.delete_flow(33364)
524520
assert success
525521

526-
flow_url = "https://test.openml.org/api/v1/xml/flow/33364"
522+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364"
527523
assert flow_url == mock_delete.call_args.args[0]
528524
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
529525

530526

531527
@mock.patch.object(requests.Session, "delete")
532528
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
533529
def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
534-
openml.config.start_using_configuration_for_example()
535530
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml"
536531
mock_delete.return_value = create_request_response(
537532
status_code=412,
@@ -544,6 +539,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
544539
):
545540
openml.flows.delete_flow(9_999_999)
546541

547-
flow_url = "https://test.openml.org/api/v1/xml/flow/9999999"
542+
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999"
548543
assert flow_url == mock_delete.call_args.args[0]
549544
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")

0 commit comments

Comments
 (0)