Skip to content

Commit 6fa7e93

Browse files
authored
Merge branch 'main' into fix/race-condition-test-split
2 parents 8910a81 + 1bc9f15 commit 6fa7e93

31 files changed

Lines changed: 335 additions & 328 deletions

.github/workflows/test.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ jobs:
114114
fi
115115
116116
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
117-
marks="sklearn and not production and not uses_test_server"
117+
marks="sklearn and not production_server and not test_server"
118118
else
119-
marks="not production and not uses_test_server"
119+
marks="not production_server and not test_server"
120120
fi
121121
122122
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -131,9 +131,9 @@ jobs:
131131
fi
132132
133133
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
134-
marks="sklearn and production and not uses_test_server"
134+
marks="sklearn and production_server and not test_server"
135135
else
136-
marks="production and not uses_test_server"
136+
marks="production_server and not test_server"
137137
fi
138138
139139
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -143,7 +143,7 @@ jobs:
143143
env:
144144
OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
145145
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
146-
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
146+
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
147147
148148
- name: Check for files left behind by test
149149
if: matrix.os != 'windows-latest' && always()

docs/developer_setup.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ The OpenML Python SDK utilizes `pytest` markers to categorize tests based on dep
170170
| Marker | Description |
171171
|-------------------|-----------------------------------------------------------------------------|
172172
| `sklearn` | Tests requiring `scikit-learn`. Skipped if the library is missing. |
173-
| `production` | Tests that interact with the live OpenML server (real API calls). |
174-
| `uses_test_server` | Tests requiring the OpenML test server environment. |
173+
| `production_server` | Tests that interact with the live OpenML server (real API calls). |
174+
| `test_server` | Tests requiring the OpenML test server environment. |
175175

176176
### Execution Examples
177177

@@ -190,7 +190,7 @@ pytest -m sklearn
190190
Exclude production tests (local only):
191191

192192
```bash
193-
pytest -m "not production"
193+
pytest -m "not production_server"
194194
```
195195

196196
### Admin Privilege Tests

openml/_api_calls.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
from . import config
2323
from .__version__ import __version__
2424
from .exceptions import (
25+
OpenMLAuthenticationError,
2526
OpenMLHashException,
26-
OpenMLNotAuthorizedError,
2727
OpenMLServerError,
2828
OpenMLServerException,
2929
OpenMLServerNoResult,
@@ -515,11 +515,7 @@ def __parse_server_exception(
515515
400, # run/42 delete
516516
460, # task/42 delete
517517
]:
518-
msg = (
519-
f"The API call {url} requires authentication via an API key.\nPlease configure "
520-
"OpenML-Python to use your API as described in this example:"
521-
"\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
522-
)
523-
return OpenMLNotAuthorizedError(message=msg)
518+
msg = f"The API call {url} requires authentication via an API key."
519+
return OpenMLAuthenticationError(message=msg)
524520

525521
return OpenMLServerException(code=code, message=full_message, url=url)

openml/cli.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,15 @@ def check_apikey(apikey: str) -> str:
102102

103103
def configure_server(value: str) -> None:
104104
def check_server(server: str) -> str:
    """Validate a server argument given on the command line.

    Parameters
    ----------
    server : str
        Either a shorthand (``test``, ``production_server`` or the legacy
        ``production``) or a value accepted by ``looks_like_url``.

    Returns
    -------
    str
        An empty string when ``server`` is acceptable, otherwise the error
        message to show to the user.
    """
    # "production" was the shorthand before the pytest-marker rename leaked into
    # the CLI; keep accepting it so existing scripts and documentation still work.
    is_shorthand = server in ("test", "production", "production_server")
    if is_shorthand or looks_like_url(server):
        return ""
    return "Must be 'test', 'production_server' or a url."


def replace_shorthand(server: str) -> str:
    """Expand a server shorthand into the full API URL.

    Parameters
    ----------
    server : str
        A shorthand (``test``, ``production_server`` or the legacy
        ``production``) or any other string.

    Returns
    -------
    str
        The XML API endpoint for a recognised shorthand; any other value is
        returned unchanged.
    """
    if server == "test":
        return f"{config.TEST_SERVER_URL}/api/v1/xml"
    # Both spellings must map to the same endpoint; otherwise a value that
    # passed validation would be stored verbatim as the server URL.
    if server in ("production", "production_server"):
        return "https://www.openml.org/api/v1/xml"
    return server
116116

@@ -119,7 +119,7 @@ def replace_shorthand(server: str) -> str:
119119
value=value,
120120
check_with_message=check_server,
121121
intro_message="Specify which server you wish to connect to.",
122-
input_message="Specify a url or use 'test' or 'production' as a shorthand: ",
122+
input_message="Specify a url or use 'test' or 'production_server' as a shorthand: ",
123123
sanitize=replace_shorthand,
124124
)
125125

openml/config.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY"
2929
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"
3030

31+
TEST_SERVER_URL = "https://test.openml.org"
32+
3133

3234
class _Config(TypedDict):
3335
apikey: str
@@ -214,7 +216,7 @@ class ConfigurationForExamples:
214216
_last_used_server = None
215217
_last_used_key = None
216218
_start_last_called = False
217-
_test_server = "https://test.openml.org/api/v1/xml"
219+
_test_server = f"{TEST_SERVER_URL}/api/v1/xml"
218220
_test_apikey = _TEST_SERVER_NORMAL_USER_KEY
219221

220222
@classmethod
@@ -470,7 +472,8 @@ def get_cache_directory() -> str:
470472
471473
"""
472474
url_suffix = urlparse(server).netloc
473-
reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118
475+
url_parts = url_suffix.replace(":", "_").split(".")[::-1]
476+
reversed_url_suffix = os.sep.join(url_parts) # noqa: PTH118
474477
return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118
475478

476479

openml/exceptions.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,28 @@ class OpenMLNotAuthorizedError(OpenMLServerError):
6363
"""Indicates an authenticated user is not authorized to execute the requested action."""
6464

6565

66+
class OpenMLAuthenticationError(OpenMLServerError):
    """Signals that an API call failed because authentication did not succeed.

    Typical triggers:
    - no API key has been configured
    - the configured API key is invalid or has expired
    - the API key is malformed

    Not to be confused with ``OpenMLNotAuthorizedError``, which covers a valid
    API key that lacks permission for the requested operation.
    """

    def __init__(self, message: str):
        # The same remediation steps are appended to every authentication
        # failure, so callers only supply the call-specific part of the message.
        remediation_parts = (
            "\n\nTo fix this:\n",
            "1. Get your API key from https://www.openml.org/\n",
            " (you'll need to register for a free account if you don't have one)\n",
            "2. Configure your API key by following the authentication guide:\n",
            " https://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication",
        )
        full_message = message + "".join(remediation_parts)
        super().__init__(full_message)
88+
6689
class ObjectNotPublishedError(PyOpenMLError):
6790
"""Indicates an object has not been published yet."""

openml/tasks/functions.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -415,9 +415,10 @@ def get_task(
415415
if not isinstance(task_id, int):
416416
raise TypeError(f"Task id should be integer, is {type(task_id)}")
417417

418-
cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
419-
tid_cache_dir = cache_key_dir / str(task_id)
420-
tid_cache_dir_existed = tid_cache_dir.exists()
418+
task_cache_directory = openml.utils._create_cache_directory_for_id(
419+
TASKS_CACHE_DIR_NAME, task_id
420+
)
421+
task_cache_directory_existed = task_cache_directory.exists()
421422
try:
422423
task = _get_task_description(task_id)
423424
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
@@ -431,8 +432,8 @@ def get_task(
431432
if download_splits and isinstance(task, OpenMLSupervisedTask):
432433
task.download_split()
433434
except Exception as e:
434-
if not tid_cache_dir_existed:
435-
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
435+
if not task_cache_directory_existed:
436+
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory)
436437
raise e
437438

438439
return task

openml/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class TestBase(unittest.TestCase):
4747
"user": [],
4848
}
4949
flow_name_tracker: ClassVar[list[str]] = []
50-
test_server = "https://test.openml.org/api/v1/xml"
50+
test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
5151
admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR)
5252
user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY
5353

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,10 @@ filterwarnings=[
133133
"ignore:the matrix subclass:PendingDeprecationWarning"
134134
]
135135
markers = [
136-
"server: anything that connects to a server",
137136
"upload: anything that uploads to a server",
138-
"production: any interaction with the production server",
137+
"production_server: any interaction with the production server",
139138
"cache: anything that interacts with the (test) cache",
139+
"test_server: tests that require the OpenML test server",
140140
]
141141

142142
# https://github.com/charliermarsh/ruff

tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,12 @@ def as_robot() -> Iterator[None]:
272272

273273
@pytest.fixture(autouse=True)
def with_server(request):
    """Point ``openml.config`` at the right server before each test runs.

    Tests carrying the ``production_server`` keyword get the production API URL
    with no API key; every other test gets the test-server URL together with
    ``TestBase.user_key``. Nothing is restored after the test finishes.
    """
    targets_production = "production_server" in request.keywords
    if targets_production:
        openml.config.server = "https://www.openml.org/api/v1/xml"
        openml.config.apikey = None
    else:
        openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
        openml.config.apikey = TestBase.user_key
    yield
283283

0 commit comments

Comments
 (0)