Skip to content

Commit 18e85de

Browse files
committed
Merge commit pull/1576 into dataset_resource
2 parents 1c2fa99 + 74ab366 commit 18e85de

29 files changed

Lines changed: 421 additions & 167 deletions

.github/workflows/dist.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- name: Setup Python
2828
uses: actions/setup-python@v5
2929
with:
30-
python-version: 3.8
30+
python-version: "3.10"
3131
- name: Build dist
3232
run: |
3333
pip install build

.github/workflows/docs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
- name: Setup Python
2929
uses: actions/setup-python@v5
3030
with:
31-
python-version: 3.8
31+
python-version: "3.10"
3232
- name: Install dependencies
3333
run: |
3434
pip install -e .[docs,examples]

.github/workflows/test.yml

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
---
12
name: Tests
23

34
on:
@@ -21,13 +22,13 @@ concurrency:
2122

2223
jobs:
2324
test:
24-
name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }})
25+
name: (${{ matrix.os }},Py${{ matrix.python-version }},sk${{ matrix.scikit-learn }},sk-only:${{ matrix.sklearn-only }})
2526
runs-on: ${{ matrix.os }}
2627

2728
strategy:
2829
fail-fast: false
2930
matrix:
30-
python-version: ["3.10", "3.11", "3.12", "3.13"]
31+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
3132
scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"]
3233
os: [ubuntu-latest]
3334
sklearn-only: ["true"]
@@ -38,8 +39,18 @@ jobs:
3839
scikit-learn: "1.3.*"
3940
- python-version: "3.13"
4041
scikit-learn: "1.4.*"
42+
- python-version: "3.14"
43+
scikit-learn: "1.3.*"
44+
- python-version: "3.14"
45+
scikit-learn: "1.4.*"
4146

4247
include:
48+
# Full test run on ubuntu, 3.14
49+
- os: ubuntu-latest
50+
python-version: "3.14"
51+
scikit-learn: "1.7.*"
52+
sklearn-only: "false"
53+
4354
# Full test run on Windows
4455
- os: windows-latest
4556
python-version: "3.12"
@@ -87,9 +98,9 @@ jobs:
8798
fi
8899
89100
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
90-
marks="sklearn and not production"
101+
marks="sklearn and not production and not uses_test_server"
91102
else
92-
marks="not production"
103+
marks="not production and not uses_test_server"
93104
fi
94105
95106
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -102,17 +113,17 @@ jobs:
102113
fi
103114
104115
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
105-
marks="sklearn and production"
116+
marks="sklearn and production and not uses_test_server"
106117
else
107-
marks="production"
118+
marks="production and not uses_test_server"
108119
fi
109120
110121
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
111122
112123
- name: Run tests on Windows
113124
if: matrix.os == 'windows-latest'
114125
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
115-
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1
126+
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
116127
117128
- name: Check for files left behind by test
118129
if: matrix.os != 'windows-latest' && always()

.gitignore

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,17 @@ dmypy.sock
9898

9999
# Tests
100100
.pytest_cache
101+
102+
# Virtual environments
103+
oenv/
104+
venv/
105+
.env/
101106
.venv
107+
.venv/
108+
109+
# Python cache
110+
__pycache__/
111+
*.pyc
102112

103113
# Ruff
104-
.ruff-cache/
114+
.ruff-cache/

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
## The Python API for a World of Data and More :dizzy:
1616

1717
[![Latest Release](https://img.shields.io/github/v/release/openml/openml-python)](https://github.com/openml/openml-python/releases)
18-
[![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://pypi.org/project/openml/)
18+
[![Python Versions](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue)](https://pypi.org/project/openml/)
1919
[![Downloads](https://static.pepy.tech/badge/openml)](https://pepy.tech/project/openml)
2020
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
2121
<!-- Add green badges for CI and precommit -->
@@ -60,7 +60,7 @@ for task_id in suite.tasks:
6060

6161
## :magic_wand: Installation
6262

63-
OpenML-Python is supported on Python 3.8 - 3.13 and is available on Linux, MacOS, and Windows.
63+
OpenML-Python is supported on Python 3.10 - 3.14 and is available on Linux, macOS, and Windows.
6464

6565
You can install OpenML-Python with:
6666

openml/_api/config.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,62 @@
11
from __future__ import annotations
22

3-
API_V1_SERVER = "https://www.openml.org/api/v1/xml"
4-
API_V2_SERVER = "http://127.0.0.1:8001"
5-
API_KEY = "..."
3+
from dataclasses import dataclass
4+
from typing import Literal
5+
6+
DelayMethod = Literal["human", "robot"]
7+
8+
9+
@dataclass
class APIConfig:
    """Connection details for one API endpoint.

    All fields are stored exactly as given; nothing is validated or
    normalised here.
    """

    # Server address.
    server: str
    # Base path of the API; stored as-is.
    base_url: str
    # API key string.
    key: str
    # Request timeout, in seconds.
    timeout: int = 10
15+
16+
17+
@dataclass
class APISettings:
    """Pair of per-version API configurations."""

    # Configuration for the v1 API.
    v1: APIConfig
    # Configuration for the v2 API.
    v2: APIConfig
21+
22+
23+
@dataclass
class ConnectionConfig:
    """Connection-level settings (retry count and delay options).

    How these values are consumed is not defined in this class; it only
    stores them and validates ``delay_method`` on construction.
    """

    retries: int = 3
    delay_method: DelayMethod = "human"
    delay_time: int = 1  # seconds

    def __post_init__(self) -> None:
        """Reject any ``delay_method`` other than ``"human"`` or ``"robot"``.

        Raises
        ------
        ValueError
            If ``delay_method`` is not one of the two accepted values.
        """
        accepted = ("human", "robot")
        if self.delay_method in accepted:
            return
        raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}")
32+
33+
34+
@dataclass
class CacheConfig:
    """Cache settings: a directory string and a time-to-live."""

    # Cache directory; stored as written (the "~" is not expanded here).
    dir: str = "~/.openml/cache"
    # One week.
    ttl: int = 7 * 24 * 60 * 60
38+
39+
40+
@dataclass
class Settings:
    """Top-level container grouping the API, connection and cache settings."""

    # Per-version API endpoints.
    api: APISettings
    # Connection-level options.
    connection: ConnectionConfig
    # Cache options.
    cache: CacheConfig
45+
46+
47+
# Module-level default settings object.
settings = Settings(
    cache=CacheConfig(),
    connection=ConnectionConfig(),
    api=APISettings(
        v2=APIConfig(key="...", base_url="", server="http://127.0.0.1:8001/"),
        v1=APIConfig(key="...", base_url="api/v1/xml/", server="https://www.openml.org/"),
    ),
)

openml/_api/http/client.py

Lines changed: 125 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,151 @@
11
from __future__ import annotations
22

3-
from typing import Any, Mapping
3+
from pathlib import Path
4+
from typing import TYPE_CHECKING, Any
5+
from urllib.parse import urlencode, urljoin, urlparse
46

57
import requests
68
from requests import Response
79

810
from openml.__version__ import __version__
11+
from openml._api.config import settings
912

13+
if TYPE_CHECKING:
14+
from openml._api.config import APIConfig
1015

11-
class HTTPClient:
12-
def __init__(self, base_url: str) -> None:
13-
self.base_url = base_url
16+
17+
class CacheMixin:
    """Helpers that map a request onto a location in the on-disk cache.

    Reading and writing cached responses is not implemented yet:
    ``_get_cache_response`` and ``_set_cache_response`` are placeholders.
    """

    @property
    def dir(self) -> str:
        """Cache root directory, read from ``settings.cache.dir``."""
        return settings.cache.dir

    @property
    def ttl(self) -> int:
        """Cache time-to-live, read from ``settings.cache.ttl``."""
        return settings.cache.ttl

    def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path:
        """Return the cache directory for a request.

        The path is built from the cache root, the network location of
        ``url`` split on ``.`` and reversed, the URL path segments and, when
        any remain after dropping ``api_key``, the url-encoded parameters.

        Parameters
        ----------
        url
            Fully resolved request URL.
        params
            Query parameters of the request. ``api_key`` is excluded from the
            path.

        Returns
        -------
        Path
            Location under the (user-expanded) cache root.
        """
        parsed_url = urlparse(url)
        # A "host:port" netloc would put ':' into a path component, which is
        # not a legal file-name character on Windows.
        netloc_parts = parsed_url.netloc.replace(":", "_").split(".")[::-1]  # reverse domain
        path_parts = parsed_url.path.strip("/").split("/")

        # remove api_key and serialize params if any
        filtered_params = {k: v for k, v in params.items() if k != "api_key"}
        params_part = [urlencode(filtered_params)] if filtered_params else []

        # The configured root may start with "~"; without expanduser() a
        # literal "~" directory would be used relative to the working directory.
        cache_root = Path(self.dir).expanduser()
        return cache_root.joinpath(*netloc_parts, *path_parts, *params_part)

    def _get_cache_response(self, cache_dir: Path) -> Response:  # noqa: ARG002
        """Placeholder: return an empty ``Response`` regardless of ``cache_dir``."""
        return Response()

    def _set_cache_response(self, cache_dir: Path, response: Response) -> None:  # noqa: ARG002
        """Placeholder: currently does nothing."""
        return None
42+
43+
44+
class HTTPClient(CacheMixin):
    """HTTP client bound to a single ``APIConfig``.

    Every request is sent through ``requests.request`` with the client's
    ``user-agent`` header, the configured timeout (unless overridden) and,
    when requested, the API key added as the ``api_key`` query parameter.
    """

    def __init__(self, config: APIConfig) -> None:
        self.config = config
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    @property
    def server(self) -> str:
        """Server address taken from the config."""
        return self.config.server

    @property
    def base_url(self) -> str:
        """API base path taken from the config."""
        return self.config.base_url

    @property
    def key(self) -> str:
        """API key taken from the config."""
        return self.config.key

    @property
    def timeout(self) -> int:
        """Default request timeout taken from the config."""
        return self.config.timeout

    def request(
        self,
        method: str,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """Send a request to ``path`` resolved against ``server`` and ``base_url``.

        Parameters
        ----------
        method
            HTTP method name passed to ``requests.request``.
        path
            Path joined onto ``base_url`` and then onto ``server``.
        use_cache
            When true, the response is taken from ``_get_cache_response``
            instead of the network.
        use_api_key
            When true, ``api_key`` is added to the query parameters.
        **request_kwargs
            Forwarded to ``requests.request``. ``params``, ``headers`` and
            ``timeout`` are extracted first; ``params`` and ``headers`` may be
            omitted or ``None``.

        Returns
        -------
        Response
            The cached or freshly fetched response.
        """
        url = urljoin(self.server, urljoin(self.base_url, path))

        # An explicit ``None`` is treated like an omitted argument. Copying
        # into a new dict keeps the caller's mapping untouched.
        params = dict(request_kwargs.pop("params", None) or {})
        if use_api_key:
            params["api_key"] = self.key

        headers = dict(request_kwargs.pop("headers", None) or {})
        # Client-level headers take precedence over caller-supplied ones.
        headers.update(self.headers)

        timeout = request_kwargs.pop("timeout", self.timeout)

        # Only work out the cache location when caching was asked for.
        cache_dir = self._get_cache_dir(url, params) if use_cache else None

        if cache_dir is not None:
            # TODO: handle ttl expired error
            return self._get_cache_response(cache_dir)

        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            **request_kwargs,
        )

        # Becomes reachable once the cache lookup above can report a miss.
        if cache_dir is not None:
            self._set_cache_response(cache_dir, response)

        return response

    def get(
        self,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """Send a GET request; ``use_cache`` is currently ignored."""
        # TODO: remove override when cache is implemented
        use_cache = False
        return self.request(
            method="GET",
            path=path,
            use_cache=use_cache,
            use_api_key=use_api_key,
            **request_kwargs,
        )

    def post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """Send a POST request with the API key and without caching."""
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """Send a DELETE request with the API key and without caching."""
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

openml/_api/runtime/core.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@
22

33
from typing import TYPE_CHECKING
44

5-
from openml._api.config import (
6-
API_V1_SERVER,
7-
API_V2_SERVER,
8-
)
5+
from openml._api.config import settings
96
from openml._api.http.client import HTTPClient
107
from openml._api.resources import (
118
DatasetsV1,
@@ -25,8 +22,8 @@ def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI):
2522

2623

2724
def build_backend(version: str, *, strict: bool) -> APIBackend:
28-
v1_http = HTTPClient(API_V1_SERVER)
29-
v2_http = HTTPClient(API_V2_SERVER)
25+
v1_http = HTTPClient(config=settings.api.v1)
26+
v2_http = HTTPClient(config=settings.api.v2)
3027

3128
v1 = APIBackend(
3229
datasets=DatasetsV1(v1_http),

0 commit comments

Comments
 (0)