Skip to content

Commit 2b2bab6

Browse files
committed
lockutils, minio path util
1 parent 68ea96c commit 2b2bab6

4 files changed

Lines changed: 26 additions & 7 deletions

File tree

openml/datasets/dataset.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
import numpy as np
1616
import pandas as pd
1717
import scipy.sparse
18-
from filelock import FileLock
1918

2019
import openml
2120
from openml.base import OpenMLBase
21+
from openml.utils._openml import file_lock
2222

2323
from .data_feature import OpenMLDataFeature
2424

@@ -467,7 +467,7 @@ def _parse_data_from_arff( # noqa: C901, PLR0912, PLR0915
467467
List[str]: List of column names.
468468
"""
469469
lock_path = str(arff_file_path) + ".lock"
470-
with FileLock(lock_path):
470+
with file_lock(lock_path):
471471
try:
472472
data = self._get_arff(self.format)
473473
except OSError as e:
@@ -618,7 +618,7 @@ def _parse_data_from_file(
618618

619619
def _parse_data_from_pq(self, data_file: Path) -> tuple[list[str], list[bool], pd.DataFrame]:
620620
lock_path = str(data_file) + ".lock"
621-
with FileLock(lock_path):
621+
with file_lock(lock_path):
622622
try:
623623
data = pd.read_parquet(data_file)
624624
except Exception as e:
@@ -635,7 +635,7 @@ def _load_data(self) -> tuple[pd.DataFrame, list[bool], list[str]]: # noqa: PLR
635635
if need_to_create_pickle or need_to_create_feather:
636636
cache_file = self.data_pickle_file if need_to_create_pickle else self.data_feather_file
637637
lock_path = str(cache_file) + ".lock"
638-
with FileLock(lock_path):
638+
with file_lock(lock_path):
639639
if self.data_file is None:
640640
self._download_data()
641641

openml/utils/_openml.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
from __future__ import annotations
33

44
import contextlib
5+
import hashlib
56
import re
67
import shutil
78
import warnings
89
from abc import ABC, abstractmethod
9-
from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
10+
from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Sized
1011
from functools import wraps
1112
from pathlib import Path
1213
from typing import (
@@ -454,6 +455,24 @@ def _create_lockfiles_dir() -> Path:
454455
return path
455456

456457

458+
@contextlib.contextmanager
def file_lock(lock_path: str) -> Iterator[None]:
    """Hold an ``oslo_concurrency`` external lock while the ``with`` body runs.

    The lock is obtained from ``oslo_concurrency.lockutils.external_lock``.
    The directory returned by ``_create_lockfiles_dir()`` is passed as the
    location for the lock, and the SHA-256 hex digest of ``str(lock_path)``
    is passed as its name, so the same path always maps to the same name.

    Parameters
    ----------
    lock_path : str
        Identifier of the resource to guard. Only its string form is used,
        as the input to the hash that names the lock.

    Yields
    ------
    None
        Control is handed back to the caller while the lock is held.
    """
    # Imported lazily; every warning emitted during the import is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from oslo_concurrency import lockutils

    locks_root = _create_lockfiles_dir()

    # Derive a deterministic lock name from the string form of the path.
    hasher = hashlib.sha256()
    hasher.update(str(lock_path).encode())
    lock_name = hasher.hexdigest()

    guard = lockutils.external_lock(name=lock_name, lock_path=locks_root)
    with guard:
        yield
474+
475+
457476
class ProgressBar(ProgressType):
458477
"""Progressbar for MinIO function's `progress` parameter."""
459478

tests/test_datasets/test_dataset_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1835,7 +1835,7 @@ def _dataset_features_is_downloaded(did: int):
18351835

18361836
def _dataset_data_file_is_downloaded(dataset: OpenMLDataset):
18371837
if dataset._parquet_url is not None:
1838-
pq_directory = Path(openml.config.get_cache_directory()) / Path(openml.config.get_minio_download_path(dataset._parquet_url)).parent
1838+
pq_directory = Path(openml.config.get_minio_download_path(dataset._parquet_url)).parent
18391839
if pq_directory.exists():
18401840
return any(f.suffix == ".pq" for f in pq_directory.iterdir())
18411841
arff_directory = Path(openml.config.get_cache_directory()) / "data/v1/download" / str(dataset.id)

tests/test_tasks/test_task_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def test_get_task(self):
165165
os.path.join(openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff")
166166
)
167167
assert os.path.exists(
168-
os.path.join(openml.config.get_cache_directory(), openml.config.get_minio_download_path(dataset._parquet_url))
168+
openml.config.get_minio_download_path(dataset._parquet_url)
169169
)
170170

171171
@pytest.mark.test_server()

0 commit comments

Comments
 (0)