diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d725b5..8e12d3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,10 +65,19 @@ jobs: with: name: godata_server-${{ matrix.target }} path: godata_server-${{ matrix.target }}.zip - test_linux: name: Test on Linux runs-on: ubuntu-latest + services: + redis: + image: redis + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 outputs: godata_version: ${{ steps.get_godata_version.outputs.version }} needs: [linux_build_server] @@ -93,13 +102,16 @@ jobs: - name: Install godata run: poetry install --with test - name: Run tests + env: + REDIS_HOST: localhost + REDIS_PORT: 6379 run: | export DATA_PATH=$(pwd)/tests/test_io/test_data/ chmod +x ./server/godata_server mkdir -p /home/runner/.local/bin sudo mv ./server/godata_server /home/runner/.local/bin/ cd tests - sudo -E bash -c '/home/runner/.local/bin/poetry run pytest' + sudo -E bash -c '/home/runner/.local/bin/poetry run pytest -s' - name: output godata version id: get_godata_version run: | @@ -136,7 +148,7 @@ jobs: chmod +x ./server/godata_server sudo mv ./server/godata_server /Users/runner/.local/bin cd tests - sudo -E bash -c 'poetry run pytest' + sudo -E bash -c 'poetry run pytest -s' check_tags: name: check tags diff --git a/godata/files/__init__.py b/godata/files/__init__.py index a4d73e8..c0c3385 100644 --- a/godata/files/__init__.py +++ b/godata/files/__init__.py @@ -1,3 +1,6 @@ """ Management of all actual files are handled in python. """ +from .locking import get_lock + +__all__ = ["get_lock"] diff --git a/godata/files/locking.py b/godata/files/locking.py new file mode 100644 index 0000000..d4998df --- /dev/null +++ b/godata/files/locking.py @@ -0,0 +1,58 @@ +""" +Godata locks files that it is modifying to prevent multiple processes from modifying +the same file at the same time. This uses the portalocker package. + +For user machines (or storage) that's directly attached, this can be done +with a basic file lock. Locking on network storage is more subtle. Portalocker +supports using redis as a lock server. + +Godata will check to see if a redis server is running and use it. If no redis server +is running, it will fall back on standard file locks. +""" +from os import environ +from pathlib import Path + +import loguru +import portalocker +import redis + + +# check if redis is running +def get_redis_lock(path: Path, client: redis.Redis) -> portalocker.RedisLock: + lock = portalocker.RedisLock(str(path), client) + return lock + + +def get_file_lock(path: Path) -> portalocker.Lock: + lock = portalocker.Lock(str(path)) + return lock + + +REDIS_HOST = environ.get("REDIS_HOST") +REDIS_PORT = environ.get("REDIS_PORT") +REDIS_PASSWORD = environ.get("REDIS_PASSWORD") # NOT YET SUPPORTED + + +if REDIS_HOST is not None: + try: + REDIS_PORT = int(REDIS_PORT) + except (ValueError, TypeError): + REDIS_PORT = 6379 + + client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD) + try: + client.ping() + except redis.ConnectionError: + raise ValueError( + f"Could not connec to the redis server at {REDIS_HOST}:{REDIS_PORT}" + ) + + def get_lock(path: Path): + return get_redis_lock(path, client) + +else: + loguru.logger.warning("No redis server found. Falling back on file locks.") + get_lock = get_file_lock + client = None + +__all__ = ["get_lock"] diff --git a/godata/project.py b/godata/project.py index 3a00737..92b3ae4 100644 --- a/godata/project.py +++ b/godata/project.py @@ -12,10 +12,10 @@ from pathlib import Path from typing import Any -import portalocker from loguru import logger from godata.client import client +from godata.files import get_lock from godata.files import utils as file_utils from godata.io import find_writer, get_typekey, godataIoException, try_to_read from godata.utils import sanitize_project_path @@ -189,11 +189,11 @@ def store( if isinstance(to_read, Path): try: obj = try_to_read(to_read) # This can be very slow... Could be improved - writer_fn, suffix = find_writer(obj, to_read) + writer_fn, suffix = find_writer(obj, to_read.suffix) except godataIoException as e: logger.warning( - f"Could not find a reader for file {to_read}. The file will still" + f"Could not find a reader for file {to_read}. The file will still " "be stored, but godata will only be able to return a path." f"Error: {e}" ) @@ -239,9 +239,9 @@ def store( metadata=metadata, ) storage_path.parent.mkdir(parents=True, exist_ok=True) - - with portalocker.Lock(str(storage_path), "wb"): - writer_fn(obj, storage_path, **writer_kwargs) + lock = get_lock(storage_path) + with lock: + writer_fn(obj, str(storage_path), **writer_kwargs) return True @@ -292,8 +292,7 @@ def get( format = get_typekey(load_type) else: format = file_info.get("obj_type") - - with portalocker.Lock(str(path), "rb"): + with get_lock(path): data = try_to_read(path, format, reader_kwargs) return data except godataIoException as e: diff --git a/poetry.lock b/poetry.lock index 02daaf5..4a7ad0b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1122,6 +1122,7 @@ files = [ [package.dependencies] pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} +redis = {version = "*", optional = true, markers = "extra == \"redis\""} [package.extras] docs = ["sphinx (>=1.7.1)"] @@ -1447,6 +1448,24 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "redis" +version = "5.0.1" +description = "Python client for Redis database and key-value store" +optional = false +python-versions = ">=3.7" +files = [ + {file = "redis-5.0.1-py3-none-any.whl", hash = "sha256:ed4802971884ae19d640775ba3b03aa2e7bd5e8fb8dfaed2decce4d0fc48391f"}, + {file = "redis-5.0.1.tar.gz", hash = "sha256:0dab495cd5753069d3bc650a0dde8a8f9edde16fc5691b689a566eda58100d0f"}, +] + +[package.dependencies] +async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} + +[package.extras] +hiredis = ["hiredis (>=1.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] + [[package]] name = "requests" version = "2.31.0" @@ -1925,4 +1944,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "417efb34643a62fea3836125e338623778b02e4d8d16f2c2d80d0bfd5cedf5ac" +content-hash = "b4708e81a0821da2ec1c632a378b5ceadb31a5fd50b83cb151db3267c26940b8" diff --git a/pyproject.toml b/pyproject.toml index 14f96b0..61a72e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ loguru = "^0.7.2" aiohttp = "^3.9.1" requests = "^2.31.0" click = "^8.1.7" -portalocker = "^2.8.2" +portalocker = {extras = ["redis"], version = "^2.8.2"} appdirs = "^1.4.4" packaging = "^23.2" pydantic = "^2.6.1" diff --git a/test_godata.sh b/test_godata.sh index adda838..f63b998 100755 --- a/test_godata.sh +++ b/test_godata.sh @@ -10,8 +10,30 @@ while getopts 'i' opt; do ;; esac done -docker build -t godata-test:latest . && docker run --env DATA_PATH=/home/data --env RUST_BACKTRACE=1 -v $GODATA_TEST_ROOT/test_io/test_data:/home/data -it godata-test:latest $CMD +# Run a redis server in a networked container +docker network create godata-net +# check if redis is already running +docker ps | grep redis +if [ $? -eq 0 ]; then + echo "Redis is already running" +else + echo "Starting redis" + docker run --network godata-net -d redis +fi + +# Get the IP address of the redis server +REDIS_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $(docker ps -q --filter ancestor=redis)) + +docker build -t godata-test:latest .\ +&& \ +docker run \ + --network godata-net \ + --env DATA_PATH=/home/data \ + --env RUST_BACKTRACE=1 \ + --env REDIS_HOST=$REDIS_IP \ + -v $GODATA_TEST_ROOT/test_io/test_data:/home/data \ + -it godata-test:latest $CMD # Run this script from the root of the project to test the godata server and # client. This docker daemon must be running. # Assumes there is an environment variable $GODATA_TEST_ROOT that points to the diff --git a/tests/test_projects.py b/tests/test_projects.py index ec309b6..c4802b8 100644 --- a/tests/test_projects.py +++ b/tests/test_projects.py @@ -243,3 +243,11 @@ def test_ie(): # get the list of folders in this path data = p2.get("data/test_data") assert np.all(data == expected_data) + + +def test_place_in_root(): + p = create_project("test14") + expected_data = np.ones((10, 10)) + p.store(expected_data, "test_data") + data = p.get("test_data") + assert np.all(data == expected_data)