From 9261bad1ae58649cef990c6294fc92bf1b5df443 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 14:41:20 -0800 Subject: [PATCH 01/10] Now works with a redis server --- godata/files/__init__.py | 3 +++ godata/files/locking.py | 52 ++++++++++++++++++++++++++++++++++++++++ godata/project.py | 11 ++++----- poetry.lock | 21 +++++++++++++++- pyproject.toml | 2 +- test_godata.sh | 24 ++++++++++++++++++- tests/test_projects.py | 8 +++++++ 7 files changed, 112 insertions(+), 9 deletions(-) create mode 100644 godata/files/locking.py diff --git a/godata/files/__init__.py b/godata/files/__init__.py index a4d73e8..c0c3385 100644 --- a/godata/files/__init__.py +++ b/godata/files/__init__.py @@ -1,3 +1,6 @@ """ Management of all actual files are handled in python. """ +from .locking import get_lock + +__all__ = ["get_lock"] diff --git a/godata/files/locking.py b/godata/files/locking.py new file mode 100644 index 0000000..dc67cf9 --- /dev/null +++ b/godata/files/locking.py @@ -0,0 +1,52 @@ +""" +Godata locks files that it is modifying to prevent multiple processes from modifying +the same file at the same time. This uses the portalocker package. + +For user machines (or storage) that's directly attached, this can be done +with a basic file lock. Locking on network storage is more subtle. Portalocker +supports using redis as a lock server. + +Godata will check to see if a redis server is running and use it. If no redis server +is running, it will fall back on standard file locks. +""" +from os import environ +from pathlib import Path + +import loguru +import portalocker +import redis + + +# check if redis is running +def get_redis_lock(path: Path, client: redis.Redis) -> portalocker.RedisLock: + lock = portalocker.RedisLock(str(path), client) + return lock + + +def get_file_lock(path: Path) -> portalocker.Lock: + lock = portalocker.Lock(str(path)) + return lock + + +REDIS_HOST = environ.get("REDIS_HOST") or "localhost" +REDIS_PORT = environ.get("REDIS_PORT") # NOT YET SUPPORTED +try: + REDIS_PORT = int(REDIS_PORT) +except (ValueError, TypeError): + REDIS_PORT = 6379 + +REDIS_PASSWORD = environ.get("REDIS_PASSWORD") # NOT YET SUPPORTED +client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD) + +try: + client.ping() + + def get_lock(path: Path): + return get_redis_lock(path, client) + +except redis.ConnectionError: + loguru.logger.warning("No redis server found. Falling back on file locks.") + get_lock = get_file_lock + client = None + +__all__ = ["get_lock"] diff --git a/godata/project.py b/godata/project.py index 3a00737..65b0848 100644 --- a/godata/project.py +++ b/godata/project.py @@ -12,10 +12,10 @@ from pathlib import Path from typing import Any -import portalocker from loguru import logger from godata.client import client +from godata.files import get_lock from godata.files import utils as file_utils from godata.io import find_writer, get_typekey, godataIoException, try_to_read from godata.utils import sanitize_project_path @@ -239,9 +239,9 @@ def store( metadata=metadata, ) storage_path.parent.mkdir(parents=True, exist_ok=True) - - with portalocker.Lock(str(storage_path), "wb"): - writer_fn(obj, storage_path, **writer_kwargs) + lock = get_lock(storage_path) + with lock: + writer_fn(obj, str(storage_path), **writer_kwargs) return True @@ -292,8 +292,7 @@ def get( format = get_typekey(load_type) else: format = file_info.get("obj_type") - - with portalocker.Lock(str(path), "rb"): + with get_lock(path): data = try_to_read(path, format, reader_kwargs) return data except godataIoException as e: diff --git a/poetry.lock b/poetry.lock index 02daaf5..4a7ad0b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1122,6 +1122,7 @@ files = [ [package.dependencies] pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} +redis = {version = "*", optional = true, markers = "extra == \"redis\""} [package.extras] docs = ["sphinx (>=1.7.1)"] @@ -1447,6 +1448,24 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "redis" +version = "5.0.1" +description = "Python client for Redis database and key-value store" +optional = false +python-versions = ">=3.7" +files = [ + {file = "redis-5.0.1-py3-none-any.whl", hash = "sha256:ed4802971884ae19d640775ba3b03aa2e7bd5e8fb8dfaed2decce4d0fc48391f"}, + {file = "redis-5.0.1.tar.gz", hash = "sha256:0dab495cd5753069d3bc650a0dde8a8f9edde16fc5691b689a566eda58100d0f"}, +] + +[package.dependencies] +async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} + +[package.extras] +hiredis = ["hiredis (>=1.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] + [[package]] name = "requests" version = "2.31.0" @@ -1925,4 +1944,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "417efb34643a62fea3836125e338623778b02e4d8d16f2c2d80d0bfd5cedf5ac" +content-hash = "b4708e81a0821da2ec1c632a378b5ceadb31a5fd50b83cb151db3267c26940b8" diff --git a/pyproject.toml b/pyproject.toml index 14f96b0..61a72e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ loguru = "^0.7.2" aiohttp = "^3.9.1" requests = "^2.31.0" click = "^8.1.7" -portalocker = "^2.8.2" +portalocker = {extras = ["redis"], version = "^2.8.2"} appdirs = "^1.4.4" packaging = "^23.2" pydantic = "^2.6.1" diff --git a/test_godata.sh b/test_godata.sh index adda838..f63b998 100755 --- a/test_godata.sh +++ b/test_godata.sh @@ -10,8 +10,30 @@ while getopts 'i' opt; do ;; esac done -docker build -t godata-test:latest . && docker run --env DATA_PATH=/home/data --env RUST_BACKTRACE=1 -v $GODATA_TEST_ROOT/test_io/test_data:/home/data -it godata-test:latest $CMD +# Run a redis server in a networked container +docker network create godata-net +# check if redis is already running +docker ps | grep redis +if [ $? -eq 0 ]; then + echo "Redis is already running" +else + echo "Starting redis" + docker run --network godata-net -d redis +fi + +# Get the IP address of the redis server +REDIS_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $(docker ps -q --filter ancestor=redis)) + +docker build -t godata-test:latest .\ +&& \ +docker run \ + --network godata-net \ + --env DATA_PATH=/home/data \ + --env RUST_BACKTRACE=1 \ + --env REDIS_HOST=$REDIS_IP \ + -v $GODATA_TEST_ROOT/test_io/test_data:/home/data \ + -it godata-test:latest $CMD # Run this script from the root of the project to test the godata server and # client. This docker daemon must be running. # Assumes there is an environment variable $GODATA_TEST_ROOT that points to the diff --git a/tests/test_projects.py b/tests/test_projects.py index ec309b6..c4802b8 100644 --- a/tests/test_projects.py +++ b/tests/test_projects.py @@ -243,3 +243,11 @@ def test_ie(): # get the list of folders in this path data = p2.get("data/test_data") assert np.all(data == expected_data) + + +def test_place_in_root(): + p = create_project("test14") + expected_data = np.ones((10, 10)) + p.store(expected_data, "test_data") + data = p.get("test_data") + assert np.all(data == expected_data) From ee7c5cd44b7ca61df14e4997a54ebd0017301cf3 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 14:42:25 -0800 Subject: [PATCH 02/10] slight re-organize --- godata/files/locking.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/godata/files/locking.py b/godata/files/locking.py index dc67cf9..a7d9ee6 100644 --- a/godata/files/locking.py +++ b/godata/files/locking.py @@ -29,13 +29,14 @@ def get_file_lock(path: Path) -> portalocker.Lock: REDIS_HOST = environ.get("REDIS_HOST") or "localhost" -REDIS_PORT = environ.get("REDIS_PORT") # NOT YET SUPPORTED +REDIS_PORT = environ.get("REDIS_PORT") +REDIS_PASSWORD = environ.get("REDIS_PASSWORD") # NOT YET SUPPORTED + try: REDIS_PORT = int(REDIS_PORT) except (ValueError, TypeError): REDIS_PORT = 6379 -REDIS_PASSWORD = environ.get("REDIS_PASSWORD") # NOT YET SUPPORTED client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD) try: From d4503a8bee2ac16ff3801eace0e67b4ac0f6f993 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 14:51:30 -0800 Subject: [PATCH 03/10] Add redis to github action --- .github/workflows/ci.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d725b5..3da7df1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,6 +65,21 @@ jobs: with: name: godata_server-${{ matrix.target }} path: godata_server-${{ matrix.target }}.zip + + start_redis: + name: Start Redis + runs-on: ubuntu-latest + services: + redis: + image: redis + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name: Wait for Redis to start + run: redis-cli -h redis ping test_linux: name: Test on Linux @@ -93,6 +108,9 @@ jobs: - name: Install godata run: poetry install --with test - name: Run tests + env: + REDIS_HOST: redis + REDIS_PORT: 6379 run: | export DATA_PATH=$(pwd)/tests/test_io/test_data/ chmod +x ./server/godata_server From 530a12c8fb6ec90d35aa3d26c6ceee983413deae Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 14:53:45 -0800 Subject: [PATCH 04/10] Fix redis start --- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3da7df1..06902cf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,10 +77,6 @@ jobs: --health-interval 10s --health-timeout 5s --health-retries 5 - steps: - - name: Wait for Redis to start - run: redis-cli -h redis ping - test_linux: name: Test on Linux runs-on: ubuntu-latest From b646e29f1235c07cc5189f83de91c83601ef5452 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 14:56:23 -0800 Subject: [PATCH 05/10] Fix redis start --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06902cf..0d70758 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,6 +69,7 @@ jobs: start_redis: name: Start Redis runs-on: ubuntu-latest + needs: [linux_build_server] services: redis: image: redis @@ -82,7 +83,7 @@ jobs: runs-on: ubuntu-latest outputs: godata_version: ${{ steps.get_godata_version.outputs.version }} - needs: [linux_build_server] + needs: [linux_build_server, start_redis] strategy: matrix: target: [x86_64-unknown-linux-gnu] From 1adf8bb9e20e17c7236a83e066fafa4f7c590d8b Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 15:06:27 -0800 Subject: [PATCH 06/10] Fix redis start --- .github/workflows/ci.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d70758..eabff8e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,11 +65,9 @@ jobs: with: name: godata_server-${{ matrix.target }} path: godata_server-${{ matrix.target }}.zip - - start_redis: - name: Start Redis + test_linux: + name: Test on Linux runs-on: ubuntu-latest - needs: [linux_build_server] services: redis: image: redis @@ -78,12 +76,9 @@ jobs: --health-interval 10s --health-timeout 5s --health-retries 5 - test_linux: - name: Test on Linux - runs-on: ubuntu-latest outputs: godata_version: ${{ steps.get_godata_version.outputs.version }} - needs: [linux_build_server, start_redis] + needs: [linux_build_server] strategy: matrix: target: [x86_64-unknown-linux-gnu] From 843d8765b744b322e4b0daa7225a0c117b2cd8f9 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 15:16:32 -0800 Subject: [PATCH 07/10] Disable ptest output capturing --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eabff8e..e0555fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -109,7 +109,7 @@ jobs: mkdir -p /home/runner/.local/bin sudo mv ./server/godata_server /home/runner/.local/bin/ cd tests - sudo -E bash -c '/home/runner/.local/bin/poetry run pytest' + sudo -E bash -c '/home/runner/.local/bin/poetry run pytest -s' - name: output godata version id: get_godata_version run: | @@ -146,7 +146,7 @@ jobs: chmod +x ./server/godata_server sudo mv ./server/godata_server /Users/runner/.local/bin cd tests - sudo -E bash -c 'poetry run pytest' + sudo -E bash -c 'poetry run pytest -s' check_tags: name: check tags From a3fde3f5576e3742f54aa20741681e8f979c0033 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 15:36:53 -0800 Subject: [PATCH 08/10] Now fails if Redis connection fails with valid input --- .github/workflows/ci.yml | 2 +- godata/files/locking.py | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0555fa..d571ea6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,7 +101,7 @@ jobs: run: poetry install --with test - name: Run tests env: - REDIS_HOST: redis + REDIS_HOST: localhost REDIS_PORT: 6379 run: | export DATA_PATH=$(pwd)/tests/test_io/test_data/ diff --git a/godata/files/locking.py b/godata/files/locking.py index a7d9ee6..d4998df 100644 --- a/godata/files/locking.py +++ b/godata/files/locking.py @@ -28,24 +28,29 @@ def get_file_lock(path: Path) -> portalocker.Lock: return lock -REDIS_HOST = environ.get("REDIS_HOST") or "localhost" +REDIS_HOST = environ.get("REDIS_HOST") REDIS_PORT = environ.get("REDIS_PORT") REDIS_PASSWORD = environ.get("REDIS_PASSWORD") # NOT YET SUPPORTED -try: - REDIS_PORT = int(REDIS_PORT) -except (ValueError, TypeError): - REDIS_PORT = 6379 -client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD) +if REDIS_HOST is not None: + try: + REDIS_PORT = int(REDIS_PORT) + except (ValueError, TypeError): + REDIS_PORT = 6379 -try: - client.ping() + client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD) + try: + client.ping() + except redis.ConnectionError: + raise ValueError( + f"Could not connec to the redis server at {REDIS_HOST}:{REDIS_PORT}" + ) def get_lock(path: Path): return get_redis_lock(path, client) -except redis.ConnectionError: +else: loguru.logger.warning("No redis server found. Falling back on file locks.") get_lock = get_file_lock client = None From c9cef500cdcfa76fefb95b286e5ba1f4e088646d Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 15:46:21 -0800 Subject: [PATCH 09/10] Fix redis --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d571ea6..8e12d3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,6 +71,8 @@ jobs: services: redis: image: redis + ports: + - 6379:6379 options: >- --health-cmd "redis-cli ping" --health-interval 10s From b58d72e7bfafd99bcace9d02a5a42e4cd39b8fc6 Mon Sep 17 00:00:00 2001 From: Patrick Wells Date: Fri, 23 Feb 2024 16:06:42 -0800 Subject: [PATCH 10/10] Fix small bug --- godata/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/godata/project.py b/godata/project.py index 65b0848..92b3ae4 100644 --- a/godata/project.py +++ b/godata/project.py @@ -189,11 +189,11 @@ def store( if isinstance(to_read, Path): try: obj = try_to_read(to_read) # This can be very slow... Could be improved - writer_fn, suffix = find_writer(obj, to_read) + writer_fn, suffix = find_writer(obj, to_read.suffix) except godataIoException as e: logger.warning( - f"Could not find a reader for file {to_read}. The file will still" + f"Could not find a reader for file {to_read}. The file will still " "be stored, but godata will only be able to return a path." f"Error: {e}" )