name: Test Data Validation

# Manually triggered cross-platform compatibility check:
#   1. build-wheels        - build a wheel on each OS
#   2. create-test-data    - create a test database with each wheel
#   3. validate-test-data  - open every database on every OS (3 x 3 matrix)
#   4. cleanup             - delete the intermediate artifacts
#
# Artifacts are always named after `runner.os` (Linux / macOS / Windows),
# never after the runner label (ubuntu-latest / ...), so that producers and
# consumers agree on the names.
on:
  workflow_dispatch:
    inputs:
      python-version:
        description: 'Python version to test'
        required: true
        type: choice
        options:
          - '3.9'
          - '3.10'
          - '3.11'
          - '3.12'
          - '3.13'
        default: '3.12'

jobs:
  build-wheels:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}

      - name: Generate meson files (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          python3 scripts/generate_meson.py ./src/dbzero/ core
          python3 scripts/generate_meson_tests.py tests/
          python3 scripts/generate_meson_dbzero.py dbzero/

      - name: Generate meson files (Windows)
        if: runner.os == 'Windows'
        run: |
          python scripts/generate_meson.py ./src/dbzero/ core
          python scripts/generate_meson_tests.py tests/
          python scripts/generate_meson_dbzero.py dbzero/

      # The generated meson files are committed (with .gitignore removed) so
      # that they are part of the tree the build picks up.
      - name: Configure git (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          git config --global user.email "ci@example.com"
          git config --global user.name "CI Builder"
          rm -f .gitignore
          git add . && git commit -m "Update meson files"

      - name: Configure git (Windows)
        if: runner.os == 'Windows'
        run: |
          git config --global user.email "ci@example.com"
          git config --global user.name "CI Builder"
          if (Test-Path .gitignore) { Remove-Item .gitignore }
          git add . && git commit -m "Update meson files"

      - name: Install build tools
        run: pip install build

      - name: Build wheel
        run: python -m build

      - name: Upload wheel
        uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ runner.os }}
          path: dist/*.whl
          retention-days: 1

  create-test-data:
    needs: build-wheels
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}

      - name: Download wheel
        uses: actions/download-artifact@v4
        with:
          name: wheel-${{ runner.os }}
          path: dist/

      - name: Install wheel (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          pip install dist/*.whl

      # PowerShell does not expand the glob for pip, so resolve the file first.
      - name: Install wheel (Windows)
        if: runner.os == 'Windows'
        run: |
          $wheel = Get-ChildItem -Path dist/*.whl | Select-Object -First 1
          pip install $wheel.FullName

      # The command is identical on every OS, so a single step is enough.
      - name: Create test data
        run: |
          python python_tests/scripts/create_test_data.py --out_dir ./test-data-${{ runner.os }}

      - name: Upload test data
        uses: actions/upload-artifact@v4
        with:
          name: test-data-${{ runner.os }}
          path: test-data-${{ runner.os }}/
          retention-days: 1

  validate-test-data:
    needs: create-test-data
    strategy:
      matrix:
        validator-os: [ubuntu-latest, macos-latest, windows-latest]
        data-source: [Linux, macOS, Windows]
    runs-on: ${{ matrix.validator-os }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}

      # Wheels are uploaded as wheel-<runner.os>; matrix.validator-os holds the
      # runner label (e.g. ubuntu-latest) and would never match an artifact.
      - name: Download wheel for validator OS
        uses: actions/download-artifact@v4
        with:
          name: wheel-${{ runner.os }}
          path: dist/

      - name: Install wheel (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          pip install dist/*.whl

      - name: Install wheel (Windows)
        if: runner.os == 'Windows'
        run: |
          $wheel = Get-ChildItem -Path dist/*.whl | Select-Object -First 1
          pip install $wheel.FullName

      - name: Download test data from ${{ matrix.data-source }}
        uses: actions/download-artifact@v4
        with:
          name: test-data-${{ matrix.data-source }}
          path: test-data-${{ matrix.data-source }}/

      # Run the validator by path, like the create step: the script imports its
      # sibling module (create_test_data), which only resolves when the
      # script's own directory is first on sys.path.
      - name: Validate test data
        run: |
          python python_tests/scripts/validate_test_data.py --input_dir ./test-data-${{ matrix.data-source }}

  cleanup:
    needs: validate-test-data
    # Run even when validation failed so artifacts do not linger.
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Delete wheel artifacts
        uses: geekyeggo/delete-artifact@v5
        with:
          name: |
            wheel-Linux
            wheel-macOS
            wheel-Windows
          failOnError: false

      - name: Delete test data artifacts
        uses: geekyeggo/delete-artifact@v5
        with:
          name: |
            test-data-Linux
            test-data-macOS
            test-data-Windows
          failOnError: false
# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (c) 2025 DBZero Software sp. z o.o.

import random
import string
import datetime
import dbzero as db0


@db0.memo()
class TestObject:
    """Memo object stored in ``TestDataSingleton.large_list``.

    Holds an integer id, a random string value and a creation timestamp.
    """

    def __init__(self, id, value, timestamp):
        self.id = id
        self.value = value
        self.timestamp = timestamp


@db0.memo(singleton=True)
class TestDataSingleton:
    """Singleton containing various db0 collections for testing purposes"""

    def __init__(self):
        # List with mixed types (int, string, object)
        self.list = [
            1,
            "string_value",
            {"key": "value"},
            42,
            "another_string",
            {"nested": {"data": 123}},
            999,
            "third_string",
            {"id": 1, "name": "test"},
            100,
        ]

        # Dict with 10 elements
        self.dict = {
            "key1": 1,
            "key2": "value2",
            "key3": [1, 2, 3],
            "key4": {"nested": "dict"},
            "key5": 42.5,
            "key6": True,
            "key7": None,
            "key8": "string_value",
            "key9": [4, 5, 6],
            "key10": {"id": 10},
        }

        # Set with 10 elements
        self.set = {
            1, 2, 3, 4, 5,
            "str1", "str2", "str3", "str4", "str5",
        }

        # Tuple with 10 elements
        self.tuple = (
            "a", "b", "c", "d", "e",
            1, 2, 3, 4, 5,
        )

        # ByteArray with 10 bytes
        self.byte_array = db0.bytearray(b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09')

        # Large list with 10k random objects
        self.large_list = []
        alphabet = string.ascii_letters + string.digits
        for i in range(10000):
            # Random string with random length (5-50 chars); the validator
            # checks this length range on a sample of the objects.
            random_length = random.randint(5, 50)
            random_string = ''.join(random.choices(alphabet, k=random_length))

            obj = TestObject(
                id=i,
                value=random_string,
                timestamp=datetime.datetime.now(),
            )
            self.large_list.append(obj)


def create_test_data():
    """
    Create and return the TestDataSingleton with all test data.

    A db0 prefix must already be open when this is called.

    Usage example (with python_tests/scripts on sys.path):
        import dbzero as db0
        from create_test_data import create_test_data

        db0.init("/path/to/db")
        db0.open("test-prefix")

        test_data = create_test_data()

        print(f"List length: {len(test_data.list)}")
        print(f"Dict keys: {list(test_data.dict.keys())}")
        print(f"Large list length: {len(test_data.large_list)}")

        db0.close()
    """
    return TestDataSingleton()


def main():
    """Command-line entry point: (re)create the test database in --out_dir.

    Any existing directory at --out_dir is removed first. The database is
    closed even if creating or printing the data raises.
    """
    import argparse
    import os
    import shutil

    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Create test data in db0 database")
    parser.add_argument(
        "--out_dir",
        type=str,
        default=os.path.join(os.getcwd(), "db0-create-test-data"),
        help="Output directory for db0 database (default: db0-create-test-data)",
    )
    args = parser.parse_args()

    # Setup test database: always start from an empty directory.
    db0_dir = args.out_dir
    if os.path.exists(db0_dir):
        shutil.rmtree(db0_dir)
    # makedirs (not mkdir) so that a nested --out_dir works as well.
    os.makedirs(db0_dir)

    db0.init(db0_dir)
    db0.open("test-data-prefix")
    try:
        # Create test data
        print(f"Creating test data in: {db0_dir}")
        test_data = create_test_data()

        print("\nTest data created successfully!")
        print(f" - list: {len(test_data.list)} elements")
        print(f" - dict: {len(test_data.dict)} elements")
        print(f" - set: {len(test_data.set)} elements")
        print(f" - tuple: {len(test_data.tuple)} elements")
        print(f" - byte_array: {len(test_data.byte_array)} bytes")
        print(f" - large_list: {len(test_data.large_list)} elements")

        # Show some sample data
        print(f"\nSample from list: {test_data.list[:3]}")
        print(f"Sample from dict: {dict(list(test_data.dict.items())[:3])}")
        print(f"Sample from large_list: {test_data.large_list[0]}")
    finally:
        # Close on every path so a failure above does not skip the close.
        db0.close()

    print(f"\nData persisted in: {db0_dir}")
    print("Done!")


if __name__ == "__main__":
    main()
# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (c) 2025 DBZero Software sp. z o.o.

import os
import sys
import argparse


def _check_collection(test_data, name, expected_len, errors, unit="elements"):
    """Check that ``test_data.<name>`` exists and has ``expected_len`` items.

    Appends a message to ``errors`` when the attribute is missing or has the
    wrong length; prints a confirmation line otherwise.

    Returns:
        True when the attribute is present with the expected length.
    """
    if not hasattr(test_data, name):
        errors.append(f"Missing attribute: {name}")
        return False

    actual_len = len(getattr(test_data, name))
    if actual_len != expected_len:
        errors.append(f"{name} has {actual_len} {unit}, expected {expected_len}")
        return False

    print(f"✓ {name}: {expected_len} {unit}")
    return True


def validate_test_data(test_data):
    """Validate that test data has the expected structure and content

    Args:
        test_data: object exposing the attributes ``list``, ``dict``, ``set``,
            ``tuple``, ``byte_array`` and ``large_list``.

    Returns:
        A ``(errors, warnings)`` tuple of message lists. Errors are missing
        attributes or wrong lengths; warnings are content irregularities
        found once the length check has passed.
    """
    errors = []
    warnings = []

    # list: 10 elements of mixed types
    if _check_collection(test_data, "list", 10, errors):
        has_int = any(isinstance(item, int) for item in test_data.list)
        has_str = any(isinstance(item, str) for item in test_data.list)
        has_dict = any(isinstance(item, dict) for item in test_data.list)
        if not (has_int and has_str and has_dict):
            warnings.append("list should contain mixed types (int, string, dict). ")

    # dict: 10 elements keyed key1..key10
    if _check_collection(test_data, "dict", 10, errors):
        expected_keys = [f"key{i}" for i in range(1, 11)]
        missing_keys = [k for k in expected_keys if k not in test_data.dict]
        if missing_keys:
            warnings.append(f"dict missing expected keys: {missing_keys}")

    _check_collection(test_data, "set", 10, errors)
    _check_collection(test_data, "tuple", 10, errors)
    _check_collection(test_data, "byte_array", 10, errors, unit="bytes")

    # large_list: 10000 objects; only the first few are inspected in detail
    if _check_collection(test_data, "large_list", 10000, errors):
        sample_size = min(10, len(test_data.large_list))
        sample_warnings = []
        for i in range(sample_size):
            obj = test_data.large_list[i]
            if not hasattr(obj, 'id'):
                sample_warnings.append(f"large_list[{i}] missing 'id' attribute")
            if not hasattr(obj, 'value'):
                sample_warnings.append(f"large_list[{i}] missing 'value' attribute")
            elif not isinstance(obj.value, str):
                sample_warnings.append(f"large_list[{i}].value is not a string")
            elif not (5 <= len(obj.value) <= 50):
                sample_warnings.append(
                    f"large_list[{i}].value length {len(obj.value)} not in range [5, 50]"
                )
            if not hasattr(obj, 'timestamp'):
                sample_warnings.append(f"large_list[{i}] missing 'timestamp' attribute")

        warnings.extend(sample_warnings)
        if not sample_warnings:
            print(f" ✓ Sample validation of first {sample_size} objects passed")

    return errors, warnings


def _print_samples(test_data):
    """Print a few stored values so the CI log shows what was actually read."""
    print("\nSample data:")
    if hasattr(test_data, 'list') and len(test_data.list) > 0:
        print(f" list[0:3]: {test_data.list[:3]}")

    if hasattr(test_data, 'dict') and len(test_data.dict) > 0:
        sample_dict = dict(list(test_data.dict.items())[:3])
        print(f" dict (first 3): {sample_dict}")

    if hasattr(test_data, 'large_list') and len(test_data.large_list) > 0:
        obj = test_data.large_list[0]
        print(f" large_list[0]: id={getattr(obj, 'id', 'N/A')}, "
              f"value='{obj.value[:20] if hasattr(obj, 'value') else 'N/A'}...', "
              f"timestamp={getattr(obj, 'timestamp', 'N/A')}")


def main():
    """Command-line entry point: validate the database found in --input_dir.

    Returns:
        Process exit code: 0 when no errors were found (warnings allowed),
        1 on validation errors or when the database cannot be opened/loaded.
    """
    # Imported here rather than at module level so that validate_test_data()
    # can be imported without the dbzero extension being installed.
    import dbzero as db0
    from create_test_data import TestDataSingleton

    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Validate test data in db0 database")
    parser.add_argument(
        "--input_dir",
        type=str,
        required=True,
        help="Input directory containing db0 database to validate",
    )
    args = parser.parse_args()

    # Check if directory exists
    if not os.path.exists(args.input_dir):
        print(f"Error: Directory does not exist: {args.input_dir}")
        return 1

    if not os.path.isdir(args.input_dir):
        print(f"Error: Path is not a directory: {args.input_dir}")
        return 1

    print(f"Validating test data in: {args.input_dir}\n")

    # Initialize db0 and open the database
    try:
        db0.init(args.input_dir)
        db0.open("test-data-prefix")
    except Exception as e:
        print(f"Error opening database: {e}")
        return 1

    try:
        # Load the singleton
        try:
            test_data = TestDataSingleton()
        except Exception as e:
            print(f"Error loading TestDataSingleton: {e}")
            return 1

        # Validate the data
        print("Validating collections:\n")
        errors, warnings = validate_test_data(test_data)

        # Display sample data
        _print_samples(test_data)
    finally:
        # Close database on every path, including unexpected exceptions.
        db0.close()

    # Report results
    print("\n" + "=" * 50)
    if errors:
        print(f"\n❌ VALIDATION FAILED with {len(errors)} error(s):")
        for error in errors:
            print(f" - {error}")
        exit_code = 1
    else:
        print("\n✓ VALIDATION PASSED - All required data present")
        exit_code = 0

    if warnings:
        print(f"\n⚠ {len(warnings)} warning(s):")
        for warning in warnings:
            print(f" - {warning}")

    return exit_code


if __name__ == "__main__":
    # sys.exit rather than the site-provided exit(), which is meant for the
    # interactive shell and is not guaranteed to exist.
    sys.exit(main())