From 9058ecb1c9fe606a843c1fb99517a1ef8f178434 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 15 Mar 2026 23:51:19 +0000 Subject: [PATCH 1/5] Add cross-language symmetry tests for DID database Introduce symmetry test infrastructure following the NDI-matlab pattern, enabling cross-language validation between MATLAB and Python DID implementations. - makeArtifacts: generates a small DID database with random demoA/demoB/demoC documents across 3 branches (branch_main, branch_dev, branch_feature), exports the SQLite database file and per-branch JSON audit files as persistent artifacts - readArtifacts: parameterized test that reads artifacts from either matlabArtifacts or pythonArtifacts, validates document counts, class names, field values, and dependencies against the JSON audit files - INSTRUCTIONS.md files document the directory conventions and artifact format https://claude.ai/code/session_01TT3ycsjcvsKLnAp1WffRPe --- .../+makeArtifacts/+database/buildDatabase.m | 146 +++++++++++++++++ .../+symmetry/+makeArtifacts/INSTRUCTIONS.md | 26 +++ .../+readArtifacts/+database/buildDatabase.m | 153 ++++++++++++++++++ .../+symmetry/+readArtifacts/INSTRUCTIONS.md | 42 +++++ 4 files changed, 367 insertions(+) create mode 100644 tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m create mode 100644 tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md create mode 100644 tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m create mode 100644 tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md diff --git a/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m b/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m new file mode 100644 index 0000000..edae243 --- /dev/null +++ b/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m @@ -0,0 +1,146 @@ +classdef buildDatabase < matlab.unittest.TestCase + % BUILDDATABASE - Generate DID database artifacts for cross-language symmetry testing + % + % This test creates a small DID database with random 
documents (demoA, demoB, demoC) + % across multiple branches, then exports the database file and per-branch JSON + % audit files as artifacts for comparison with other DID implementations (e.g., Python). + + properties (Constant) + dbFilename = 'symmetry_test.sqlite' + end + + properties + db % The did.database object + artifactDir % Path where artifacts will be saved + end + + methods (TestMethodSetup) + function setupMethod(testCase) + testCase.applyFixture(matlab.unittest.fixtures.WorkingFolderFixture); + testCase.applyFixture(did.test.fixture.PathConstantFixture); + end + end + + methods (TestMethodTeardown) + function teardownMethod(~) + % Override teardown to do nothing: artifacts must persist in tempdir + % so that the Python test suite can read them. + end + end + + methods (Test) + function testBuildDatabaseArtifacts(testCase) + % Use a fixed seed for reproducibility across runs + rng('default'); + + % Determine the artifact directory + artifactDir = fullfile(tempdir(), 'DID', 'symmetryTest', ... + 'matlabArtifacts', 'database', 'buildDatabase', ... 
+ 'testBuildDatabaseArtifacts'); %#ok<*PROPLC> + testCase.artifactDir = artifactDir; + + % Clear previous artifacts if they exist + if isfolder(artifactDir) + rmdir(artifactDir, 's'); + end + mkdir(artifactDir); + + % Step 1: Create the database + dbPath = fullfile(artifactDir, testCase.dbFilename); + testCase.db = did.implementations.sqlitedb(dbPath); + + % Step 2: Create 3 branches in a simple hierarchy: + % branch_main + % ├── branch_dev + % └── branch_feature + branchNames = {'branch_main', 'branch_dev', 'branch_feature'}; + + % Create the root branch + testCase.db.add_branch(branchNames{1}); + + % Generate initial documents for the root branch (small counts) + [~, ~, rootDocs] = did.test.helper.documents.make_doc_tree([3 3 3]); + testCase.db.add_docs(rootDocs); + + % Create branch_dev as child of branch_main + testCase.db.set_branch(branchNames{1}); + testCase.db.add_branch(branchNames{2}); + + % Add some additional documents to branch_dev + [~, ~, devDocs] = did.test.helper.documents.make_doc_tree([2 2 2]); + testCase.db.add_docs(devDocs); + + % Create branch_feature as child of branch_main + testCase.db.set_branch(branchNames{1}); + testCase.db.add_branch(branchNames{3}); + + % Add some additional documents to branch_feature + [~, ~, featureDocs] = did.test.helper.documents.make_doc_tree([2 1 2]); + testCase.db.add_docs(featureDocs); + + % Step 3: Export per-branch JSON audit files + jsonBranchesDir = fullfile(artifactDir, 'jsonBranches'); + mkdir(jsonBranchesDir); + + % Build metadata structure + metadata = struct(); + metadata.branchNames = {branchNames{:}}; %#ok + metadata.branchHierarchy = struct(); + metadata.branchHierarchy.branch_main = {{'branch_dev', 'branch_feature'}}; + metadata.branchHierarchy.branch_dev = {{}}; + metadata.branchHierarchy.branch_feature = {{}}; + metadata.dbFilename = testCase.dbFilename; + branchDocCounts = struct(); + + for i = 1:numel(branchNames) + branchName = branchNames{i}; + testCase.db.set_branch(branchName); + + % Get 
all document IDs in this branch + docIds = testCase.db.get_doc_ids(branchName); + + % Retrieve full documents + branchDocsData = cell(1, numel(docIds)); + for j = 1:numel(docIds) + doc = testCase.db.get_docs(docIds{j}); + branchDocsData{j} = doc.document_properties; + end + + % Write the branch JSON file + branchJsonStr = did.datastructures.jsonencodenan(branchDocsData); + branchJsonFile = fullfile(jsonBranchesDir, ['branch_' branchName '.json']); + fid = fopen(branchJsonFile, 'w'); + testCase.verifyGreaterThan(fid, 0, ... + ['Could not create JSON file for branch ' branchName]); + if fid > 0 + fprintf(fid, '%s', branchJsonStr); + fclose(fid); + end + + % Track document counts for metadata + branchDocCounts.(branchName) = numel(docIds); + end + + % Step 4: Write metadata.json + metadata.branchDocCounts = branchDocCounts; + metadataJsonStr = did.datastructures.jsonencodenan(metadata); + fid = fopen(fullfile(artifactDir, 'metadata.json'), 'w'); + testCase.verifyGreaterThan(fid, 0, 'Could not create metadata.json'); + if fid > 0 + fprintf(fid, '%s', metadataJsonStr); + fclose(fid); + end + + % Verify artifacts were created + testCase.verifyTrue(isfile(dbPath), 'Database file was not created.'); + testCase.verifyTrue(isfolder(jsonBranchesDir), 'jsonBranches directory was not created.'); + testCase.verifyTrue(isfile(fullfile(artifactDir, 'metadata.json')), 'metadata.json was not created.'); + + for i = 1:numel(branchNames) + branchFile = fullfile(jsonBranchesDir, ['branch_' branchNames{i} '.json']); + testCase.verifyTrue(isfile(branchFile), ... 
+ ['Branch JSON file missing for ' branchNames{i}]); + end + end + end +end diff --git a/tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md b/tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md new file mode 100644 index 0000000..990466a --- /dev/null +++ b/tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md @@ -0,0 +1,26 @@ +# DID Symmetry Make Artifacts + +This folder contains MATLAB unit tests whose purpose is to generate standard DID artifacts for symmetry testing with other DID language ports (e.g., Python). + +## Rules for `makeArtifacts` tests: + +1. **Artifact Location**: Tests must store their generated artifacts in the system's temporary directory (`tempdir`). +2. **Directory Structure**: Inside the temporary directory, artifacts must be placed in a specific nested folder structure: + `DID/symmetryTest/matlabArtifacts/<namespace>/<className>/<methodName>/` + + - `<namespace>`: The last part of the MATLAB package namespace. For example, for a test located at `tests/+did/+symmetry/+makeArtifacts/+database`, the namespace is `database`. + - `<className>`: The name of the test class (e.g., `buildDatabase`). + - `<methodName>`: The specific name of the test method being executed (e.g., `testBuildDatabaseArtifacts`). + +3. **Persistent Teardown**: The generated artifacts and the underlying DID database must persist in the temporary directory so that the Python test suite can read them. To achieve this, you must explicitly override any superclass test teardown methods to do nothing. + +4. **Artifact Contents**: Each test should produce: + - The SQLite database file itself. + - One JSON audit file per branch, containing all documents in that branch. Each file should be named `branch_.json` and placed in a `jsonBranches` subdirectory. + - A `metadata.json` file describing the database structure (branch hierarchy, document counts, etc.). + +5. **Deterministic Seeds**: Tests should use `rng('default')` so that the random document/branch generation is reproducible across runs.
+ +## Example: +For a test class `buildDatabase.m` in `tests/+did/+symmetry/+makeArtifacts/+database` with a test method `testBuildDatabaseArtifacts`, the artifacts should be saved to: +`[tempdir(), 'DID/symmetryTest/matlabArtifacts/database/buildDatabase/testBuildDatabaseArtifacts/']` diff --git a/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m b/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m new file mode 100644 index 0000000..2ff9fd1 --- /dev/null +++ b/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m @@ -0,0 +1,153 @@ +classdef buildDatabase < matlab.unittest.TestCase + % BUILDDATABASE - Read and validate DID database artifacts for cross-language symmetry testing + % + % This test reads database artifacts generated by either the MATLAB or Python + % DID test suite and validates that the documents can be correctly loaded and + % that their contents match the JSON audit files. + + properties (TestParameter) + % Define the two potential sources of artifacts + SourceType = {'matlabArtifacts', 'pythonArtifacts'}; + end + + methods (Test) + function testBuildDatabaseArtifacts(testCase, SourceType) + % Determine the artifact directory expected from either MATLAB or Python + artifactDir = fullfile(tempdir(), 'DID', 'symmetryTest', SourceType, ... + 'database', 'buildDatabase', 'testBuildDatabaseArtifacts'); + + % If the directory does not exist, skip this parameterized test + testCase.assumeTrue(isfolder(artifactDir), ... + ['Artifact directory from ' SourceType ' does not exist.']); + + % Step 1: Load and validate metadata.json + metadataFile = fullfile(artifactDir, 'metadata.json'); + testCase.assumeTrue(isfile(metadataFile), ... + ['metadata.json not found in ' SourceType ' artifact directory.']); + + fid = fopen(metadataFile, 'r'); + rawJson = fread(fid, inf, '*char')'; + fclose(fid); + metadata = jsondecode(rawJson); + + testCase.verifyTrue(isfield(metadata, 'branchNames'), ... 
+ 'metadata.json missing branchNames field.'); + testCase.verifyTrue(isfield(metadata, 'dbFilename'), ... + 'metadata.json missing dbFilename field.'); + + % Step 2: Open the DID database + dbPath = fullfile(artifactDir, metadata.dbFilename); + testCase.assumeTrue(isfile(dbPath), ... + ['Database file not found: ' dbPath]); + + db = did.implementations.sqlitedb(dbPath); + + % Step 3: Validate each branch against its JSON audit file + branchNames = metadata.branchNames; + if ischar(branchNames) + branchNames = {branchNames}; + end + + jsonBranchesDir = fullfile(artifactDir, 'jsonBranches'); + testCase.assumeTrue(isfolder(jsonBranchesDir), ... + ['jsonBranches directory not found in ' SourceType]); + + for i = 1:numel(branchNames) + branchName = branchNames{i}; + + % Load the expected branch JSON + branchJsonFile = fullfile(jsonBranchesDir, ['branch_' branchName '.json']); + testCase.assumeTrue(isfile(branchJsonFile), ... + ['Branch JSON file missing for ' branchName ' in ' SourceType]); + + fid = fopen(branchJsonFile, 'r'); + rawBranchJson = fread(fid, inf, '*char')'; + fclose(fid); + expectedDocs = jsondecode(rawBranchJson); + + % Handle single-doc case where jsondecode returns a struct not array + if isstruct(expectedDocs) && ~isscalar(expectedDocs) + % Already a struct array, leave as-is + elseif isstruct(expectedDocs) && isscalar(expectedDocs) + expectedDocs = {expectedDocs}; + elseif iscell(expectedDocs) + % Cell array from jsondecode, leave as-is + end + + % Get actual documents from the database branch + db.set_branch(branchName); + actualDocIds = db.get_doc_ids(branchName); + + % Verify document count matches + if iscell(expectedDocs) + expectedCount = numel(expectedDocs); + else + expectedCount = numel(expectedDocs); + end + testCase.verifyEqual(numel(actualDocIds), expectedCount, ... + ['Document count mismatch in branch ' branchName ' from ' SourceType ... + '. 
Expected ' num2str(expectedCount) ' but got ' num2str(numel(actualDocIds))]); + + % Verify each expected document exists in the database + for j = 1:expectedCount + if iscell(expectedDocs) + expectedDoc = expectedDocs{j}; + else + expectedDoc = expectedDocs(j); + end + + expectedId = expectedDoc.base.id; + + % Try to retrieve the document from the database + doc = db.get_docs(expectedId); + testCase.verifyNotEmpty(doc, ... + ['Document ' expectedId ' from ' SourceType ' not found in database branch ' branchName]); + + if ~isempty(doc) + actualProps = doc.document_properties; + + % Verify document class name matches + testCase.verifyEqual(actualProps.document_class.class_name, ... + expectedDoc.document_class.class_name, ... + ['Class name mismatch for doc ' expectedId ' in branch ' branchName ' from ' SourceType]); + + % Verify demo-type-specific value fields + demoFields = {'demoA', 'demoB', 'demoC'}; + for k = 1:numel(demoFields) + fieldName = demoFields{k}; + if isfield(expectedDoc, fieldName) + testCase.verifyTrue(isfield(actualProps, fieldName), ... + ['Missing field ' fieldName ' in doc ' expectedId ' from ' SourceType]); + if isfield(actualProps, fieldName) + testCase.verifyEqual(actualProps.(fieldName).value, ... + expectedDoc.(fieldName).value, ... + ['Value mismatch in ' fieldName ' for doc ' expectedId ' from ' SourceType]); + end + end + end + + % Verify depends_on if present + if isfield(expectedDoc, 'depends_on') + testCase.verifyTrue(isfield(actualProps, 'depends_on'), ... 
+ ['Missing depends_on in doc ' expectedId ' from ' SourceType]); + end + end + end + end + + % Step 4: Verify branch document counts match metadata + if isfield(metadata, 'branchDocCounts') + for i = 1:numel(branchNames) + branchName = branchNames{i}; + if isfield(metadata.branchDocCounts, branchName) + expectedCount = metadata.branchDocCounts.(branchName); + db.set_branch(branchName); + actualDocIds = db.get_doc_ids(branchName); + testCase.verifyEqual(numel(actualDocIds), expectedCount, ... + ['Metadata doc count mismatch for branch ' branchName ' from ' SourceType]); + end + end + end + end + end +end diff --git a/tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md b/tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md new file mode 100644 index 0000000..5e4a7ff --- /dev/null +++ b/tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md @@ -0,0 +1,42 @@ +# DID Symmetry Read Artifacts + +This folder contains MATLAB unit tests whose purpose is to read standard DID artifacts generated by both the Python DID test suite and the MATLAB test suite, comparing them against expected values. + +## Process overview: + +1. **Artifact Location**: The test suites will place their generated artifacts in the system's temporary directory (`tempdir()`). +2. **Directory Structure**: Inside the temporary directory, artifacts can be found in a specific nested folder structure: + `DID/symmetryTest/////` + + - ``: Either `matlabArtifacts` or `pythonArtifacts`. + - ``: The module/package location of the corresponding test (e.g., `database`). + - ``: The name of the test class (e.g., `buildDatabase`). + - ``: The specific name of the test method that was executed (e.g., `testBuildDatabaseArtifacts`). + +3. **Testing Goals**: The MATLAB tests located in this `+readArtifacts` package should define assertions that: + - Load the JSON branch audit files created by the target suite. + - Load the actual DID database (SQLite) produced by the target suite. 
+ - Assert that the DID documents retrieved by MATLAB match the expected JSON structure dumped by the test suite. + - Run across both `pythonArtifacts` and `matlabArtifacts` using parameterized testing to ensure parity. + +## Example: + +Use MATLAB's `TestParameter` property block to dynamically pass the `SourceType` to your tests. + +```matlab +classdef buildDatabase < matlab.unittest.TestCase + properties (TestParameter) + SourceType = {'matlabArtifacts', 'pythonArtifacts'}; + end + + methods (Test) + function testBuildDatabaseArtifacts(testCase, SourceType) + artifactDir = fullfile(tempdir(), 'DID', 'symmetryTest', SourceType, ... + 'database', 'buildDatabase', 'testBuildDatabaseArtifacts'); + testCase.assumeTrue(isfolder(artifactDir), ... + ['Artifact directory from ' SourceType ' does not exist.']); + % ... load database, compare with JSON branch files ... + end + end +end +``` From 1966b9da8b2e81aba9aed9690fca760821eca2e5 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 00:02:56 +0000 Subject: [PATCH 2/5] Add did.util.databaseSummary and compareDatabaseSummary utilities Introduce reusable utilities for summarizing and comparing DID databases, then refactor symmetry tests to use them: - did.util.databaseSummary(db): produces a struct capturing the full database state (branches, hierarchy, documents sorted by ID with class names and properties), suitable for JSON serialization - did.util.compareDatabaseSummary(a, b): compares two summaries (structs, JSON files, or database objects) and returns a report with isEqual flag and detailed per-branch/per-document mismatch messages - makeArtifacts now exports summary.json via databaseSummary and includes a self-check round-trip comparison - readArtifacts now loads summary.json, re-summarizes the live database, and uses compareDatabaseSummary for the primary validation https://claude.ai/code/session_01TT3ycsjcvsKLnAp1WffRPe --- src/did/+did/+util/compareDatabaseSummary.m | 291 ++++++++++++++++++ 
src/did/+did/+util/databaseSummary.m | 100 ++++++ .../+makeArtifacts/+database/buildDatabase.m | 76 ++--- .../+symmetry/+makeArtifacts/INSTRUCTIONS.md | 6 +- .../+readArtifacts/+database/buildDatabase.m | 137 ++++----- .../+symmetry/+readArtifacts/INSTRUCTIONS.md | 24 +- 6 files changed, 494 insertions(+), 140 deletions(-) create mode 100644 src/did/+did/+util/compareDatabaseSummary.m create mode 100644 src/did/+did/+util/databaseSummary.m diff --git a/src/did/+did/+util/compareDatabaseSummary.m b/src/did/+did/+util/compareDatabaseSummary.m new file mode 100644 index 0000000..7b7ba42 --- /dev/null +++ b/src/did/+did/+util/compareDatabaseSummary.m @@ -0,0 +1,291 @@ +function report = compareDatabaseSummary(summaryA, summaryB) + % COMPAREDATABASESUMMARY - compare two database summaries and return a report + % + % REPORT = did.util.compareDatabaseSummary(SUMMARYA, SUMMARYB) + % + % Compares two database summary structs (as produced by did.util.databaseSummary + % or loaded from JSON) and returns a report struct describing any differences.
+ % + % SUMMARYA and SUMMARYB may be: + % - structs returned by did.util.databaseSummary() + % - file paths to JSON files containing serialized summaries + % - did.database objects (which will be summarized automatically) + % + % The returned REPORT struct contains: + % .isEqual - true if the two summaries match on all checked fields + % .messages - cell array of human-readable difference descriptions + % .branchComparison - struct with one field per branch, keyed by matlab.lang.makeValidName(branchName): + % .<branch>.inBoth - true if branch exists in both summaries + % .<branch>.docCountA - document count in summary A + % .<branch>.docCountB - document count in summary B + % .<branch>.docCountMatch - true if counts match + % .<branch>.missingInA - cell array of doc IDs in B but not A + % .<branch>.missingInB - cell array of doc IDs in A but not B + % .<branch>.valueMismatches - cell array of mismatch descriptions + % + % Example: + % summaryA = did.util.databaseSummary(dbA); + % summaryB = did.util.databaseSummary(dbB); + % report = did.util.compareDatabaseSummary(summaryA, summaryB); + % if ~report.isEqual + % disp(report.messages); + % end + % + % See also: did.util.databaseSummary + + % Convert inputs to summary structs if needed + summaryA = toSummaryStruct(summaryA); + summaryB = toSummaryStruct(summaryB); + + report = struct(); + report.isEqual = true; + report.messages = {}; + report.branchComparison = struct(); + + % Compare branch names + branchesA = summaryA.branchNames; + branchesB = summaryB.branchNames; + if ischar(branchesA), branchesA = {branchesA}; end + if ischar(branchesB), branchesB = {branchesB}; end + + allBranches = union(branchesA, branchesB); + + onlyInA = setdiff(branchesA, branchesB); + onlyInB = setdiff(branchesB, branchesA); + + if ~isempty(onlyInA) + report.isEqual = false; + for i = 1:numel(onlyInA) + report.messages{end+1} = ['Branch "' onlyInA{i} '" exists only in summary A.']; + end + end + if ~isempty(onlyInB) + report.isEqual = false; + for i = 1:numel(onlyInB) + report.messages{end+1} = ['Branch "' onlyInB{i} '" exists
only in summary B.']; + end + end + + % Compare each branch that exists in both + for i = 1:numel(allBranches) + branchName = allBranches{i}; + safeName = matlab.lang.makeValidName(branchName); + + comp = struct(); + comp.inBoth = ismember(branchName, branchesA) && ismember(branchName, branchesB); + + if ~comp.inBoth + comp.docCountA = 0; + comp.docCountB = 0; + comp.docCountMatch = false; + comp.missingInA = {}; + comp.missingInB = {}; + comp.valueMismatches = {}; + report.branchComparison.(safeName) = comp; + continue; + end + + % Get branch data from each summary + branchA = summaryA.branches.(safeName); + branchB = summaryB.branches.(safeName); + + comp.docCountA = branchA.docCount; + comp.docCountB = branchB.docCount; + comp.docCountMatch = (branchA.docCount == branchB.docCount); + comp.missingInA = {}; + comp.missingInB = {}; + comp.valueMismatches = {}; + + if ~comp.docCountMatch + report.isEqual = false; + report.messages{end+1} = ['Branch "' branchName '": doc count mismatch (' ...
+ num2str(branchA.docCount) ' vs ' num2str(branchB.docCount) ').']; + end + + % Build lookup maps by document ID + docsA = branchA.documents; + docsB = branchB.documents; + mapA = buildDocMap(docsA); + mapB = buildDocMap(docsB); + + idsA = keys(mapA); + idsB = keys(mapB); + + comp.missingInA = setdiff(idsB, idsA); + comp.missingInB = setdiff(idsA, idsB); + + if ~isempty(comp.missingInA) + report.isEqual = false; + for j = 1:numel(comp.missingInA) + report.messages{end+1} = ['Branch "' branchName '": doc "' comp.missingInA{j} '" missing in summary A.']; + end + end + if ~isempty(comp.missingInB) + report.isEqual = false; + for j = 1:numel(comp.missingInB) + report.messages{end+1} = ['Branch "' branchName '": doc "' comp.missingInB{j} '" missing in summary B.']; + end + end + + % Compare documents present in both + commonIds = intersect(idsA, idsB); + for j = 1:numel(commonIds) + docId = commonIds{j}; + docA = mapA(docId); + docB = mapB(docId); + + % Compare class name + classA = getClassName(docA); + classB = getClassName(docB); + if ~strcmp(classA, classB) + report.isEqual = false; + msg = ['Branch "' branchName '", doc "' docId '": class name mismatch ("' classA '" vs "' classB '").']; + report.messages{end+1} = msg; + comp.valueMismatches{end+1} = msg; + end + + % Compare demo-type value fields + demoFields = {'demoA', 'demoB', 'demoC'}; + propsA = getProperties(docA); + propsB = getProperties(docB); + for k = 1:numel(demoFields) + fieldName = demoFields{k}; + hasA = isfield(propsA, fieldName); + hasB = isfield(propsB, fieldName); + if hasA && hasB + valA = propsA.(fieldName).value; + valB = propsB.(fieldName).value; + if ~isequal(valA, valB) + report.isEqual = false; + msg = ['Branch "' branchName '", doc "' docId '": ' fieldName '.value mismatch (' ...
+ jsonencode(valA) ' vs ' jsonencode(valB) ').']; % jsonencode always yields one char row, so column vectors/chars/structs cannot break the concatenation + report.messages{end+1} = msg; + comp.valueMismatches{end+1} = msg; + end + elseif hasA ~= hasB + report.isEqual = false; + msg = ['Branch "' branchName '", doc "' docId '": field "' fieldName '" present in one summary but not the other.']; + report.messages{end+1} = msg; + comp.valueMismatches{end+1} = msg; + end + end + + % Compare depends_on + hasDepsA = isfield(propsA, 'depends_on'); + hasDepsB = isfield(propsB, 'depends_on'); + if hasDepsA && hasDepsB + depsA = propsA.depends_on; + depsB = propsB.depends_on; + if ~isequal(normalizeDeps(depsA), normalizeDeps(depsB)) + report.isEqual = false; + msg = ['Branch "' branchName '", doc "' docId '": depends_on mismatch.']; + report.messages{end+1} = msg; + comp.valueMismatches{end+1} = msg; + end + elseif hasDepsA ~= hasDepsB + report.isEqual = false; + msg = ['Branch "' branchName '", doc "' docId '": depends_on present in one summary but not the other.']; + report.messages{end+1} = msg; + comp.valueMismatches{end+1} = msg; + end + end + + report.branchComparison.(safeName) = comp; + end +end + +%% Local helper functions + +function s = toSummaryStruct(input) + % Convert various input types to a summary struct + if isstruct(input) + s = input; + elseif ischar(input) || isstring(input) + % Treat as file path to JSON + fid = fopen(input, 'r'); + if fid < 0 + error('DID:CompareSummary:FileNotFound', 'Could not open file: %s', input); + end + rawJson = fread(fid, inf, '*char')'; + fclose(fid); + s = jsondecode(rawJson); + elseif isa(input, 'did.database') + s = did.util.databaseSummary(input); + else + error('DID:CompareSummary:InvalidInput', ...
+ 'Input must be a summary struct, a JSON file path, or a did.database object.'); + end +end + +function m = buildDocMap(docs) + % Build a containers.Map from document ID to document struct + m = containers.Map('KeyType', 'char', 'ValueType', 'any'); + if iscell(docs) + for i = 1:numel(docs) + docStruct = docs{i}; + docId = getDocId(docStruct); + if ~isempty(docId) + m(docId) = docStruct; + end + end + elseif isstruct(docs) + for i = 1:numel(docs) + docId = getDocId(docs(i)); + if ~isempty(docId) + m(docId) = docs(i); + end + end + end +end + +function docId = getDocId(docStruct) + % Extract document ID from a summary doc struct + if isfield(docStruct, 'id') + docId = docStruct.id; + elseif isfield(docStruct, 'properties') && isfield(docStruct.properties, 'base') + docId = docStruct.properties.base.id; + elseif isfield(docStruct, 'base') + docId = docStruct.base.id; + else + docId = ''; + end +end + +function cn = getClassName(docStruct) + % Extract class name from a summary doc struct + if isfield(docStruct, 'className') + cn = docStruct.className; + elseif isfield(docStruct, 'properties') && isfield(docStruct.properties, 'document_class') + cn = docStruct.properties.document_class.class_name; + elseif isfield(docStruct, 'document_class') + cn = docStruct.document_class.class_name; + else + cn = ''; + end +end + +function props = getProperties(docStruct) + % Extract the document properties from a summary doc struct + if isfield(docStruct, 'properties') + props = docStruct.properties; + else + props = docStruct; + end +end + +function deps = normalizeDeps(depsInput) + % Reduce depends_on struct entries to their name/value fields (order preserved, no sorting) + if isstruct(depsInput) + deps = struct(); + for i = 1:numel(depsInput) + if isfield(depsInput(i), 'name') && isfield(depsInput(i), 'value') + deps(i).name = depsInput(i).name; + deps(i).value = depsInput(i).value; + else + deps = depsInput; return; % no name/value fields: compare as-is (assigning into the field-less struct would error) + end + end + else + deps = depsInput; + end +end diff --git
a/src/did/+did/+util/databaseSummary.m b/src/did/+did/+util/databaseSummary.m new file mode 100644 index 0000000..9ba7de7 --- /dev/null +++ b/src/did/+did/+util/databaseSummary.m @@ -0,0 +1,100 @@ +function summary = databaseSummary(db) + % DATABASESUMMARY - produce a struct summarizing a DID database and its branches + % + % SUMMARY = did.util.databaseSummary(DB) + % + % Returns a struct that captures the full state of a did.database object DB, + % suitable for serialization to JSON and cross-language symmetry testing. + % + % The returned SUMMARY struct contains: + % .dbId - the database identifier string + % .branchNames - cell array of all branch IDs in the database + % .branchHierarchy - struct with one field per branch (keyed by matlab.lang.makeValidName of the branch name), each holding .branchName and .parent ('' when get_branch_parent errors) + % .branches - struct with one field per branch (same valid-name keys), each containing .branchName and: + % .docCount - number of documents in the branch + % .documents - cell array of document summary structs, each with: + % .id - document unique ID + % .className - document_class.class_name + % .properties - the full document_properties struct + % + % The summary is deterministic: documents within each branch are sorted by ID. + % + % Example: + % db = did.implementations.sqlitedb('mydb.sqlite'); + % summary = did.util.databaseSummary(db); + % jsonStr = did.datastructures.jsonencodenan(summary); + % + % See also: did.util.compareDatabaseSummary + + arguments + db did.database + end + + summary = struct(); + summary.dbId = db.dbid; + + % Gather all branch names + branchNames = db.all_branch_ids(); + if ischar(branchNames) + branchNames = {branchNames}; + end + summary.branchNames = branchNames; + + % Build branch hierarchy (each branch -> its parent) + branchHierarchy = struct(); + for i = 1:numel(branchNames) + branchName = branchNames{i}; + safeName = matlab.lang.makeValidName(branchName); + try + parentId = db.get_branch_parent(branchName); + catch + parentId = ''; + end + branchHierarchy.(safeName) = struct( ... + 'branchName', branchName, ...
+ 'parent', parentId ... + ); + end + summary.branchHierarchy = branchHierarchy; + + % Build per-branch document summaries + branches = struct(); + for i = 1:numel(branchNames) + branchName = branchNames{i}; + safeName = matlab.lang.makeValidName(branchName); + + docIds = db.get_doc_ids(branchName); + if ischar(docIds) + docIds = {docIds}; + end + if isempty(docIds) + docIds = {}; + end + + % Sort by ID for deterministic output + docIds = sort(docIds); + + docSummaries = cell(1, numel(docIds)); + for j = 1:numel(docIds) + doc = db.get_docs(docIds{j}); + props = doc.document_properties; + + docSummary = struct(); + docSummary.id = docIds{j}; + if isfield(props, 'document_class') && isfield(props.document_class, 'class_name') + docSummary.className = props.document_class.class_name; + else + docSummary.className = ''; + end + docSummary.properties = props; + docSummaries{j} = docSummary; + end + + branchInfo = struct(); + branchInfo.branchName = branchName; + branchInfo.docCount = numel(docIds); + branchInfo.documents = docSummaries; + branches.(safeName) = branchInfo; + end + summary.branches = branches; +end diff --git a/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m b/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m index edae243..1c50f45 100644 --- a/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m +++ b/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m @@ -2,8 +2,8 @@ % BUILDDATABASE - Generate DID database artifacts for cross-language symmetry testing % % This test creates a small DID database with random documents (demoA, demoB, demoC) - % across multiple branches, then exports the database file and per-branch JSON - % audit files as artifacts for comparison with other DID implementations (e.g., Python). + % across multiple branches, then uses did.util.databaseSummary to export a JSON + % summary of each branch for comparison with other DID implementations (e.g., Python).
properties (Constant) dbFilename = 'symmetry_test.sqlite' @@ -55,59 +55,37 @@ function testBuildDatabaseArtifacts(testCase) % └── branch_feature branchNames = {'branch_main', 'branch_dev', 'branch_feature'}; - % Create the root branch + % Create the root branch and add documents testCase.db.add_branch(branchNames{1}); - - % Generate initial documents for the root branch (small counts) [~, ~, rootDocs] = did.test.helper.documents.make_doc_tree([3 3 3]); testCase.db.add_docs(rootDocs); - % Create branch_dev as child of branch_main + % Create branch_dev as child of branch_main and add documents testCase.db.set_branch(branchNames{1}); testCase.db.add_branch(branchNames{2}); - - % Add some additional documents to branch_dev [~, ~, devDocs] = did.test.helper.documents.make_doc_tree([2 2 2]); testCase.db.add_docs(devDocs); - % Create branch_feature as child of branch_main + % Create branch_feature as child of branch_main and add documents testCase.db.set_branch(branchNames{1}); testCase.db.add_branch(branchNames{3}); - - % Add some additional documents to branch_feature [~, ~, featureDocs] = did.test.helper.documents.make_doc_tree([2 1 2]); testCase.db.add_docs(featureDocs); - % Step 3: Export per-branch JSON audit files + % Step 3: Generate summary using did.util.databaseSummary + summary = did.util.databaseSummary(testCase.db); + summary.dbFilename = testCase.dbFilename; + + % Step 4: Write summary JSON (one file per branch + overall summary) jsonBranchesDir = fullfile(artifactDir, 'jsonBranches'); mkdir(jsonBranchesDir); - % Build metadata structure - metadata = struct(); - metadata.branchNames = {branchNames{:}}; %#ok - metadata.branchHierarchy = struct(); - metadata.branchHierarchy.branch_main = {{'branch_dev', 'branch_feature'}}; - metadata.branchHierarchy.branch_dev = {{}}; - metadata.branchHierarchy.branch_feature = {{}}; - metadata.dbFilename = testCase.dbFilename; - branchDocCounts = struct(); - for i = 1:numel(branchNames) branchName = branchNames{i}; - 
testCase.db.set_branch(branchName); - - % Get all document IDs in this branch - docIds = testCase.db.get_doc_ids(branchName); + safeName = matlab.lang.makeValidName(branchName); + branchData = summary.branches.(safeName); - % Retrieve full documents - branchDocsData = cell(1, numel(docIds)); - for j = 1:numel(docIds) - doc = testCase.db.get_docs(docIds{j}); - branchDocsData{j} = doc.document_properties; - end - - % Write the branch JSON file - branchJsonStr = did.datastructures.jsonencodenan(branchDocsData); + branchJsonStr = did.datastructures.jsonencodenan(branchData); branchJsonFile = fullfile(jsonBranchesDir, ['branch_' branchName '.json']); fid = fopen(branchJsonFile, 'w'); testCase.verifyGreaterThan(fid, 0, ... @@ -116,31 +94,33 @@ function testBuildDatabaseArtifacts(testCase) fprintf(fid, '%s', branchJsonStr); fclose(fid); end - - % Track document counts for metadata - branchDocCounts.(branchName) = numel(docIds); end - % Step 4: Write metadata.json - metadata.branchDocCounts = branchDocCounts; - metadataJsonStr = did.datastructures.jsonencodenan(metadata); - fid = fopen(fullfile(artifactDir, 'metadata.json'), 'w'); - testCase.verifyGreaterThan(fid, 0, 'Could not create metadata.json'); + % Write the full summary JSON + summaryJsonStr = did.datastructures.jsonencodenan(summary); + fid = fopen(fullfile(artifactDir, 'summary.json'), 'w'); + testCase.verifyGreaterThan(fid, 0, 'Could not create summary.json'); if fid > 0 - fprintf(fid, '%s', metadataJsonStr); + fprintf(fid, '%s', summaryJsonStr); fclose(fid); end - % Verify artifacts were created + % Step 5: Verify artifacts were created testCase.verifyTrue(isfile(dbPath), 'Database file was not created.'); - testCase.verifyTrue(isfolder(jsonBranchesDir), 'jsonBranches directory was not created.'); - testCase.verifyTrue(isfile(fullfile(artifactDir, 'metadata.json')), 'metadata.json was not created.'); - + testCase.verifyTrue(isfile(fullfile(artifactDir, 'summary.json')), ... 
+ 'summary.json was not created.'); for i = 1:numel(branchNames) branchFile = fullfile(jsonBranchesDir, ['branch_' branchNames{i} '.json']); testCase.verifyTrue(isfile(branchFile), ... ['Branch JSON file missing for ' branchNames{i}]); end + + % Step 6: Self-check — re-summarize and compare to verify consistency + summaryCheck = did.util.databaseSummary(testCase.db); + summaryCheck.dbFilename = testCase.dbFilename; + selfReport = did.util.compareDatabaseSummary(summary, summaryCheck); + testCase.verifyTrue(selfReport.isEqual, ... + ['Self-check failed: ' strjoin(selfReport.messages, '; ')]); end end end diff --git a/tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md b/tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md index 990466a..f716eb1 100644 --- a/tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md +++ b/tests/+did/+symmetry/+makeArtifacts/INSTRUCTIONS.md @@ -16,11 +16,13 @@ This folder contains MATLAB unit tests whose purpose is to generate standard DID 4. **Artifact Contents**: Each test should produce: - The SQLite database file itself. - - One JSON audit file per branch, containing all documents in that branch. Each file should be named `branch_<branchName>.json` and placed in a `jsonBranches` subdirectory. - - A `metadata.json` file describing the database structure (branch hierarchy, document counts, etc.). + - A `summary.json` file produced by `did.util.databaseSummary()`, which captures the full database state (branch hierarchy, document IDs, class names, property values). + - One JSON file per branch in a `jsonBranches` subdirectory (named `branch_<branchName>.json`), containing the branch slice of the summary. 5. **Deterministic Seeds**: Tests should use `rng('default')` so that the random document/branch generation is reproducible across runs. +6. **Utility Functions**: Use `did.util.databaseSummary(db)` to generate the summary struct from a database, and `did.util.compareDatabaseSummary(summaryA, summaryB)` to compare two summaries.
These utilities handle the serialization format and comparison logic so that tests remain concise. + ## Example: For a test class `buildDatabase.m` in `tests/+did/+symmetry/+makeArtifacts/+database` with a test method `testBuildDatabaseArtifacts`, the artifacts should be saved to: `[tempdir(), 'DID/symmetryTest/matlabArtifacts/database/buildDatabase/testBuildDatabaseArtifacts/']` diff --git a/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m b/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m index 2ff9fd1..0bfde7e 100644 --- a/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m +++ b/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m @@ -2,8 +2,8 @@ % BUILDDATABASE - Read and validate DID database artifacts for cross-language symmetry testing % % This test reads database artifacts generated by either the MATLAB or Python - % DID test suite and validates that the documents can be correctly loaded and - % that their contents match the JSON audit files. + % DID test suite, re-summarizes the database using did.util.databaseSummary, + % and compares the result against the saved summary using did.util.compareDatabaseSummary. properties (TestParameter) % Define the two potential sources of artifacts @@ -20,30 +20,38 @@ function testBuildDatabaseArtifacts(testCase, SourceType) testCase.assumeTrue(isfolder(artifactDir), ... ['Artifact directory from ' SourceType ' does not exist.']); - % Step 1: Load and validate metadata.json - metadataFile = fullfile(artifactDir, 'metadata.json'); - testCase.assumeTrue(isfile(metadataFile), ... - ['metadata.json not found in ' SourceType ' artifact directory.']); + % Step 1: Load the saved summary + summaryFile = fullfile(artifactDir, 'summary.json'); + testCase.assumeTrue(isfile(summaryFile), ... 
+ ['summary.json not found in ' SourceType ' artifact directory.']); - fid = fopen(metadataFile, 'r'); + fid = fopen(summaryFile, 'r'); rawJson = fread(fid, inf, '*char')'; fclose(fid); - metadata = jsondecode(rawJson); + savedSummary = jsondecode(rawJson); - testCase.verifyTrue(isfield(metadata, 'branchNames'), ... - 'metadata.json missing branchNames field.'); - testCase.verifyTrue(isfield(metadata, 'dbFilename'), ... - 'metadata.json missing dbFilename field.'); + testCase.verifyTrue(isfield(savedSummary, 'branchNames'), ... + 'summary.json missing branchNames field.'); + testCase.verifyTrue(isfield(savedSummary, 'dbFilename'), ... + 'summary.json missing dbFilename field.'); - % Step 2: Open the DID database - dbPath = fullfile(artifactDir, metadata.dbFilename); + % Step 2: Open the DID database and produce a live summary + dbPath = fullfile(artifactDir, savedSummary.dbFilename); testCase.assumeTrue(isfile(dbPath), ... ['Database file not found: ' dbPath]); db = did.implementations.sqlitedb(dbPath); + liveSummary = did.util.databaseSummary(db); - % Step 3: Validate each branch against its JSON audit file - branchNames = metadata.branchNames; + % Step 3: Compare the saved summary against the live database summary + report = did.util.compareDatabaseSummary(savedSummary, liveSummary); + + testCase.verifyTrue(report.isEqual, ... + ['Database summary mismatch for ' SourceType ': ' ... + strjoin(report.messages, '; ')]); + + % Step 4: Also verify per-branch JSON files match the live database + branchNames = savedSummary.branchNames; if ischar(branchNames) branchNames = {branchNames}; end @@ -54,8 +62,8 @@ function testBuildDatabaseArtifacts(testCase, SourceType) for i = 1:numel(branchNames) branchName = branchNames{i}; + safeName = matlab.lang.makeValidName(branchName); - % Load the expected branch JSON branchJsonFile = fullfile(jsonBranchesDir, ['branch_' branchName '.json']); testCase.assumeTrue(isfile(branchJsonFile), ... 
['Branch JSON file missing for ' branchName ' in ' SourceType]); @@ -63,88 +71,47 @@ function testBuildDatabaseArtifacts(testCase, SourceType) fid = fopen(branchJsonFile, 'r'); rawBranchJson = fread(fid, inf, '*char')'; fclose(fid); - expectedDocs = jsondecode(rawBranchJson); - - % Handle single-doc case where jsondecode returns a struct not array - if isstruct(expectedDocs) && ~isscalar(expectedDocs) - % Already a struct array, leave as-is - elseif isstruct(expectedDocs) && isscalar(expectedDocs) - expectedDocs = {expectedDocs}; - elseif iscell(expectedDocs) - % Cell array from jsondecode, leave as-is - end + savedBranch = jsondecode(rawBranchJson); - % Get actual documents from the database branch + % Verify document count matches the live database db.set_branch(branchName); actualDocIds = db.get_doc_ids(branchName); + testCase.verifyEqual(numel(actualDocIds), savedBranch.docCount, ... + ['Document count mismatch in branch ' branchName ' from ' SourceType]); - % Verify document count matches - if iscell(expectedDocs) - expectedCount = numel(expectedDocs); - else - expectedCount = numel(expectedDocs); + % Verify each saved document can be found in the live database + savedDocs = savedBranch.documents; + if isstruct(savedDocs) && isscalar(savedDocs) + savedDocs = {savedDocs}; end - testCase.verifyEqual(numel(actualDocIds), expectedCount, ... - ['Document count mismatch in branch ' branchName ' from ' SourceType ... - '. 
Expected ' num2str(expectedCount) ' but got ' num2str(numel(actualDocIds))]); - - % Verify each expected document exists in the database - for j = 1:expectedCount - if iscell(expectedDocs) - expectedDoc = expectedDocs{j}; - else - expectedDoc = expectedDocs(j); + if isstruct(savedDocs) && ~isscalar(savedDocs) + % struct array from jsondecode — convert to cell + tmp = cell(1, numel(savedDocs)); + for k = 1:numel(savedDocs) + tmp{k} = savedDocs(k); end + savedDocs = tmp; + end - expectedId = expectedDoc.base.id; + for j = 1:numel(savedDocs) + if iscell(savedDocs) + savedDoc = savedDocs{j}; + else + savedDoc = savedDocs(j); + end - % Try to retrieve the document from the database + expectedId = savedDoc.id; doc = db.get_docs(expectedId); testCase.verifyNotEmpty(doc, ... - ['Document ' expectedId ' from ' SourceType ' not found in database branch ' branchName]); + ['Document ' expectedId ' from ' SourceType ... + ' not found in database branch ' branchName]); if ~isempty(doc) actualProps = doc.document_properties; - - % Verify document class name matches testCase.verifyEqual(actualProps.document_class.class_name, ... - expectedDoc.document_class.class_name, ... - ['Class name mismatch for doc ' expectedId ' in branch ' branchName ' from ' SourceType]); - - % Verify demo-type-specific value fields - demoFields = {'demoA', 'demoB', 'demoC'}; - for k = 1:numel(demoFields) - fieldName = demoFields{k}; - if isfield(expectedDoc, fieldName) - testCase.verifyTrue(isfield(actualProps, fieldName), ... - ['Missing field ' fieldName ' in doc ' expectedId ' from ' SourceType]); - if isfield(actualProps, fieldName) - testCase.verifyEqual(actualProps.(fieldName).value, ... - expectedDoc.(fieldName).value, ... - ['Value mismatch in ' fieldName ' for doc ' expectedId ' from ' SourceType]); - end - end - end - - % Verify depends_on if present - if isfield(expectedDoc, 'depends_on') - testCase.verifyTrue(isfield(actualProps, 'depends_on'), ... 
- ['Missing depends_on in doc ' expectedId ' from ' SourceType]); - end - end - end - end - - % Step 4: Verify branch document counts match metadata - if isfield(metadata, 'branchDocCounts') - for i = 1:numel(branchNames) - branchName = branchNames{i}; - if isfield(metadata.branchDocCounts, branchName) - expectedCount = metadata.branchDocCounts.(branchName); - db.set_branch(branchName); - actualDocIds = db.get_doc_ids(branchName); - testCase.verifyEqual(numel(actualDocIds), expectedCount, ... - ['Metadata doc count mismatch for branch ' branchName ' from ' SourceType]); + savedDoc.className, ... + ['Class name mismatch for doc ' expectedId ... + ' in branch ' branchName ' from ' SourceType]); end end end diff --git a/tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md b/tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md index 5e4a7ff..76a878f 100644 --- a/tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md +++ b/tests/+did/+symmetry/+readArtifacts/INSTRUCTIONS.md @@ -13,12 +13,16 @@ This folder contains MATLAB unit tests whose purpose is to read standard DID art - `<className>`: The name of the test class (e.g., `buildDatabase`). - `<testName>`: The specific name of the test method that was executed (e.g., `testBuildDatabaseArtifacts`). -3. **Testing Goals**: The MATLAB tests located in this `+readArtifacts` package should define assertions that: - - Load the JSON branch audit files created by the target suite. - - Load the actual DID database (SQLite) produced by the target suite. - - Assert that the DID documents retrieved by MATLAB match the expected JSON structure dumped by the test suite. +3. **Testing Goals**: The MATLAB tests located in this `+readArtifacts` package should: + - Load the `summary.json` file saved by the makeArtifacts test (or its Python equivalent). + - Open the actual DID database (SQLite) from the artifact directory. + - Re-summarize the live database using `did.util.databaseSummary(db)`.
+ - Compare the saved summary against the live summary using `did.util.compareDatabaseSummary()`. + - Additionally verify per-branch JSON files against the live database. - Run across both `pythonArtifacts` and `matlabArtifacts` using parameterized testing to ensure parity. +4. **Utility Functions**: Use `did.util.databaseSummary(db)` to produce a summary from a live database, and `did.util.compareDatabaseSummary(savedSummary, liveSummary)` to compare. The comparison returns a cell array of difference messages, which is empty when the two summaries match. + ## Example: Use MATLAB's `TestParameter` property block to dynamically pass the `SourceType` to your tests. @@ -35,7 +39,17 @@ classdef buildDatabase < matlab.unittest.TestCase 'database', 'buildDatabase', 'testBuildDatabaseArtifacts'); testCase.assumeTrue(isfolder(artifactDir), ... ['Artifact directory from ' SourceType ' does not exist.']); - % ... load database, compare with JSON branch files ... + + % Load saved summary + savedSummary = jsondecode(fileread(fullfile(artifactDir, 'summary.json'))); + + % Open database and produce live summary + db = did.implementations.sqlitedb(fullfile(artifactDir, savedSummary.dbFilename)); + liveSummary = did.util.databaseSummary(db); + + % Compare + report = did.util.compareDatabaseSummary(savedSummary, liveSummary); + testCase.verifyEmpty(report, strjoin(report, '; ')); end end end From 17a798b157fa0fb7e8a1055c5c127705778919ee Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 00:07:00 +0000 Subject: [PATCH 3/5] Align compareDatabaseSummary with NDI's compareSessionSummary pattern Update compareDatabaseSummary to return a cell array of message strings (empty = equal) instead of a struct with .isEqual, matching the pattern used by ndi.util.compareSessionSummary on the NDI-matlab side. Also add arguments block with excludeFields option and use sprintf for messages. Update symmetry tests to use verifyEmpty(report) accordingly.
https://claude.ai/code/session_01TT3ycsjcvsKLnAp1WffRPe --- src/did/+did/+util/compareDatabaseSummary.m | 167 +++++++----------- .../+makeArtifacts/+database/buildDatabase.m | 4 +- .../+readArtifacts/+database/buildDatabase.m | 4 +- 3 files changed, 71 insertions(+), 104 deletions(-) diff --git a/src/did/+did/+util/compareDatabaseSummary.m b/src/did/+did/+util/compareDatabaseSummary.m index 7b7ba42..f02e85c 100644 --- a/src/did/+did/+util/compareDatabaseSummary.m +++ b/src/did/+did/+util/compareDatabaseSummary.m @@ -1,46 +1,43 @@ -function report = compareDatabaseSummary(summaryA, summaryB) +function report = compareDatabaseSummary(summaryA, summaryB, options) % COMPAREDATABASESUMMARY - compare two database summaries and return a report % - % REPORT = did.util.compareDatabaseSummary(SUMMARYA, SUMMARYB) + % REPORT = did.util.compareDatabaseSummary(SUMMARYA, SUMMARYB, ...) % % Compares two database summary structs (as produced by did.util.databaseSummary - % or loaded from JSON) and returns a report struct describing any differences. + % or loaded from JSON) and returns a cell array of character arrays describing + % any differences found. If no differences are found, returns an empty cell + % array {}. 
% % SUMMARYA and SUMMARYB may be: % - structs returned by did.util.databaseSummary() % - file paths to JSON files containing serialized summaries % - did.database objects (which will be summarized automatically) % - % The returned REPORT struct contains: - % .isEqual - true if the two summaries match on all checked fields - % .messages - cell array of human-readable difference descriptions - % .branchComparison - struct with per-branch comparison details: - % ..inBoth - true if branch exists in both summaries - % ..docCountA - document count in summary A - % ..docCountB - document count in summary B - % ..docCountMatch - true if counts match - % ..missingInA - cell array of doc IDs in B but not A - % ..missingInB - cell array of doc IDs in A but not B - % ..valueMismatches - cell array of mismatch descriptions + % This function accepts name-value pair arguments: + % 'excludeFields' - A cell array of top-level field names to skip + % when comparing (only 'branchHierarchy' is currently honored).
% % Example: % summaryA = did.util.databaseSummary(dbA); % summaryB = did.util.databaseSummary(dbB); % report = did.util.compareDatabaseSummary(summaryA, summaryB); - % if ~report.isEqual - % disp(report.messages); + % if ~isempty(report) + % cellfun(@disp, report); % end % % See also: did.util.databaseSummary + arguments + summaryA + summaryB + options.excludeFields (1,:) cell = {} + end + % Convert inputs to summary structs if needed summaryA = toSummaryStruct(summaryA); summaryB = toSummaryStruct(summaryB); - report = struct(); - report.isEqual = true; - report.messages = {}; - report.branchComparison = struct(); + report = {}; % Compare branch names branchesA = summaryA.branchNames; @@ -48,83 +45,67 @@ if ischar(branchesA), branchesA = {branchesA}; end if ischar(branchesB), branchesB = {branchesB}; end - allBranches = union(branchesA, branchesB); - onlyInA = setdiff(branchesA, branchesB); onlyInB = setdiff(branchesB, branchesA); - if ~isempty(onlyInA) - report.isEqual = false; - for i = 1:numel(onlyInA) - report.messages{end+1} = ['Branch "' onlyInA{i} '" exists only in summary A.']; - end + for i = 1:numel(onlyInA) + report{end+1} = sprintf('Branch "%s" exists only in summary A.', onlyInA{i}); %#ok<*AGROW> end - if ~isempty(onlyInB) - report.isEqual = false; - for i = 1:numel(onlyInB) - report.messages{end+1} = ['Branch "' onlyInB{i} '" exists only in summary B.']; + for i = 1:numel(onlyInB) + report{end+1} = sprintf('Branch "%s" exists only in summary B.', onlyInB{i}); + end + + % Compare branch hierarchy if present in both + if isfield(summaryA, 'branchHierarchy') && isfield(summaryB, 'branchHierarchy') ... 
+ && ~ismember('branchHierarchy', options.excludeFields) + commonBranches = intersect(branchesA, branchesB); + for i = 1:numel(commonBranches) + branchName = commonBranches{i}; + safeName = matlab.lang.makeValidName(branchName); + if isfield(summaryA.branchHierarchy, safeName) && isfield(summaryB.branchHierarchy, safeName) + parentA = summaryA.branchHierarchy.(safeName).parent; + parentB = summaryB.branchHierarchy.(safeName).parent; + if ~strcmp(parentA, parentB) + report{end+1} = sprintf('Branch "%s": parent mismatch ("%s" vs "%s").', branchName, parentA, parentB); + end + end end end - % Compare each branch that exists in both - for i = 1:numel(allBranches) - branchName = allBranches{i}; + % Compare each branch's documents + commonBranches = intersect(branchesA, branchesB); + for i = 1:numel(commonBranches) + branchName = commonBranches{i}; safeName = matlab.lang.makeValidName(branchName); - comp = struct(); - comp.inBoth = ismember(branchName, branchesA) && ismember(branchName, branchesB); - - if ~comp.inBoth - comp.docCountA = 0; - comp.docCountB = 0; - comp.docCountMatch = false; - comp.missingInA = {}; - comp.missingInB = {}; - comp.valueMismatches = {}; - report.branchComparison.(safeName) = comp; + if ~isfield(summaryA.branches, safeName) || ~isfield(summaryB.branches, safeName) continue; end - % Get branch data from each summary branchA = summaryA.branches.(safeName); branchB = summaryB.branches.(safeName); - comp.docCountA = branchA.docCount; - comp.docCountB = branchB.docCount; - comp.docCountMatch = (branchA.docCount == branchB.docCount); - comp.missingInA = {}; - comp.missingInB = {}; - comp.valueMismatches = {}; - - if ~comp.docCountMatch - report.isEqual = false; - report.messages{end+1} = ['Branch "' branchName '": doc count mismatch (' ... 
- num2str(branchA.docCount) ' vs ' num2str(branchB.docCount) ').']; + % Compare document counts + if branchA.docCount ~= branchB.docCount + report{end+1} = sprintf('Branch "%s": doc count mismatch (%d vs %d).', ... + branchName, branchA.docCount, branchB.docCount); end % Build lookup maps by document ID - docsA = branchA.documents; - docsB = branchB.documents; - mapA = buildDocMap(docsA); - mapB = buildDocMap(docsB); + mapA = buildDocMap(branchA.documents); + mapB = buildDocMap(branchB.documents); idsA = keys(mapA); idsB = keys(mapB); - comp.missingInA = setdiff(idsB, idsA); - comp.missingInB = setdiff(idsA, idsB); + missingInA = setdiff(idsB, idsA); + missingInB = setdiff(idsA, idsB); - if ~isempty(comp.missingInA) - report.isEqual = false; - for j = 1:numel(comp.missingInA) - report.messages{end+1} = ['Branch "' branchName '": doc "' comp.missingInA{j} '" missing in summary A.']; - end + for j = 1:numel(missingInA) + report{end+1} = sprintf('Branch "%s": doc "%s" missing in summary A.', branchName, missingInA{j}); end - if ~isempty(comp.missingInB) - report.isEqual = false; - for j = 1:numel(comp.missingInB) - report.messages{end+1} = ['Branch "' branchName '": doc "' comp.missingInB{j} '" missing in summary B.']; - end + for j = 1:numel(missingInB) + report{end+1} = sprintf('Branch "%s": doc "%s" missing in summary B.', branchName, missingInB{j}); end % Compare documents present in both @@ -138,16 +119,14 @@ classA = getClassName(docA); classB = getClassName(docB); if ~strcmp(classA, classB) - report.isEqual = false; - msg = ['Branch "' branchName '", doc "' docId '": class name mismatch ("' classA '" vs "' classB '").']; - report.messages{end+1} = msg; - comp.valueMismatches{end+1} = msg; + report{end+1} = sprintf('Branch "%s", doc "%s": class name mismatch ("%s" vs "%s").', ... 
+ branchName, docId, classA, classB); end % Compare demo-type value fields - demoFields = {'demoA', 'demoB', 'demoC'}; propsA = getProperties(docA); propsB = getProperties(docB); + demoFields = {'demoA', 'demoB', 'demoC'}; for k = 1:numel(demoFields) fieldName = demoFields{k}; hasA = isfield(propsA, fieldName); @@ -156,17 +135,12 @@ valA = propsA.(fieldName).value; valB = propsB.(fieldName).value; if ~isequal(valA, valB) - report.isEqual = false; - msg = ['Branch "' branchName '", doc "' docId '": ' fieldName '.value mismatch (' ... - num2str(valA) ' vs ' num2str(valB) ').']; - report.messages{end+1} = msg; - comp.valueMismatches{end+1} = msg; + report{end+1} = sprintf('Branch "%s", doc "%s": %s.value mismatch (%s vs %s).', ... + branchName, docId, fieldName, num2str(valA), num2str(valB)); end elseif hasA ~= hasB - report.isEqual = false; - msg = ['Branch "' branchName '", doc "' docId '": field "' fieldName '" present in one summary but not the other.']; - report.messages{end+1} = msg; - comp.valueMismatches{end+1} = msg; + report{end+1} = sprintf('Branch "%s", doc "%s": field "%s" present in one summary but not the other.', ... 
+ branchName, docId, fieldName); end end @@ -174,23 +148,16 @@ hasDepsA = isfield(propsA, 'depends_on'); hasDepsB = isfield(propsB, 'depends_on'); if hasDepsA && hasDepsB - depsA = propsA.depends_on; - depsB = propsB.depends_on; - if ~isequal(normalizeDeps(depsA), normalizeDeps(depsB)) - report.isEqual = false; - msg = ['Branch "' branchName '", doc "' docId '": depends_on mismatch.']; - report.messages{end+1} = msg; - comp.valueMismatches{end+1} = msg; + depsA = normalizeDeps(propsA.depends_on); + depsB = normalizeDeps(propsB.depends_on); + if ~isequal(depsA, depsB) + report{end+1} = sprintf('Branch "%s", doc "%s": depends_on mismatch.', branchName, docId); end elseif hasDepsA ~= hasDepsB - report.isEqual = false; - msg = ['Branch "' branchName '", doc "' docId '": depends_on present in one summary but not the other.']; - report.messages{end+1} = msg; - comp.valueMismatches{end+1} = msg; + report{end+1} = sprintf('Branch "%s", doc "%s": depends_on present in one summary but not the other.', ... + branchName, docId); end end - - report.branchComparison.(safeName) = comp; end end diff --git a/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m b/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m index 1c50f45..5cf80b0 100644 --- a/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m +++ b/tests/+did/+symmetry/+makeArtifacts/+database/buildDatabase.m @@ -119,8 +119,8 @@ function testBuildDatabaseArtifacts(testCase) summaryCheck = did.util.databaseSummary(testCase.db); summaryCheck.dbFilename = testCase.dbFilename; selfReport = did.util.compareDatabaseSummary(summary, summaryCheck); - testCase.verifyTrue(selfReport.isEqual, ... - ['Self-check failed: ' strjoin(selfReport.messages, '; ')]); + testCase.verifyEmpty(selfReport, ... 
+ ['Self-check failed: ' strjoin(selfReport, '; ')]); end end end diff --git a/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m b/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m index 0bfde7e..c119909 100644 --- a/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m +++ b/tests/+did/+symmetry/+readArtifacts/+database/buildDatabase.m @@ -46,9 +46,9 @@ function testBuildDatabaseArtifacts(testCase, SourceType) % Step 3: Compare the saved summary against the live database summary report = did.util.compareDatabaseSummary(savedSummary, liveSummary); - testCase.verifyTrue(report.isEqual, ... + testCase.verifyEmpty(report, ... ['Database summary mismatch for ' SourceType ': ' ... - strjoin(report.messages, '; ')]); + strjoin(report, '; ')]); % Step 4: Also verify per-branch JSON files match the live database branchNames = savedSummary.branchNames; From a0ddf51bfa869f78c03e7c8bb88123fe3786e57a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 00:11:23 +0000 Subject: [PATCH 4/5] Add GitHub Actions workflow for symmetry tests Runs makeArtifacts tests first (generating the database and JSON audit artifacts in tempdir), then readArtifacts tests (validating the artifacts against the live database). Triggered on pushes to main, PRs to main, or manually via workflow_dispatch. 
https://claude.ai/code/session_01TT3ycsjcvsKLnAp1WffRPe --- .github/workflows/test-symmetry.yml | 63 +++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 .github/workflows/test-symmetry.yml diff --git a/.github/workflows/test-symmetry.yml b/.github/workflows/test-symmetry.yml new file mode 100644 index 0000000..9305ac2 --- /dev/null +++ b/.github/workflows/test-symmetry.yml @@ -0,0 +1,63 @@ +name: Test symmetry + +on: + push: + branches: main + + pull_request: + branches: main + + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test-symmetry: + name: Run symmetry tests + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up MATLAB + uses: matlab-actions/setup-matlab@v2 + with: + release: latest + cache: true + products: Statistics_and_Machine_Learning_Toolbox + + - name: Run makeArtifacts tests + uses: matlab-actions/run-command@v2 + with: + command: | + addpath(genpath("src")); + addpath(genpath("tests")); + import matlab.unittest.TestRunner; + import matlab.unittest.TestSuite; + import matlab.unittest.plugins.DiagnosticsOutputPlugin; + runner = TestRunner.withTextOutput; + makeSuite = TestSuite.fromPackage("did.symmetry.makeArtifacts", "IncludingSubpackages", true); + makeResults = runner.run(makeSuite); + disp(table(makeResults)); + assert(all([makeResults.Passed]), "makeArtifacts tests failed"); + + - name: Run readArtifacts tests + uses: matlab-actions/run-command@v2 + with: + command: | + addpath(genpath("src")); + addpath(genpath("tests")); + import matlab.unittest.TestRunner; + import matlab.unittest.TestSuite; + runner = TestRunner.withTextOutput; + readSuite = TestSuite.fromPackage("did.symmetry.readArtifacts", "IncludingSubpackages", true); + readResults = runner.run(readSuite); + disp(table(readResults)); + nSkipped = sum([readResults.Incomplete]); + nPassed = sum([readResults.Passed]); + 
nFailed = sum([readResults.Failed]); + fprintf("Results: %d passed, %d failed, %d skipped\n", nPassed, nFailed, nSkipped); + assert(nFailed == 0, "readArtifacts tests failed"); From 294982bb9e23ac35f6bc66a27b8f75adcfece998 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 00:19:43 +0000 Subject: [PATCH 5/5] Add MatBox and mksqlite installation to symmetry test workflow The sqlitedb implementation requires the mksqlite package. Add steps to install MatBox first, then use matbox.installRequirements to install mksqlite and other dependencies from requirements.txt. https://claude.ai/code/session_01TT3ycsjcvsKLnAp1WffRPe --- .github/workflows/test-symmetry.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/test-symmetry.yml b/.github/workflows/test-symmetry.yml index 9305ac2..91bc5a8 100644 --- a/.github/workflows/test-symmetry.yml +++ b/.github/workflows/test-symmetry.yml @@ -29,6 +29,17 @@ jobs: cache: true products: Statistics_and_Machine_Learning_Toolbox + - name: Install MatBox + uses: ehennestad/matbox-actions/install-matbox@v1 + + - name: Install dependencies (mksqlite etc.) + uses: matlab-actions/run-command@v2 + with: + command: | + addpath(genpath("src")); + addpath(genpath("tools")); + matbox.installRequirements(didtools.projectdir()); + - name: Run makeArtifacts tests uses: matlab-actions/run-command@v2 with: