Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions datastore/gcs/extensions/datastores/_lib/gcs_cache_sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ const SYNC_STATE_FILE = ".datastore-sync-state.json";
* without it, `swamp datastore sync --push` would walk `_catalog.db*`
* into `toPush`, SQLite would rewrite the WAL mid-upload, and the
* push would fail on `_catalog.db-wal`.
* - basename `.lock` at any depth — per-target FileLock files written
* by the data tier's lock subsystem (e.g.
* `data/<kind>/<type>/<id>/.lock`). The lock subsystem creates and
* deletes these directly via GCS Insert/Delete; they must not flow
* through cache sync because (a) the bucket listing in
* `discoverIndexFromBucket` would otherwise capture transient
* `.lock` files into the synthesized index, leaving the index
* referencing objects the lock subsystem deletes on release, and
* (b) a fresh reader hydrating from that stale index would 404 on
* the missing `.lock` and abort `datastore setup`.
*
* Exported for unit tests; not part of the public extension API.
*/
Expand All @@ -95,6 +105,7 @@ export function isInternalCacheFile(rel: string): boolean {
return true;
}
const base = rel.split("/").pop() ?? "";
if (base === ".lock") return true;
return base === "_catalog.db" || base.startsWith("_catalog.db-");
}

Expand Down
18 changes: 18 additions & 0 deletions datastore/gcs/extensions/datastores/_lib/gcs_cache_sync_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,24 @@ Deno.test("isInternalCacheFile: excludes the sync-state sidecar", () => {
assertEquals(isInternalCacheFile("regular/file.yaml"), false);
});

Deno.test("isInternalCacheFile: excludes per-target FileLock files at any depth", () => {
  // The data tier's per-target locks live at `data/<kind>/<type>/<id>/.lock`.
  // If they were not excluded, `discoverIndexFromBucket` would record these
  // transient objects in the synthesized index; once the lock subsystem
  // deletes them on release, a fresh reader hydrating during setup would
  // 404 on the missing object.
  //
  // Each row is [relative path, expected isInternalCacheFile result].
  const cases: ReadonlyArray<readonly [string, boolean]> = [
    // Basename is exactly `.lock`, at different depths.
    ["data/command/shell/c19f88eb-de4f-4227-ade7-8162aec3d6a6/.lock", true],
    ["data/@m/.lock", true],
    // Basename here is `.datastore.lock`, not `.lock`, so the basename
    // `.lock` check cannot be what excludes it — presumably an exact-name
    // check elsewhere in `isInternalCacheFile` does (TODO confirm).
    [".datastore.lock", true],
    // Near-misses that must remain syncable.
    ["data/@m/.locked.yaml", false],
    ["data/@m/lock", false],
  ];

  for (const [relPath, expected] of cases) {
    assertEquals(isInternalCacheFile(relPath), expected);
  }
});

// -- (2) post-verified pullChanged short-circuits with zero index GETs ----

Deno.test("pullChanged: post-verified second call hits fast path with zero index GETs", async () => {
Expand Down
2 changes: 1 addition & 1 deletion datastore/gcs/manifest.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
manifestVersion: 1
name: "@swamp/gcs-datastore"
version: "2026.05.04.4"
version: "2026.05.05.1"
description: |
Store data in a Google Cloud Storage bucket with local cache synchronization.
Provides distributed locking via GCS generation-based preconditions and
Expand Down
19 changes: 17 additions & 2 deletions datastore/s3/extensions/datastores/_lib/s3_cache_sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,23 @@ const SYNC_STATE_FILE = ".datastore-sync-state.json";
* without it, `swamp datastore sync --push` would walk `_catalog.db*`
* into `toPush`, SQLite would rewrite the WAL mid-upload, and the
* push would fail on `_catalog.db-wal`.
* - basename `.lock` at any depth — per-target FileLock files written
* by the data tier's lock subsystem (e.g.
* `data/<kind>/<type>/<id>/.lock`). The lock subsystem creates and
* deletes these directly via S3 PutObject/DeleteObject; they must
* not flow through cache sync because (a) the bucket listing in
* `discoverIndexFromBucket` would otherwise capture transient
* `.lock` files into the synthesized index, leaving the index
* referencing objects the lock subsystem deletes on release, and
* (b) a fresh reader hydrating from that stale index would 404 on
* the missing `.lock` and abort `datastore setup`. This manifests in
* CI as the reader's `datastore sync --pull` reporting "Current
* datastore type: filesystem" because setup fails to persist the
* datastore config to `.swamp.yaml`.
*
* Uses basename matching for the catalog pattern so the filter is
* robust to any future change in the data tier subdirectory name.
* Uses basename matching for the catalog and `.lock` patterns so the
* filter is robust to any future change in the data tier subdirectory
* name.
*
* Exported for unit tests; not part of the public extension API.
*/
Expand All @@ -94,6 +108,7 @@ export function isInternalCacheFile(rel: string): boolean {
return true;
}
const base = rel.split("/").pop() ?? "";
if (base === ".lock") return true;
return base === "_catalog.db" || base.startsWith("_catalog.db-");
}

Expand Down
24 changes: 24 additions & 0 deletions datastore/s3/extensions/datastores/_lib/s3_cache_sync_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,30 @@ Deno.test("isInternalCacheFile: excludes the sync-state sidecar", () => {
assertEquals(isInternalCacheFile(".datastore-sync-state.json"), true);
});

Deno.test("isInternalCacheFile: excludes per-target FileLock files at any depth", () => {
  // Per-target locks are written at `data/<kind>/<type>/<id>/.lock` by the
  // lock subsystem, which PUTs and DELETEs them directly against the bucket.
  // If they were not excluded, `discoverIndexFromBucket` would pick the
  // transient `.lock` objects up from the listing and record them in the
  // synthesized index. After the lock subsystem deletes them on release, a
  // fresh reader hydrating from that stale index 404s on the missing
  // object — surfacing as `datastore setup` failing to persist
  // `.swamp.yaml` and the next `datastore sync --pull` reporting
  // "Current datastore type: filesystem".
  //
  // Each row is [relative path, expected isInternalCacheFile result].
  const cases: ReadonlyArray<readonly [string, boolean]> = [
    // Basename is exactly `.lock`, at different depths.
    ["data/command/shell/c19f88eb-de4f-4227-ade7-8162aec3d6a6/.lock", true],
    ["data/@m/.lock", true],
    // The top-level distributed lock must stay excluded. Its basename is
    // `.datastore.lock`, not `.lock`, so the basename `.lock` check does
    // not match it; the exclusion has to come from the exact-match branch
    // of `isInternalCacheFile` (not visible in this hunk — confirm).
    [".datastore.lock", true],
    // Non-`.lock` data files are not affected.
    ["data/@m/.locked.yaml", false],
    ["data/@m/lock", false],
  ];

  for (const [relPath, expected] of cases) {
    assertEquals(isInternalCacheFile(relPath), expected);
  }
});

// -- (2) post-verified pullChanged short-circuits with zero index GETs ----

Deno.test("pullChanged: post-verified second call hits fast path with zero index GETs", async () => {
Expand Down
2 changes: 1 addition & 1 deletion datastore/s3/manifest.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
manifestVersion: 1
name: "@swamp/s3-datastore"
version: "2026.05.04.4"
version: "2026.05.05.1"
description: |
Store data in an Amazon S3 bucket with local cache synchronization.
Provides distributed locking via S3 conditional writes and bidirectional
Expand Down
Loading