diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 0000000..63ae301 --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,25 @@ +name: Integration + +# Manual-dispatch only: these tests need Docker (a fake-gcs-server emulator) and +# are not part of the default network-free CI. Run them before merging changes to +# real-service adapters such as store/gcs. +on: + workflow_dispatch: + +permissions: + contents: read + +jobs: + integration: + name: Integration (GCS emulator) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + - name: Integration tests + run: make test-integration diff --git a/Makefile b/Makefile index 78b97c2..f25ab3c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,6 @@ -.PHONY: build test test-coverage lint fix fix-imports tidy install-lint install-goimports install-hooks clean +.PHONY: build test test-integration test-coverage lint fix fix-imports tidy install-lint install-goimports install-hooks clean + +FAKEGCS_CONTAINER := maestro-cms-fakegcs # Build all packages. build: lint @@ -9,6 +11,17 @@ build: lint test: go test -cover $(TESTARGS) ./... +# Run build-tagged integration tests against a Dockerized fake-gcs-server. +# Requires Docker. Starts the emulator, waits for readiness, runs the +# integration-tagged tests with STORAGE_EMULATOR_HOST set, then tears it down. +# Single test: make test-integration TESTARGS='-run TestGCSRoundTrip ./store/gcs/...' 
+test-integration: + @docker rm -f $(FAKEGCS_CONTAINER) >/dev/null 2>&1 || true + docker run -d --rm --name $(FAKEGCS_CONTAINER) -p 4443:4443 fsouza/fake-gcs-server -scheme http -backend memory -public-host localhost:4443 >/dev/null + @for i in $$(seq 1 50); do curl -sf "http://localhost:4443/storage/v1/b?project=test" >/dev/null 2>&1 && exit 0 || sleep 0.2; done; echo "test-integration: fake-gcs-server not ready after ~10s" >&2; docker stop $(FAKEGCS_CONTAINER) >/dev/null 2>&1 || true; exit 1 + @STORAGE_EMULATOR_HOST=http://localhost:4443 go test -tags=integration $(TESTARGS) ./... ; \ + status=$$? ; docker stop $(FAKEGCS_CONTAINER) >/dev/null 2>&1 || true ; exit $$status + # Generate an HTML coverage report. test-coverage: @mkdir -p coverage diff --git a/docs/deferred-tooling.md b/docs/deferred-tooling.md index 0198cf8..476f73b 100644 --- a/docs/deferred-tooling.md +++ b/docs/deferred-tooling.md @@ -4,21 +4,21 @@ Repo-tooling items intentionally left out of the initial scaffolding (`maestro-llms` has them; we don't yet, because the trigger doesn't exist). Add each when its trigger lands so we don't rediscover the gap later. -Status: items 1 and 4 open; items 2 and 3 done (kept here for the audit trail). - -## 1. Integration test target + workflow - -- **What `maestro-llms` has:** a `test-integration` Make target (OS-aware: - macOS routes through an ad-hoc-codesign script, Linux/CI runs - `go test -tags=integration`), a `test-integration-local` escape hatch, and a - manual-dispatch `integration.yml` GitHub workflow that runs live tests against - real services. -- **Why deferred:** `maestro-cms` has no integration tests yet. Core packages - (extract/chunk/content/tokens) are pure and unit-tested. -- **Add when:** the first adapter that talks to a real external service lands — - e.g. `store/gcs` (real GCS / emulator) or `index/pgvector` (real Postgres). - At that point add the build-tagged tests, the `test-integration` target, and a - manual-dispatch workflow; keep the default `make test` and CI network-free. +Status: item 4 open; items 1, 2, and 3 done (kept here for the audit trail). + +## 1.
Integration test target + workflow — DONE + +- **Done:** landed with the first real-service adapter, `store/gcs`. A + `test-integration` Make target starts a Dockerized `fsouza/fake-gcs-server` + (no official Google GCS emulator exists), waits for readiness, runs the + `//go:build integration` tests with `STORAGE_EMULATOR_HOST` set, and tears the + container down. A manual-dispatch `integration.yml` workflow runs the same on + CI. The default `make test` and CI stay network-/Docker-free: the tagged tests + are excluded without the `integration` tag and `t.Skip` when the emulator host + is unset. +- **Extend when:** the next real-service adapter lands (e.g. `index/pgvector` + against real Postgres) — add its build-tagged tests under the same target. If a + macOS ad-hoc-codesign step is ever needed (as in `maestro-llms`), add it then. ## 2. golangci-lint depguard: core-must-not-import-adapters — DONE diff --git a/docs/spec-v1.md b/docs/spec-v1.md index 4d869d9..a547394 100644 --- a/docs/spec-v1.md +++ b/docs/spec-v1.md @@ -222,7 +222,7 @@ type BatchFailure struct { | `chunk` | ✅ | **Pure, boundary-aware**: segments at semantic boundaries (`Paragraphs` default; `Headings` for Markdown — fence-aware ATX/setext, ADR 0008; pluggable `Boundaries` for pages/sections/code/transcripts/caller units), packs units to a token budget, and hard-splits an oversize unit only as a last resort (code-aware for fenced spans: line-boundary cuts, ADR 0008). Token estimation is a budget *constraint*, not the strategy: injected `func(string) int` — standard injection `llms.EstimateTextTokens` (v0.6.0+), local rune-counted char/4 default. Imports no `maestro-llms`. | | `content` | ✅ | `Source` + `Artifact` + media type + single-parent provenance + stable IDs + optional neutral metadata map. New, minimal code. 
| | `embed` | ✅ | **Runner**, not a contract: `Run` takes `[]Input` (a chunk + its source/artifact provenance, so batches span documents), packs them by input-count and token budget, and embeds over `llms.EmbeddingClient`, returning persist-ready `Record`s in input order. Defensive ID matching (opaque per-input IDs, dup/missing/unknown → batch failure); retry is delegated to `llms` middleware (the runner does not retry); a failed batch is bisected by default to isolate a poison input (`DisableBisect` to opt out). Invalid inputs are reported as failures, never panics. The optional `extract→chunk→embed` `Pipeline` is deferred until a real consumer (Morris) shapes its ergonomics. (Failure semantics: ADR 0004; vocabulary: ADR 0001.) | -| `store` | ✅ | `Get/Put/Delete/Exists(key)` object-store interface — opaque, adapter-defined keys, **no path conventions** — plus optional GCS adapter. Clean lift from Morris. A `content.StoreHandle{Backend, Key}` names which adapter resolves a given key. | +| `store` | ✅ | `Get/Put/Delete/Exists(key)` object-store interface — opaque, adapter-defined keys, **no path conventions**. The optional `store/gcs` adapter (over `cloud.google.com/go/storage`, an opt-in subpackage per ADR 0006) has landed: keys are GCS object names verbatim, `storage.ErrObjectNotExist` maps to `store.ErrObjectNotFound`, and `Delete` reports not-found per this interface (not Morris's idempotent delete). Integration-tested against a Dockerized fake-gcs-server (`make test-integration`). A `content.StoreHandle{Backend, Key}` names which adapter resolves a given key. | | `testcms` | ✅ | Deterministic fakes, including a fake embedder. | | `retrieval`| v1.x | Search request/response, context-window, source-handle, citation contracts. Deferred until a consumer is ready (§6). | | `graph` | v2 | Generic directed-graph primitive with caller-defined schema (ADR 0005). 
| diff --git a/go.mod b/go.mod index 7791ea7..222978c 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,58 @@ module github.com/SnapdragonPartners/maestro-cms -go 1.26.3 +go 1.26.4 require ( + cloud.google.com/go/storage v1.62.3 github.com/SnapdragonPartners/maestro-llms v0.7.1 github.com/dslipak/pdf v0.0.2 golang.org/x/net v0.55.0 + google.golang.org/api v0.274.0 +) + +require ( + cel.dev/expr v0.25.1 // indirect + cloud.google.com/go v0.123.0 // indirect + cloud.google.com/go/auth v0.19.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect + cloud.google.com/go/iam v1.7.0 // indirect + cloud.google.com/go/monitoring v1.24.3 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect + github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-jose/go-jose/v4 v4.1.4 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect + github.com/googleapis/gax-go/v2 v2.21.0 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect + github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.39.0 // indirect + 
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect + go.opentelemetry.io/otel v1.43.0 // indirect + go.opentelemetry.io/otel/metric v1.43.0 // indirect + go.opentelemetry.io/otel/sdk v1.43.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect + go.opentelemetry.io/otel/trace v1.43.0 // indirect + golang.org/x/crypto v0.51.0 // indirect + golang.org/x/oauth2 v0.36.0 // indirect + golang.org/x/sync v0.20.0 // indirect + golang.org/x/sys v0.45.0 // indirect + golang.org/x/text v0.37.0 // indirect + golang.org/x/time v0.15.0 // indirect + google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect ) diff --git a/go.sum b/go.sum index 642d81a..7ed5eab 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,129 @@ +cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= +cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= +cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= +cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= +cloud.google.com/go/auth v0.19.0 h1:DGYwtbcsGsT1ywuxsIoWi1u/vlks0moIblQHgSDgQkQ= +cloud.google.com/go/auth v0.19.0/go.mod h1:2Aph7BT2KnaSFOM0JDPyiYgNh6PL9vGMiP8CUIXZ+IY= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod 
h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= +cloud.google.com/go/iam v1.7.0 h1:JD3zh0C6LHl16aCn5Akff0+GELdp1+4hmh6ndoFLl8U= +cloud.google.com/go/iam v1.7.0/go.mod h1:tetWZW1PD/m6vcuY2Zj/aU0eCHNPuxedbnbRTyKXvdY= +cloud.google.com/go/logging v1.13.2 h1:qqlHCBvieJT9Cdq4QqYx1KPadCQ2noD4FK02eNqHAjA= +cloud.google.com/go/logging v1.13.2/go.mod h1:zaybliM3yun1J8mU2dVQ1/qDzjbOqEijZCn6hSBtKak= +cloud.google.com/go/longrunning v0.9.0 h1:0EzbDEGsAvOZNbqXopgniY0w0a1phvu5IdUFq8grmqY= +cloud.google.com/go/longrunning v0.9.0/go.mod h1:pkTz846W7bF4o2SzdWJ40Hu0Re+UoNT6Q5t+igIcb8E= +cloud.google.com/go/monitoring v1.24.3 h1:dde+gMNc0UhPZD1Azu6at2e79bfdztVDS5lvhOdsgaE= +cloud.google.com/go/monitoring v1.24.3/go.mod h1:nYP6W0tm3N9H/bOw8am7t62YTzZY+zUeQ+Bi6+2eonI= +cloud.google.com/go/storage v1.62.3 h1:SZq1t23NCI+e96dH77Dg3PEfsNNEjqO8zE5AnD8gVD0= +cloud.google.com/go/storage v1.62.3/go.mod h1:cpYz/kRVZ+UQAF1uHeea10/9ewcRbxGoGNKsS9daSXA= +cloud.google.com/go/trace v1.11.7 h1:kDNDX8JkaAG3R2nq1lIdkb7FCSi1rCmsEtKVsty7p+U= +cloud.google.com/go/trace v1.11.7/go.mod h1:TNn9d5V3fQVf6s4SCveVMIBS2LJUqo73GACmq/Tky0s= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 h1:DHa2U07rk8syqvCge0QIGMCE1WxGj9njT44GH7zNJLQ= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 h1:UnDZ/zFfG1JhH/DqxIZYU/1CUAlTUScoXD/LcM2Ykk8= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0/go.mod h1:IA1C1U7jO/ENqm/vhi7V9YYpBsp+IMyqNrEN94N7tVc= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.55.0 h1:7t/qx5Ost0s0wbA/VDrByOooURhp+ikYwv20i9Y07TQ= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.55.0/go.mod h1:vB2GH9GAYYJTO3mEn8oYwzEdhlayZIdQz6zdzgUIRvA= 
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0 h1:0s6TxfCu2KHkkZPnBfsQ2y5qia0jl3MMrmBhu3nCOYk= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.55.0/go.mod h1:Mf6O40IAyB9zR/1J8nGDDPirZQQPbYJni8Yisy7NTMc= github.com/SnapdragonPartners/maestro-llms v0.7.1 h1:2mK4SpIXLa2V5jNGfk3vuWx25w8FD9LoYronL5RBWJ0= github.com/SnapdragonPartners/maestro-llms v0.7.1/go.mod h1:NzLOE7aVN2DpHVQ0y4W7qXyw1AO56clLyyoNoz4Vw3Y= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dslipak/pdf v0.0.2 h1:djAvcM5neg9Ush+zR6QXB+VMJzR6TdnX766HPIg1JmI= github.com/dslipak/pdf v0.0.2/go.mod h1:2L3SnkI9cQwnAS9gfPz2iUoLC0rUZwbucpbKi5R1mUo= +github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= +github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= +github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= +github.com/envoyproxy/protoc-gen-validate v1.3.0 
h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= +github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-jose/go-jose/v4 v4.1.4 h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA= +github.com/go-jose/go-jose/v4 v4.1.4/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= +github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8= +github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod 
h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= +github.com/googleapis/gax-go/v2 v2.21.0 h1:h45NjjzEO3faG9Lg/cFrBh2PgegVVgzqKzuZl/wMbiI= +github.com/googleapis/gax-go/v2 v2.21.0/go.mod h1:But/NJU6TnZsrLai/xBAQLLz+Hc7fHZJt/hsCz3Fih4= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= +github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0 h1:kWRNZMsfBHZ+uHjiH4y7Etn2FK26LAGkNFw7RHv1DhE= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= 
+go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0 h1:TC+BewnDpeiAmcscXbGMfxkO+mwYUwE/VySwvw88PfA= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0/go.mod h1:J/ZyF4vfPwsSr9xJSPyQ4LqtcTPULFR64KwTikGLe+A= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= +golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= +golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= +golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= +golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.37.0 
h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= +golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= +golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/api v0.274.0 h1:aYhycS5QQCwxHLwfEHRRLf9yNsfvp1JadKKWBE54RFA= +google.golang.org/api v0.274.0/go.mod h1:JbAt7mF+XVmWu6xNP8/+CTiGH30ofmCmk9nM8d8fHew= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 h1:XzmzkmB14QhVhgnawEVsOn6OFsnpyxNPRY9QV01dNB0= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:L43LFes82YgSonw6iTXTxXUX1OlULt4AQtkik4ULL/I= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/store/gcs/gcs.go b/store/gcs/gcs.go new file mode 100644 index 0000000..33ef72e --- /dev/null +++ b/store/gcs/gcs.go @@ -0,0 
+1,166 @@
+// Package gcs implements store.ObjectStore over a Google Cloud Storage bucket.
+//
+// It is an opt-in subpackage so the core store package stays standard-library
+// only: importing this package pulls in the Google Cloud Storage SDK and its
+// transitive tree (gRPC, OpenTelemetry, genproto). A depguard rule keeps core
+// packages from importing it (see
+// docs/adr/0006-optional-adapters-as-subpackages.md). Wire it in with:
+//
+//	st, err := gcs.New(ctx, "my-bucket")
+//	if err != nil {
+//		return err
+//	}
+//	defer st.Close()
+//
+// Keys are used verbatim as GCS object names: no prefix is prepended and no
+// normalization happens, matching store.ObjectStore's opaque-key contract. The
+// caller owns naming (see store's path-convention notes). Per-bucket encryption,
+// lifecycle, and access policy are provisioned outside this package.
+//
+// For tests and local development, set the STORAGE_EMULATOR_HOST environment
+// variable (e.g. to a fsouza/fake-gcs-server instance); the underlying SDK
+// routes to it and skips authentication automatically.
+package gcs
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+
+	"cloud.google.com/go/storage"
+	"google.golang.org/api/option"
+
+	"github.com/SnapdragonPartners/maestro-cms/store"
+)
+
+// Store is a store.ObjectStore backed by a single GCS bucket. All operations are
+// scoped to the bucket passed at construction.
+type Store struct {
+	client     *storage.Client // SDK client every operation goes through
+	bucket     string          // bucket that all keys are resolved against
+	ownsClient bool            // true when New created client, so Close must close it
+}
+
+var _ store.ObjectStore = (*Store)(nil)
+
+// New constructs a Store over bucket, creating a GCS client with the given
+// options. In normal operation pass no options and the client authenticates via
+// Application Default Credentials; for tests set STORAGE_EMULATOR_HOST, which the
+// SDK honors without credentials.
+//
+// ctx is used only to construct the client (auth and connection setup) and is
+// not retained: canceling it later does not close the client — call Close for
+// that.
A Store created by New owns its client and closes it on Close.
+func New(ctx context.Context, bucket string, opts ...option.ClientOption) (*Store, error) {
+	if len(bucket) == 0 {
+		return nil, errors.New("gcs: bucket must not be empty")
+	}
+	owned, err := storage.NewClient(ctx, opts...)
+	if err != nil {
+		return nil, fmt.Errorf("gcs: new client: %w", err)
+	}
+	return &Store{bucket: bucket, client: owned, ownsClient: true}, nil
+}
+
+// NewWithClient builds a Store over bucket on top of a *storage.Client the
+// caller already has, for code that constructs and shares its own client (one
+// client serving several buckets, or a test client aimed at an emulator).
+//
+// The caller keeps ownership of client: Store.Close leaves it open, so several
+// Stores may share one client and the caller closes it exactly once. (Compare
+// New, whose Store owns the client it created and closes it on Close.)
+// NewWithClient only stores the pointer; it never closes or reconfigures it.
+//
+// An empty bucket or a nil client panics: each is a wiring mistake that would
+// otherwise only surface on the Store's first use.
+func NewWithClient(bucket string, client *storage.Client) *Store {
+	switch {
+	case bucket == "":
+		panic("gcs: NewWithClient requires a non-empty bucket")
+	case client == nil:
+		panic("gcs: NewWithClient requires a non-nil client")
+	}
+	return &Store{bucket: bucket, client: client}
+}
+
+// Bucket reports the name of the bucket every operation on s targets.
+func (s *Store) Bucket() string { return s.bucket }
+
+// Close releases the client's connection pool, but only if this Store created
+// the client (via New). A client supplied to NewWithClient is owned by the
+// caller and left open, so Close is a no-op for such a Store.
+func (s *Store) Close() error {
+	if s == nil || !s.ownsClient || s.client == nil {
+		return nil // nil Store (e.g. a failed New), caller-owned client, or nothing to close
+	}
+	if err := s.client.Close(); err != nil {
+		return fmt.Errorf("gcs: close client: %w", err)
+	}
+	return nil
+}
+
+// Get returns a reader for the object at key, or store.ErrObjectNotFound if it
+// does not exist. The caller must close the returned reader.
+func (s *Store) Get(ctx context.Context, key string) (io.ReadCloser, error) {
+	rc, err := s.client.Bucket(s.bucket).Object(key).NewReader(ctx)
+	if err != nil {
+		if errors.Is(err, storage.ErrObjectNotExist) {
+			return nil, store.ErrObjectNotFound
+		}
+		return nil, fmt.Errorf("gcs: get %q: %w", key, err)
+	}
+	return rc, nil
+}
+
+// Put writes the bytes read from r to key, replacing any existing object. The
+// upload is finalized atomically on success; a copy failure aborts it without
+// committing a partial object.
+//
+// Aborting matters: a GCS Writer buffers and uploads on Close, so calling Close
+// after a partial io.Copy would finalize a truncated object. To prevent that we
+// give the Writer a child context and cancel it on copy failure, so Close aborts
+// the upload (the SDK directs callers to cancel the context rather than use the
+// deprecated Writer.CloseWithError).
+func (s *Store) Put(ctx context.Context, key string, r io.Reader) error {
+	wctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	w := s.client.Bucket(s.bucket).Object(key).NewWriter(wctx)
+	if _, err := io.Copy(w, r); err != nil {
+		cancel()      // abort the upload before Close so no partial object commits
+		_ = w.Close() // returns the cancellation error; the copy error is the real cause
+		return fmt.Errorf("gcs: put %q: %w", key, err)
+	}
+	if err := w.Close(); err != nil {
+		return fmt.Errorf("gcs: put %q: close: %w", key, err)
+	}
+	return nil
+}
+
+// Delete removes the object at key. It returns store.ErrObjectNotFound if the
+// key does not exist, per the store.ObjectStore contract.
(This differs from an
+// idempotent delete that treats a missing key as success: the interface keeps
+// "deleted" and "was not there" distinct so callers can detect a missing object.)
+func (s *Store) Delete(ctx context.Context, key string) error {
+	if err := s.client.Bucket(s.bucket).Object(key).Delete(ctx); err != nil {
+		if errors.Is(err, storage.ErrObjectNotExist) {
+			return store.ErrObjectNotFound // map the SDK sentinel onto the interface's sentinel
+		}
+		return fmt.Errorf("gcs: delete %q: %w", key, err)
+	}
+	return nil
+}
+
+// Exists reports whether an object is present at key. A nil error with false is
+// the not-found outcome; a non-nil error is any other failure (for example
+// transport or auth), which the caller should surface rather than read as absence.
+func (s *Store) Exists(ctx context.Context, key string) (bool, error) {
+	if _, err := s.client.Bucket(s.bucket).Object(key).Attrs(ctx); err != nil {
+		if errors.Is(err, storage.ErrObjectNotExist) {
+			return false, nil // absence is a normal answer here, not an error
+		}
+		return false, fmt.Errorf("gcs: exists %q: %w", key, err)
+	}
+	return true, nil
+}
diff --git a/store/gcs/gcs_integration_test.go b/store/gcs/gcs_integration_test.go
new file mode 100644
index 0000000..de9fff0
--- /dev/null
+++ b/store/gcs/gcs_integration_test.go
@@ -0,0 +1,176 @@
+//go:build integration
+
+// Integration tests for the GCS adapter. They run only under the `integration`
+// build tag and require a GCS-compatible endpoint named by STORAGE_EMULATOR_HOST
+// (e.g. a fsouza/fake-gcs-server container). `make test-integration` starts that
+// container, sets the variable, and runs these; without it they are skipped.
+package gcs_test
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"io"
+	"os"
+	"testing"
+
+	"cloud.google.com/go/storage"
+	"google.golang.org/api/option"
+
+	"github.com/SnapdragonPartners/maestro-cms/store"
+	"github.com/SnapdragonPartners/maestro-cms/store/gcs"
+)
+
+const testBucket = "maestro-cms-it" // bucket newStore creates on demand for these tests
+
+// newStore returns a Store backed by the emulator, creating the test bucket if
+// needed.
It skips the test when no emulator endpoint is configured.
+func newStore(t *testing.T) *gcs.Store {
+	t.Helper()
+	if os.Getenv("STORAGE_EMULATOR_HOST") == "" {
+		t.Skip("STORAGE_EMULATOR_HOST not set; run `make test-integration`")
+	}
+	ctx := context.Background()
+	// STORAGE_EMULATOR_HOST routes the client to the emulator; WithoutAuthentication
+	// stops it from attempting Application Default Credentials, which do not exist
+	// in CI/dev.
+	client, err := storage.NewClient(ctx, option.WithoutAuthentication())
+	if err != nil {
+		t.Fatalf("new client: %v", err)
+	}
+	// NewWithClient does not take ownership, so the test closes the client it
+	// created (st.Close would be a no-op). Register the cleanup before bucket
+	// setup so a t.Fatalf there cannot leak the client.
+	t.Cleanup(func() { _ = client.Close() })
+	// Create the bucket; tolerate "already exists" since the emulator persists for
+	// the life of the container across tests.
+	if err := client.Bucket(testBucket).Create(ctx, "maestro-cms-test", nil); err != nil {
+		if _, aerr := client.Bucket(testBucket).Attrs(ctx); aerr != nil {
+			t.Fatalf("create bucket %q: %v", testBucket, err)
+		}
+	}
+	return gcs.NewWithClient(testBucket, client)
+}
+
+// errReader yields data once, then fails — to exercise a mid-stream reader error.
+type errReader struct { + data []byte + err error + done bool +} + +func (e *errReader) Read(p []byte) (int, error) { + if !e.done { + e.done = true + return copy(p, e.data), nil + } + return 0, e.err +} + +func TestGCSRoundTrip(t *testing.T) { + st := newStore(t) + ctx := context.Background() + const key = "roundtrip/object.bin" + payload := []byte("hello, gcs adapter") + + if err := st.Put(ctx, key, bytes.NewReader(payload)); err != nil { + t.Fatalf("Put: %v", err) + } + + ok, err := st.Exists(ctx, key) + if err != nil || !ok { + t.Fatalf("Exists after Put = (%v, %v), want (true, nil)", ok, err) + } + + rc, err := st.Get(ctx, key) + if err != nil { + t.Fatalf("Get: %v", err) + } + got, err := io.ReadAll(rc) + _ = rc.Close() + if err != nil { + t.Fatalf("read: %v", err) + } + if !bytes.Equal(got, payload) { + t.Fatalf("Get returned %q, want %q", got, payload) + } + + if err := st.Delete(ctx, key); err != nil { + t.Fatalf("Delete: %v", err) + } + ok, err = st.Exists(ctx, key) + if err != nil || ok { + t.Fatalf("Exists after Delete = (%v, %v), want (false, nil)", ok, err) + } +} + +func TestGCSOverwrite(t *testing.T) { + st := newStore(t) + ctx := context.Background() + const key = "overwrite/object.bin" + + if err := st.Put(ctx, key, bytes.NewReader([]byte("first"))); err != nil { + t.Fatalf("Put 1: %v", err) + } + if err := st.Put(ctx, key, bytes.NewReader([]byte("second"))); err != nil { + t.Fatalf("Put 2: %v", err) + } + rc, err := st.Get(ctx, key) + if err != nil { + t.Fatalf("Get: %v", err) + } + got, err := io.ReadAll(rc) + _ = rc.Close() + if err != nil { + t.Fatalf("read: %v", err) + } + if string(got) != "second" { + t.Fatalf("after overwrite Get = %q, want %q", got, "second") + } + _ = st.Delete(ctx, key) +} + +// TestGCSPutAbortsOnReaderError verifies that a reader error mid-stream aborts +// the upload instead of finalizing a truncated object: Put must fail and leave +// no object behind. 
+func TestGCSPutAbortsOnReaderError(t *testing.T) { + st := newStore(t) + ctx := context.Background() + const key = "abort/partial.bin" + + r := &errReader{data: []byte("partial data"), err: errors.New("reader blew up")} + if err := st.Put(ctx, key, r); err == nil { + t.Fatal("Put with an erroring reader returned nil, want error") + } + ok, err := st.Exists(ctx, key) + if err != nil { + t.Fatalf("Exists: %v", err) + } + if ok { + t.Fatal("Put aborted but an object was still committed; want no object") + } +} + +func TestGCSGetMissingIsNotFound(t *testing.T) { + st := newStore(t) + if _, err := st.Get(context.Background(), "missing/nope.bin"); !errors.Is(err, store.ErrObjectNotFound) { + t.Fatalf("Get missing err = %v, want store.ErrObjectNotFound", err) + } +} + +func TestGCSDeleteMissingIsNotFound(t *testing.T) { + // Per the store.ObjectStore contract (unlike a GCS-idempotent delete), + // deleting an absent key reports ErrObjectNotFound. + st := newStore(t) + if err := st.Delete(context.Background(), "missing/nope.bin"); !errors.Is(err, store.ErrObjectNotFound) { + t.Fatalf("Delete missing err = %v, want store.ErrObjectNotFound", err) + } +} + +func TestGCSExistsMissingIsFalse(t *testing.T) { + st := newStore(t) + ok, err := st.Exists(context.Background(), "missing/nope.bin") + if err != nil || ok { + t.Fatalf("Exists missing = (%v, %v), want (false, nil)", ok, err) + } +}