diff --git a/pkgs/defang/cli.nix b/pkgs/defang/cli.nix index 1fba07f07..c1b8f7ab2 100644 --- a/pkgs/defang/cli.nix +++ b/pkgs/defang/cli.nix @@ -7,7 +7,7 @@ buildGo124Module { pname = "defang-cli"; version = "git"; src = lib.cleanSource ../../src; - vendorHash = "sha256-G23v/mmyRRY2Xqq8N7knKcL4ucfBSuhgvttJ5pRKN/U="; + vendorHash = "sha256-zxQuu/RcVgA67++LuRs5xpDiq2e7gepkV8nqQ2GCR74="; subPackages = [ "cmd/cli" ]; diff --git a/src/go.mod b/src/go.mod index f34dd1dcf..f1d86e5ba 100644 --- a/src/go.mod +++ b/src/go.mod @@ -69,6 +69,7 @@ require ( golang.org/x/term v0.38.0 google.golang.org/api v0.236.0 google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 + google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 google.golang.org/grpc v1.79.3 google.golang.org/protobuf v1.36.11 ) @@ -141,7 +142,6 @@ require ( go.opentelemetry.io/otel/sdk/metric v1.40.0 // indirect golang.org/x/net v0.48.0 // indirect google.golang.org/genai v1.30.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect gopkg.in/ini.v1 v1.66.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/src/pkg/cli/client/byoc/gcp/byoc_test.go b/src/pkg/cli/client/byoc/gcp/byoc_test.go index 169eaa0a0..3fe6c7348 100644 --- a/src/pkg/cli/client/byoc/gcp/byoc_test.go +++ b/src/pkg/cli/client/byoc/gcp/byoc_test.go @@ -77,6 +77,10 @@ func (m MockGcpLogsClient) GetBuildInfo(ctx context.Context, buildId string) (*g }, nil } +func (m MockGcpLogsClient) GetInstanceGroupManagerLabels(ctx context.Context, project, region, name string) (map[string]string, error) { + return nil, nil +} + type MockGcpLoggingLister struct { logEntries []*loggingpb.LogEntry } diff --git a/src/pkg/cli/client/byoc/gcp/query.go b/src/pkg/cli/client/byoc/gcp/query.go index e76bcf813..16e4d0c87 100644 --- a/src/pkg/cli/client/byoc/gcp/query.go +++ b/src/pkg/cli/client/byoc/gcp/query.go @@ -285,33 +285,10 @@ protoPayload.response.spec.template.metadata.labels."defang-service"=~"^(%v)$"`, } func (q *Query) AddComputeEngineInstanceGroupInsertOrPatch(stack, project, etag string, services []string) { - query := `protoPayload.methodName=~"beta.compute.regionInstanceGroupManagers.(insert|patch)" AND operation.first="true"` - - if stack != "" { - query += fmt.Sprintf(` -protoPayload.request.allInstancesConfig.properties.labels.key="defang-stack" -protoPayload.request.allInstancesConfig.properties.labels.value="%v"`, gcp.SafeLabelValue(stack)) - } - - if project != "" { - query += fmt.Sprintf(` -protoPayload.request.allInstancesConfig.properties.labels.key="defang-project" -protoPayload.request.allInstancesConfig.properties.labels.value="%v"`, gcp.SafeLabelValue(project)) - } - - if etag != "" { - query += fmt.Sprintf(` -protoPayload.request.allInstancesConfig.properties.labels.key="defang-etag" -protoPayload.request.allInstancesConfig.properties.labels.value="%v"`, gcp.SafeLabelValue(etag)) - } - - if len(services) > 0 { - query += fmt.Sprintf(` -protoPayload.request.allInstancesConfig.properties.labels.key="defang-service" -protoPayload.request.allInstancesConfig.properties.labels.value=~"^(%v)$"`, servicesPattern(services)) - } - - q.AddQuery(query) + // Do not filter by allInstancesConfig.properties.labels here: PATCH requests only carry changed + // fields and omit labels when only the instance template is being updated. The parser reads + // labels from the live resource via GetInstanceGroupManagerLabels instead. + q.AddQuery(`protoPayload.methodName=~"beta.compute.regionInstanceGroupManagers.(insert|patch)" AND operation.first="true"`) } func (q *Query) AddComputeEngineInstanceGroupAddInstances() { diff --git a/src/pkg/cli/client/byoc/gcp/query_test.go b/src/pkg/cli/client/byoc/gcp/query_test.go new file mode 100644 index 000000000..63a204b68 --- /dev/null +++ b/src/pkg/cli/client/byoc/gcp/query_test.go @@ -0,0 +1,41 @@ +package gcp + +import ( + "strings" + "testing" +) + +func TestAddComputeEngineInstanceGroupInsertOrPatch(t *testing.T) { + tests := []struct { + name string + stack string + project string + etag string + services []string + }{ + {"no args", "", "", "", nil}, + {"with all args", "my-stack", "my-project", "abc123", []string{"svc1", "svc2"}}, + {"with stack only", "my-stack", "", "", nil}, + {"with services only", "", "", "", []string{"svc1"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + q := NewSubscribeQuery() + q.AddComputeEngineInstanceGroupInsertOrPatch(tt.stack, tt.project, tt.etag, tt.services) + query := q.GetQuery() + + if !strings.Contains(query, `regionInstanceGroupManagers.(insert|patch)`) { + t.Errorf("query missing method name filter:\n%v", query) + } + if strings.Contains(query, "allInstancesConfig") { + t.Errorf("query must not contain allInstancesConfig label filters (PATCH requests omit labels):\n%v", query) + } + for _, label := range []string{"defang-stack", "defang-project", "defang-etag", "defang-service"} { + if strings.Contains(query, label) { + t.Errorf("query must not filter by %q label (labels absent from PATCH request body):\n%v", label, query) + } + } + }) + } +} diff --git a/src/pkg/cli/client/byoc/gcp/stream.go b/src/pkg/cli/client/byoc/gcp/stream.go index 19fd0ea27..6e3de50b8 100644 --- a/src/pkg/cli/client/byoc/gcp/stream.go +++ b/src/pkg/cli/client/byoc/gcp/stream.go @@ -33,6 +33,7 @@ type GcpLogsClient interface { GetExecutionEnv(ctx context.Context, executionName string) (map[string]string, error) GetProjectID() gcp.ProjectId GetBuildInfo(ctx context.Context, buildId string) (*gcp.BuildTag, error) + GetInstanceGroupManagerLabels(ctx context.Context, project, region, name string) (map[string]string, error) } type ServerStream[T any] struct { @@ -582,29 +583,29 @@ func getActivityParser(ctx context.Context, gcpLogsClient GcpLogsClient, waitFor return nil, nil } case "gce_instance_group_manager": // Compute engine update start - request := auditLog.GetRequest() - if request == nil { - term.Warnf("missing request in audit log for instance group manager %v", path.Base(auditLog.GetResourceName())) + // The patch request body only contains changed fields (e.g. the new instance template), + // so allInstancesConfig.properties.labels is absent for updates. Read labels from the + // live resource instead using the manager name, project, and region from resource labels. + project := entry.Resource.Labels["project_id"] + region := entry.Resource.Labels["location"] + managerName := entry.Resource.Labels["instance_group_manager_name"] + labels, err := gcpLogsClient.GetInstanceGroupManagerLabels(ctx, project, region, managerName) + if err != nil { + term.Warnf("failed to get instance group manager labels for %v: %v", managerName, err) return nil, nil } - labels := GetListInStruct(request, "allInstancesConfig.properties.labels") - if labels == nil { - term.Warnf("missing labels in audit log for instance group manager %v", path.Base(auditLog.GetResourceName())) + serviceName := labels["defang-service"] + if serviceName == "" { + term.Warnf("missing defang-service label in instance group manager %v", managerName) return nil, nil } - // Find the service name from the labels - serviceName := "" - for _, label := range labels { - fields := label.GetStructValue().GetFields() - if fields["key"].GetStringValue() == "defang-service" { - serviceName = fields["value"].GetStringValue() - break + if etag != "" { + labelEtag := labels["defang-etag"] + if labelEtag != etag { + term.Warnf("skipping instance group manager %v: etag mismatch (got %q, want %q)", managerName, labelEtag, etag) + return nil, nil } } - if serviceName == "" { - term.Warnf("missing defang-service label in audit log for instance group manager %v", path.Base(auditLog.GetResourceName())) - return nil, nil - } rootTriggerId := entry.GetLabels()["compute.googleapis.com/root_trigger_id"] if rootTriggerId == "" { term.Warnf("missing root_trigger_id in audit log for instance group manager %v", path.Base(auditLog.GetResourceName())) diff --git a/src/pkg/cli/client/byoc/gcp/stream_test.go b/src/pkg/cli/client/byoc/gcp/stream_test.go index 325da78a7..74e18c13a 100644 --- a/src/pkg/cli/client/byoc/gcp/stream_test.go +++ b/src/pkg/cli/client/byoc/gcp/stream_test.go @@ -2,6 +2,7 @@ package gcp import ( "context" + "errors" "iter" "strconv" "testing" @@ -11,6 +12,11 @@ import ( "github.com/DefangLabs/defang/src/pkg/clouds/gcp" defangv1 "github.com/DefangLabs/defang/src/protos/io/defang/v1" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + monitoredres "google.golang.org/genproto/googleapis/api/monitoredres" + auditpb "google.golang.org/genproto/googleapis/cloud/audit" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/structpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -250,3 +256,219 @@ func TestServerStream_Follow_SkipsNilEntries(t *testing.T) { assert.Equal(t, []string{"real log", "cancel"}, messages, "Follow() should skip nil tailer entries and yield real entries") } + +// activityParserMock wraps MockGcpLogsClient with a configurable GetInstanceGroupManagerLabels. +type activityParserMock struct { + MockGcpLogsClient + labels map[string]string + labelsErr error +} + +func (m *activityParserMock) GetInstanceGroupManagerLabels(_ context.Context, _, _, _ string) (map[string]string, error) { + return m.labels, m.labelsErr +} + +// makeAuditLogEntry builds a loggingpb.LogEntry whose payload is a marshaled auditpb.AuditLog. +func makeAuditLogEntry(resourceType string, resourceLabels, entryLabels map[string]string, auditLog *auditpb.AuditLog) *loggingpb.LogEntry { + payload, err := anypb.New(auditLog) + if err != nil { + panic(err) + } + return &loggingpb.LogEntry{ + Payload: &loggingpb.LogEntry_ProtoPayload{ProtoPayload: payload}, + Resource: &monitoredres.MonitoredResource{ + Type: resourceType, + Labels: resourceLabels, + }, + Labels: entryLabels, + } +} + +func TestActivityParser_GceInstanceGroupManager(t *testing.T) { + tests := []struct { + name string + etag string + labels map[string]string + labelsErr error + rootTriggerId string + wantResp *defangv1.SubscribeResponse + }{ + { + name: "happy path", + labels: map[string]string{"defang-service": "my-svc", "defang-stack": "beta"}, + rootTriggerId: "trigger-abc", + wantResp: &defangv1.SubscribeResponse{ + Name: "my-svc", + State: defangv1.ServiceState_DEPLOYMENT_PENDING, + }, + }, + { + name: "labels API error", + labelsErr: errors.New("rpc error"), + rootTriggerId: "trigger-abc", + wantResp: nil, + }, + { + name: "nil labels (no allInstancesConfig)", + labels: nil, + rootTriggerId: "trigger-abc", + wantResp: nil, + }, + { + name: "missing defang-service label", + labels: map[string]string{"defang-stack": "beta"}, + rootTriggerId: "trigger-abc", + wantResp: nil, + }, + { + name: "missing root_trigger_id still returns DEPLOYMENT_PENDING", + labels: map[string]string{"defang-service": "my-svc"}, + wantResp: &defangv1.SubscribeResponse{ + Name: "my-svc", + State: defangv1.ServiceState_DEPLOYMENT_PENDING, + }, + }, + // etag scoping tests + { + name: "etag matches — accepted", + etag: "abc123", + labels: map[string]string{"defang-service": "my-svc", "defang-etag": "abc123"}, + rootTriggerId: "trigger-abc", + wantResp: &defangv1.SubscribeResponse{ + Name: "my-svc", + State: defangv1.ServiceState_DEPLOYMENT_PENDING, + }, + }, + { + name: "etag mismatch — skipped", + etag: "abc123", + labels: map[string]string{"defang-service": "my-svc", "defang-etag": "other-etag"}, + rootTriggerId: "trigger-abc", + wantResp: nil, + }, + { + name: "defang-etag label missing when etag expected — skipped", + etag: "abc123", + labels: map[string]string{"defang-service": "my-svc"}, + rootTriggerId: "trigger-abc", + wantResp: nil, + }, + { + name: "no expected etag — etag label ignored", + etag: "", + labels: map[string]string{"defang-service": "my-svc", "defang-etag": "any-etag"}, + rootTriggerId: "trigger-abc", + wantResp: &defangv1.SubscribeResponse{ + Name: "my-svc", + State: defangv1.ServiceState_DEPLOYMENT_PENDING, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := t.Context() + mock := &activityParserMock{labels: tt.labels, labelsErr: tt.labelsErr} + parser := getActivityParser(ctx, mock, false, tt.etag) + + entry := makeAuditLogEntry( + "gce_instance_group_manager", + map[string]string{ + "project_id": "test-project", + "location": "us-central1", + "instance_group_manager_name": "test-manager", + }, + map[string]string{ + "compute.googleapis.com/root_trigger_id": tt.rootTriggerId, + }, + &auditpb.AuditLog{}, + ) + + resps, err := parser(entry) + require.NoError(t, err) + + if tt.wantResp == nil { + assert.Nil(t, resps) + } else { + require.Len(t, resps, 1) + assert.Equal(t, tt.wantResp.Name, resps[0].Name) + assert.Equal(t, tt.wantResp.State, resps[0].State) + } + }) + } +} + +// TestActivityParser_GceInstanceGroupFlow verifies the full flow: a gce_instance_group_manager +// entry populates the root-trigger map, and a subsequent gce_instance_group addInstances entry +// uses that map to emit DEPLOYMENT_COMPLETED. +func TestActivityParser_GceInstanceGroupFlow(t *testing.T) { + ctx := t.Context() + const rootTriggerId = "trigger-xyz" + const serviceName = "my-svc" + + mock := &activityParserMock{ + labels: map[string]string{"defang-service": serviceName}, + } + parser := getActivityParser(ctx, mock, false, "") + + // First: gce_instance_group_manager entry (insert/patch) — populates trigger map + mgrEntry := makeAuditLogEntry( + "gce_instance_group_manager", + map[string]string{ + "project_id": "test-project", + "location": "us-central1", + "instance_group_manager_name": "test-manager", + }, + map[string]string{ + "compute.googleapis.com/root_trigger_id": rootTriggerId, + }, + &auditpb.AuditLog{}, + ) + resps, err := parser(mgrEntry) + require.NoError(t, err) + require.Len(t, resps, 1) + assert.Equal(t, serviceName, resps[0].Name) + assert.Equal(t, defangv1.ServiceState_DEPLOYMENT_PENDING, resps[0].State) + + // Second: gce_instance_group addInstances entry — resolves via trigger map + doneResponse, err := structpb.NewStruct(map[string]any{"status": "DONE"}) + require.NoError(t, err) + groupEntry := makeAuditLogEntry( + "gce_instance_group", + map[string]string{"project_id": "test-project"}, + map[string]string{ + "compute.googleapis.com/root_trigger_id": rootTriggerId, + }, + &auditpb.AuditLog{ + Response: doneResponse, + }, + ) + resps, err = parser(groupEntry) + require.NoError(t, err) + require.Len(t, resps, 1) + assert.Equal(t, serviceName, resps[0].Name) + assert.Equal(t, defangv1.ServiceState_DEPLOYMENT_COMPLETED, resps[0].State) +} + +// TestActivityParser_GceInstanceGroupDropsUnknownTrigger verifies that gce_instance_group +// events with an unrecognized root_trigger_id are silently dropped. +func TestActivityParser_GceInstanceGroupDropsUnknownTrigger(t *testing.T) { + ctx := t.Context() + mock := &activityParserMock{labels: map[string]string{"defang-service": "my-svc"}} + parser := getActivityParser(ctx, mock, false, "") + + doneResponse, err := structpb.NewStruct(map[string]any{"status": "DONE"}) + require.NoError(t, err) + entry := makeAuditLogEntry( + "gce_instance_group", + map[string]string{"project_id": "test-project"}, + map[string]string{ + "compute.googleapis.com/root_trigger_id": "unknown-trigger", + }, + &auditpb.AuditLog{Response: doneResponse}, + ) + + resps, err := parser(entry) + require.NoError(t, err) + assert.Nil(t, resps) +} diff --git a/src/pkg/cli/subscribe.go b/src/pkg/cli/subscribe.go index 7d0ddad96..223965452 100644 --- a/src/pkg/cli/subscribe.go +++ b/src/pkg/cli/subscribe.go @@ -75,6 +75,15 @@ func WaitServiceState( return serviceStates, err } + pendingServices := []string{} + for _, service := range services { + if serviceStates[service] != targetState { + pendingServices = append(pendingServices, service) + } + } + + term.Infof("Waiting for %q to be in state %s...\n", pendingServices, targetState) // TODO: don't print in Go-routine + if msg == nil { continue } diff --git a/src/pkg/cli/tailAndMonitor.go b/src/pkg/cli/tailAndMonitor.go index 1c922446f..183136d8f 100644 --- a/src/pkg/cli/tailAndMonitor.go +++ b/src/pkg/cli/tailAndMonitor.go @@ -55,6 +55,8 @@ func TailAndMonitor(ctx context.Context, project *compose.Project, provider clie cdErr = err // When CD fails, stop WaitServiceState cancelSvcStatus(cdErr) + } else { + term.Info("Deployment complete. Waiting for services to be healthy...") } }() diff --git a/src/pkg/clouds/gcp/compute.go b/src/pkg/clouds/gcp/compute.go new file mode 100644 index 000000000..32370d63c --- /dev/null +++ b/src/pkg/clouds/gcp/compute.go @@ -0,0 +1,27 @@ +package gcp + +import ( + "context" + "fmt" + + compute "google.golang.org/api/compute/v1" +) + +// GetInstanceGroupManagerLabels fetches the allInstancesConfig.properties.labels from a regional +// instance group manager. The patch audit log only carries changed fields (e.g. the new instance +// template version), so the defang-service label is absent from the audit log request body and +// must be read from the live resource. +func (gcp Gcp) GetInstanceGroupManagerLabels(ctx context.Context, project, region, name string) (map[string]string, error) { + svc, err := compute.NewService(ctx, gcp.Options...) + if err != nil { + return nil, fmt.Errorf("failed to create compute client: %w", err) + } + mgr, err := svc.RegionInstanceGroupManagers.Get(project, region, name).Context(ctx).Do() + if err != nil { + return nil, fmt.Errorf("failed to get instance group manager %q: %w", name, err) + } + if mgr.AllInstancesConfig == nil || mgr.AllInstancesConfig.Properties == nil { + return nil, nil + } + return mgr.AllInstancesConfig.Properties.Labels, nil +}