From bfd28b99850e77eb428bc74c7e2b91d0ba4b1bcb Mon Sep 17 00:00:00 2001 From: Devin AI Date: Mon, 4 May 2026 23:56:00 +0000 Subject: [PATCH 1/8] frontend/gateway: pre-read http/2 preface with configurable timeout The vendored golang.org/x/net/http2 package enforces a hard-coded 10s preface timeout in http2.Server.ServeConn (vendor/golang.org/x/net/http2/server.go: const prefaceTimeout). When buildkitd spawns a frontend container (e.g. dockerfile-frontend) and slow runc bundle setup, overlay-mount, or cold-cache rootfs reads on a contended storage backend delay the frontend container from sending its preface past 10s, ServeConn aborts with errPrefaceTimeout and the build fails with `frontend grpc server closed unexpectedly`. The gateway now reads the 24-byte HTTP/2 client preface itself with a configurable, longer-by-default deadline (60s, overridable via BUILDKIT_GATEWAY_PREFACE_TIMEOUT), then replays it under a wrapped net.Conn so http2.Server.ServeConn observes the preface instantly from memory and proceeds normally. The gateway pipe-conn in *pipe overrides SetReadDeadline to no-op, so the deadline is enforced via an explicit timer race mirroring the upstream readPreface logic. Adds two OTel instruments via the existing meter provider: buildkit_frontend_preface_wait_seconds (Float64Histogram) and buildkit_frontend_preface_error_total (Int64Counter, labeled by reason). On both success and failure the gateway now emits a structured bklog line with preface_wait_ms / preface_timeout_ms / preface_error_reason, so failures are easy to attribute and distributions are queryable in Axiom. Co-Authored-By: Elton --- frontend/gateway/gateway.go | 169 ++++++++++++++++++++++++++++++- frontend/gateway/gateway_test.go | 84 +++++++++++++++ 2 files changed, 251 insertions(+), 2 deletions(-) diff --git a/frontend/gateway/gateway.go b/frontend/gateway/gateway.go index 983942b1cd47..7c1f7be971ac 100644 --- a/frontend/gateway/gateway.go +++ b/frontend/gateway/gateway.go @@ -52,6 +52,9 @@ import ( digest "github.com/opencontainers/go-digest" ocispecs "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" "golang.org/x/net/http2" "golang.org/x/sync/errgroup" spb "google.golang.org/genproto/googleapis/rpc/status" @@ -1720,13 +1723,175 @@ func (lbf *llbBridgeForwarder) cloneRef(id string) (solver.ResultProxy, error) { return s2, nil } +// HTTP/2 client preface handling for gateway connections. +// +// The vendored golang.org/x/net/http2 package enforces a hard-coded 10s +// preface timeout in http2.Server.ServeConn (see +// vendor/golang.org/x/net/http2/server.go: const prefaceTimeout). When +// buildkitd spawns a frontend container (e.g. dockerfile-frontend) and +// slow runc bundle setup, overlay-mount, or cold-cache rootfs reads +// delay the frontend's first preface bytes past 10s, ServeConn aborts +// with errPrefaceTimeout, the gateway tears down the gRPC connection, +// and the build fails with `frontend grpc server closed unexpectedly`. +// +// The gateway now reads the 24-byte client preface itself with a +// configurable, longer-by-default deadline, then replays it under a +// wrapped net.Conn so http2.Server.ServeConn observes the preface +// instantly from memory and proceeds normally. +const ( + // httpClientPreface is the 24-byte HTTP/2 client connection preface, + // as defined in RFC 7540 §3.5 and exported in golang.org/x/net/http2 + // as ClientPreface. The format is fixed by spec. + httpClientPreface = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" + + // defaultPrefaceTimeout is the default ceiling on how long the + // gateway waits for the frontend container to send its HTTP/2 client + // preface. Significantly longer than the vendored 10s upstream + // prefaceTimeout to absorb slow runc bundle setup and cold-cache + // rootfs reads on contended storage backends. + defaultPrefaceTimeout = 60 * time.Second + + // prefaceTimeoutEnvVar lets operators tune the preface wait without a + // binary rebuild. Value is parsed as a Go time.Duration string, + // e.g. "30s", "2m". Invalid or non-positive values fall back to + // defaultPrefaceTimeout. + prefaceTimeoutEnvVar = "BUILDKIT_GATEWAY_PREFACE_TIMEOUT" +) + +var ( + gatewayMeter = otel.Meter("github.com/moby/buildkit/frontend/gateway") + gatewayMetricsOnce sync.Once + prefaceWaitHistogram metric.Float64Histogram + prefaceErrorCounter metric.Int64Counter +) + +func initGatewayMetrics() { + gatewayMetricsOnce.Do(func() { + if h, err := gatewayMeter.Float64Histogram( + "buildkit_frontend_preface_wait_seconds", + metric.WithDescription("Time the gateway waited for the HTTP/2 client preface from the frontend container after spawning it."), + metric.WithUnit("s"), + ); err == nil { + prefaceWaitHistogram = h + } + if c, err := gatewayMeter.Int64Counter( + "buildkit_frontend_preface_error_total", + metric.WithDescription("Number of times the gateway failed to receive the HTTP/2 client preface from the frontend container, partitioned by reason."), + ); err == nil { + prefaceErrorCounter = c + } + }) +} + +// prefaceConn is a net.Conn that returns previously buffered HTTP/2 +// client preface bytes from Read before delegating to the underlying +// connection, so http2.Server.ServeConn observes the preface +// synchronously from memory. +type prefaceConn struct { + net.Conn + mu sync.Mutex + buf []byte +} + +func (c *prefaceConn) Read(p []byte) (int, error) { + c.mu.Lock() + if len(c.buf) > 0 { + n := copy(p, c.buf) + c.buf = c.buf[n:] + c.mu.Unlock() + return n, nil + } + c.mu.Unlock() + return c.Conn.Read(p) +} + +// prefaceReadTimeout returns the configured timeout for the gateway's +// HTTP/2 client preface pre-read. Defaults to defaultPrefaceTimeout, +// overridable via the BUILDKIT_GATEWAY_PREFACE_TIMEOUT environment +// variable. +func prefaceReadTimeout() time.Duration { + if v := os.Getenv(prefaceTimeoutEnvVar); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + bklog.L.Warnf("frontend gateway: invalid %s=%q, falling back to %s", prefaceTimeoutEnvVar, v, defaultPrefaceTimeout) + } + return defaultPrefaceTimeout +} + +var errPrefaceReadTimeout = errors.New("timed out reading HTTP/2 client preface from frontend container") + +// readPrefaceWithTimeout reads exactly len(httpClientPreface) bytes from +// conn or returns an error after timeout. Mirrors the timer-race +// approach in vendored x/net/http2/server.go:readPreface, which is +// required because the gateway's pipe-based net.Conn (see *pipe.conn) +// overrides the deadline setters to no-op so SetReadDeadline cannot be +// used to bound the read. +func readPrefaceWithTimeout(conn net.Conn, timeout time.Duration) ([]byte, error) { + buf := make([]byte, len(httpClientPreface)) + errc := make(chan error, 1) + go func() { + _, err := io.ReadFull(conn, buf) + errc <- err + }() + timer := time.NewTimer(timeout) + defer timer.Stop() + select { + case <-timer.C: + // Close the underlying connection to unblock the goroutine's + // io.ReadFull. errc is buffered so the goroutine will not leak. + _ = conn.Close() + return nil, errPrefaceReadTimeout + case err := <-errc: + if err != nil { + return nil, err + } + return buf, nil + } +} + func serve(ctx context.Context, grpcServer *grpc.Server, conn net.Conn) { + initGatewayMetrics() go func() { <-ctx.Done() conn.Close() }() - bklog.G(ctx).Debugf("serving grpc connection") - (&http2.Server{}).ServeConn(conn, &http2.ServeConnOpts{Handler: grpcServer}) + + timeout := prefaceReadTimeout() + start := time.Now() + pre, err := readPrefaceWithTimeout(conn, timeout) + elapsed := time.Since(start) + + logger := bklog.G(ctx). + WithField("preface_wait_ms", elapsed.Milliseconds()). + WithField("preface_timeout_ms", timeout.Milliseconds()) + + if err != nil { + reason := "read_error" + switch { + case errors.Is(err, errPrefaceReadTimeout): + reason = "timeout" + case err == io.EOF, err == io.ErrUnexpectedEOF: + reason = "eof" + } + if prefaceErrorCounter != nil { + prefaceErrorCounter.Add(ctx, 1, metric.WithAttributes(attribute.String("reason", reason))) + } + logger.WithField("preface_error_reason", reason). + Warnf("frontend serve: failed to read HTTP/2 client preface from frontend container: %v", err) + // ensure conn is closed so the upstream goroutine observes the + // failure as server-closed rather than blocking on Run() + _ = conn.Close() + return + } + + if prefaceWaitHistogram != nil { + prefaceWaitHistogram.Record(ctx, elapsed.Seconds()) + } + logger.Debugf("frontend serve: received HTTP/2 client preface") + + bufConn := &prefaceConn{Conn: conn, buf: pre} + (&http2.Server{}).ServeConn(bufConn, &http2.ServeConnOpts{Handler: grpcServer}) } type markTypeFrontend struct{} diff --git a/frontend/gateway/gateway_test.go b/frontend/gateway/gateway_test.go index 79b88d7c3301..b059d8b6f7a5 100644 --- a/frontend/gateway/gateway_test.go +++ b/frontend/gateway/gateway_test.go @@ -1,7 +1,12 @@ package gateway import ( + "errors" + "io" + "net" + "sync" "testing" + "time" "github.com/stretchr/testify/require" ) @@ -43,3 +48,82 @@ func TestCheckSourceIsAllowed(t *testing.T) { err = gw.checkSourceIsAllowed("docker.io/library/alpine") require.NoError(t, err) } + +func TestPrefaceConnReplaysBufferedBytes(t *testing.T) { + // Pair of in-memory net.Conns; we'll wrap one end and verify that + // reads on the wrapper first drain a pre-buffered chunk before + // falling through to the underlying conn. + c1, c2 := net.Pipe() + defer c2.Close() + + const buffered = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" + const trailing = "DATA-AFTER-PREFACE" + pc := &prefaceConn{Conn: c1, buf: []byte(buffered)} + + // Writer goroutine pushes the trailing payload to the underlying conn + // after the wrapper has had a chance to drain its buffer. + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + _, _ = c2.Write([]byte(trailing)) + c2.Close() + }() + + out, err := io.ReadAll(pc) + require.NoError(t, err) + require.Equal(t, buffered+trailing, string(out)) + wg.Wait() +} + +func TestReadPrefaceWithTimeoutSuccess(t *testing.T) { + c1, c2 := net.Pipe() + defer c1.Close() + defer c2.Close() + + preface := []byte("PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n") + + go func() { + _, _ = c2.Write(preface) + }() + + got, err := readPrefaceWithTimeout(c1, 5*time.Second) + require.NoError(t, err) + require.Equal(t, preface, got) +} + +func TestReadPrefaceWithTimeoutFiresOnSlowSender(t *testing.T) { + c1, c2 := net.Pipe() + defer c2.Close() + + // Sender writes nothing; the timer race must fire and close c1. + start := time.Now() + _, err := readPrefaceWithTimeout(c1, 50*time.Millisecond) + elapsed := time.Since(start) + + require.Error(t, err) + require.True(t, errors.Is(err, errPrefaceReadTimeout), "expected errPrefaceReadTimeout, got %v", err) + require.GreaterOrEqual(t, elapsed, 40*time.Millisecond) + require.Less(t, elapsed, 2*time.Second) + + // c1 should be closed by readPrefaceWithTimeout on the timeout path + // to unblock the inner io.ReadFull goroutine. + _, werr := c1.Write([]byte("x")) + require.Error(t, werr) +} + +func TestPrefaceReadTimeoutEnvOverride(t *testing.T) { + t.Setenv(prefaceTimeoutEnvVar, "") + require.Equal(t, defaultPrefaceTimeout, prefaceReadTimeout()) + + t.Setenv(prefaceTimeoutEnvVar, "2m") + require.Equal(t, 2*time.Minute, prefaceReadTimeout()) + + // invalid value falls back to default + t.Setenv(prefaceTimeoutEnvVar, "not-a-duration") + require.Equal(t, defaultPrefaceTimeout, prefaceReadTimeout()) + + // non-positive falls back to default + t.Setenv(prefaceTimeoutEnvVar, "0s") + require.Equal(t, defaultPrefaceTimeout, prefaceReadTimeout()) +} From d520ab71b22b35e603d7cb58b2c2211d63d424f9 Mon Sep 17 00:00:00 2001 From: Devin AI Date: Mon, 4 May 2026 23:56:08 +0000 Subject: [PATCH 2/8] executor/runcexecutor: log container-started timestamp Mirrors the existing "> creating " debug log already emitted just before runc.Run. Emitting a paired "> started " line on the startedOnce.Do callback gives us a per-VM measurement of runc bundle setup + exec latency directly from logs (started_ts - creating_ts), without needing to instrument runc internals or stand up additional metrics. This is the latency that consumes the gateway HTTP/2 preface budget on contended storage backends, and is the on-path companion to the gateway-side preface_wait_ms log added in frontend/gateway/gateway.go. Co-Authored-By: Elton --- executor/runcexecutor/executor.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/executor/runcexecutor/executor.go b/executor/runcexecutor/executor.go index c4e5f8fd0674..8d1dd334084a 100644 --- a/executor/runcexecutor/executor.go +++ b/executor/runcexecutor/executor.go @@ -328,6 +328,12 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount, trace.SpanFromContext(ctx).AddEvent("Container created") err = w.run(ctx, id, bundle, process, func() { startedOnce.Do(func() { + // Mirrors the existing "> creating" log line above. The + // delta between "creating" and "started" is the runc + // bundle setup + exec latency, which is the dominant + // budget consumer for the gateway HTTP/2 preface deadline + // when the storage backend is contended. + bklog.G(ctx).Debugf("> started %s %v", id, meta.Args) trace.SpanFromContext(ctx).AddEvent("Container started") if started != nil { close(started) From 3052b12cba2ea2057e7ee03da10a0d86bfada6d6 Mon Sep 17 00:00:00 2001 From: Devin AI Date: Tue, 5 May 2026 00:10:44 +0000 Subject: [PATCH 3/8] frontend/gateway: use errors.Is and require.ErrorIs for error checks CI lint failures: errorlint flagged the io.EOF/io.ErrUnexpectedEOF == comparisons in serve(), and testifylint flagged require.True(errors.Is(...)) in TestReadPrefaceWithTimeoutFiresOnSlowSender. Switch both to the canonical errors.Is / require.ErrorIs forms to match the rest of the file. Co-Authored-By: Elton --- frontend/gateway/gateway.go | 2 +- frontend/gateway/gateway_test.go | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/frontend/gateway/gateway.go b/frontend/gateway/gateway.go index 7c1f7be971ac..79fe67279c82 100644 --- a/frontend/gateway/gateway.go +++ b/frontend/gateway/gateway.go @@ -1871,7 +1871,7 @@ func serve(ctx context.Context, grpcServer *grpc.Server, conn net.Conn) { switch { case errors.Is(err, errPrefaceReadTimeout): reason = "timeout" - case err == io.EOF, err == io.ErrUnexpectedEOF: + case errors.Is(err, io.EOF), errors.Is(err, io.ErrUnexpectedEOF): reason = "eof" } if prefaceErrorCounter != nil { diff --git a/frontend/gateway/gateway_test.go b/frontend/gateway/gateway_test.go index b059d8b6f7a5..61c631b21387 100644 --- a/frontend/gateway/gateway_test.go +++ b/frontend/gateway/gateway_test.go @@ -1,7 +1,6 @@ package gateway import ( - "errors" "io" "net" "sync" @@ -101,8 +100,7 @@ func TestReadPrefaceWithTimeoutFiresOnSlowSender(t *testing.T) { _, err := readPrefaceWithTimeout(c1, 50*time.Millisecond) elapsed := time.Since(start) - require.Error(t, err) - require.True(t, errors.Is(err, errPrefaceReadTimeout), "expected errPrefaceReadTimeout, got %v", err) + require.ErrorIs(t, err, errPrefaceReadTimeout) require.GreaterOrEqual(t, elapsed, 40*time.Millisecond) require.Less(t, elapsed, 2*time.Second) From cbb60de3a40a2328b145b5eef10b64c7733ae56e Mon Sep 17 00:00:00 2001 From: Devin AI Date: Tue, 5 May 2026 00:17:34 +0000 Subject: [PATCH 4/8] vendor: promote go.opentelemetry.io/otel/metric to direct dep frontend/gateway/gateway.go now imports go.opentelemetry.io/otel/metric directly to declare the prefaceWaitHistogram and prefaceErrorCounter, so the dependency must be in the direct require block. Also drops the unnecessary sync.WaitGroup in TestPrefaceConnReplaysBufferedBytes (gopls modernize analyzer flagged it). Co-Authored-By: Elton --- frontend/gateway/gateway_test.go | 12 ++++-------- go.mod | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/frontend/gateway/gateway_test.go b/frontend/gateway/gateway_test.go index 61c631b21387..5daeb8d9bd1e 100644 --- a/frontend/gateway/gateway_test.go +++ b/frontend/gateway/gateway_test.go @@ -3,7 +3,6 @@ package gateway import ( "io" "net" - "sync" "testing" "time" @@ -59,20 +58,17 @@ func TestPrefaceConnReplaysBufferedBytes(t *testing.T) { const trailing = "DATA-AFTER-PREFACE" pc := &prefaceConn{Conn: c1, buf: []byte(buffered)} - // Writer goroutine pushes the trailing payload to the underlying conn - // after the wrapper has had a chance to drain its buffer. - var wg sync.WaitGroup - wg.Add(1) + // Writer goroutine pushes the trailing payload to the underlying conn, + // then closes c2 so io.ReadAll terminates after draining the wrapper's + // buffer and the trailing bytes from c1. go func() { - defer wg.Done() _, _ = c2.Write([]byte(trailing)) - c2.Close() + _ = c2.Close() }() out, err := io.ReadAll(pc) require.NoError(t, err) require.Equal(t, buffered+trailing, string(out)) - wg.Wait() } func TestReadPrefaceWithTimeoutSuccess(t *testing.T) { diff --git a/go.mod b/go.mod index 2523057f9ce3..bd4c0dcca51e 100644 --- a/go.mod +++ b/go.mod @@ -97,6 +97,7 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.40.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0 go.opentelemetry.io/otel/exporters/prometheus v0.60.0 + go.opentelemetry.io/otel/metric v1.40.0 go.opentelemetry.io/otel/sdk v1.40.0 go.opentelemetry.io/otel/sdk/metric v1.40.0 go.opentelemetry.io/otel/trace v1.40.0 @@ -223,7 +224,6 @@ require ( github.com/vishvananda/netns v0.0.5 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/otel/metric v1.40.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/term v0.41.0 // indirect From 3631fc78b0b5a5b95e36572f898a49e06ac2a2b8 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 15:39:05 +0000 Subject: [PATCH 5/8] frontend/gateway: use http2 SawClientPreface to skip vendored preface read Drop the prefaceConn wrapper, its mutex, and the buffer-replay Read() path. golang.org/x/net/http2 already exposes ServeConnOpts.SawClientPreface for exactly this case: if set, http2's internal readPreface() returns nil immediately and skips its hard-coded 10s prefaceTimeout. The gateway still pre-reads (and now validates) the 24-byte HTTP/2 client preface itself with the configurable BUILDKIT_GATEWAY_PREFACE_TIMEOUT deadline, then hands the already-drained net.Conn straight to ServeConn. A new errPrefaceMismatch is emitted (and counted under reason="mismatch") if the bytes are not the expected client preface. Co-Authored-By: Elton --- frontend/gateway/gateway.go | 55 ++++++++++++++------------------ frontend/gateway/gateway_test.go | 46 ++++++++++---------------- 2 files changed, 41 insertions(+), 60 deletions(-) diff --git a/frontend/gateway/gateway.go b/frontend/gateway/gateway.go index 79fe67279c82..88163449fcc2 100644 --- a/frontend/gateway/gateway.go +++ b/frontend/gateway/gateway.go @@ -1783,28 +1783,6 @@ func initGatewayMetrics() { }) } -// prefaceConn is a net.Conn that returns previously buffered HTTP/2 -// client preface bytes from Read before delegating to the underlying -// connection, so http2.Server.ServeConn observes the preface -// synchronously from memory. -type prefaceConn struct { - net.Conn - mu sync.Mutex - buf []byte -} - -func (c *prefaceConn) Read(p []byte) (int, error) { - c.mu.Lock() - if len(c.buf) > 0 { - n := copy(p, c.buf) - c.buf = c.buf[n:] - c.mu.Unlock() - return n, nil - } - c.mu.Unlock() - return c.Conn.Read(p) -} - // prefaceReadTimeout returns the configured timeout for the gateway's // HTTP/2 client preface pre-read. Defaults to defaultPrefaceTimeout, // overridable via the BUILDKIT_GATEWAY_PREFACE_TIMEOUT environment @@ -1819,15 +1797,19 @@ func prefaceReadTimeout() time.Duration { return defaultPrefaceTimeout } -var errPrefaceReadTimeout = errors.New("timed out reading HTTP/2 client preface from frontend container") +var ( + errPrefaceReadTimeout = errors.New("timed out reading HTTP/2 client preface from frontend container") + errPrefaceMismatch = errors.New("frontend container did not send a valid HTTP/2 client preface") +) // readPrefaceWithTimeout reads exactly len(httpClientPreface) bytes from // conn or returns an error after timeout. Mirrors the timer-race // approach in vendored x/net/http2/server.go:readPreface, which is // required because the gateway's pipe-based net.Conn (see *pipe.conn) // overrides the deadline setters to no-op so SetReadDeadline cannot be -// used to bound the read. -func readPrefaceWithTimeout(conn net.Conn, timeout time.Duration) ([]byte, error) { +// used to bound the read. The returned bytes are validated against +// httpClientPreface; a mismatch returns errPrefaceMismatch. +func readPrefaceWithTimeout(conn net.Conn, timeout time.Duration) error { buf := make([]byte, len(httpClientPreface)) errc := make(chan error, 1) go func() { @@ -1841,12 +1823,15 @@ func readPrefaceWithTimeout(conn net.Conn, timeout time.Duration) ([]byte, error // Close the underlying connection to unblock the goroutine's // io.ReadFull. errc is buffered so the goroutine will not leak. _ = conn.Close() - return nil, errPrefaceReadTimeout + return errPrefaceReadTimeout case err := <-errc: if err != nil { - return nil, err + return err } - return buf, nil + if string(buf) != httpClientPreface { + return errPrefaceMismatch + } + return nil } } @@ -1859,7 +1844,7 @@ func serve(ctx context.Context, grpcServer *grpc.Server, conn net.Conn) { timeout := prefaceReadTimeout() start := time.Now() - pre, err := readPrefaceWithTimeout(conn, timeout) + err := readPrefaceWithTimeout(conn, timeout) elapsed := time.Since(start) logger := bklog.G(ctx). @@ -1871,6 +1856,8 @@ func serve(ctx context.Context, grpcServer *grpc.Server, conn net.Conn) { switch { case errors.Is(err, errPrefaceReadTimeout): reason = "timeout" + case errors.Is(err, errPrefaceMismatch): + reason = "mismatch" case errors.Is(err, io.EOF), errors.Is(err, io.ErrUnexpectedEOF): reason = "eof" } @@ -1890,8 +1877,14 @@ func serve(ctx context.Context, grpcServer *grpc.Server, conn net.Conn) { } logger.Debugf("frontend serve: received HTTP/2 client preface") - bufConn := &prefaceConn{Conn: conn, buf: pre} - (&http2.Server{}).ServeConn(bufConn, &http2.ServeConnOpts{Handler: grpcServer}) + // SawClientPreface tells http2.Server.ServeConn that the 24-byte + // HTTP/2 client preface has already been consumed from conn, so its + // internal readPreface() returns nil immediately and skips the + // hard-coded 10s prefaceTimeout. + (&http2.Server{}).ServeConn(conn, &http2.ServeConnOpts{ + Handler: grpcServer, + SawClientPreface: true, + }) } type markTypeFrontend struct{} diff --git a/frontend/gateway/gateway_test.go b/frontend/gateway/gateway_test.go index 5daeb8d9bd1e..30eb6729c88f 100644 --- a/frontend/gateway/gateway_test.go +++ b/frontend/gateway/gateway_test.go @@ -1,7 +1,6 @@ package gateway import ( - "io" "net" "testing" "time" @@ -47,30 +46,6 @@ func TestCheckSourceIsAllowed(t *testing.T) { require.NoError(t, err) } -func TestPrefaceConnReplaysBufferedBytes(t *testing.T) { - // Pair of in-memory net.Conns; we'll wrap one end and verify that - // reads on the wrapper first drain a pre-buffered chunk before - // falling through to the underlying conn. - c1, c2 := net.Pipe() - defer c2.Close() - - const buffered = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" - const trailing = "DATA-AFTER-PREFACE" - pc := &prefaceConn{Conn: c1, buf: []byte(buffered)} - - // Writer goroutine pushes the trailing payload to the underlying conn, - // then closes c2 so io.ReadAll terminates after draining the wrapper's - // buffer and the trailing bytes from c1. - go func() { - _, _ = c2.Write([]byte(trailing)) - _ = c2.Close() - }() - - out, err := io.ReadAll(pc) - require.NoError(t, err) - require.Equal(t, buffered+trailing, string(out)) -} - func TestReadPrefaceWithTimeoutSuccess(t *testing.T) { c1, c2 := net.Pipe() defer c1.Close() @@ -82,9 +57,7 @@ func TestReadPrefaceWithTimeoutSuccess(t *testing.T) { _, _ = c2.Write(preface) }() - got, err := readPrefaceWithTimeout(c1, 5*time.Second) - require.NoError(t, err) - require.Equal(t, preface, got) + require.NoError(t, readPrefaceWithTimeout(c1, 5*time.Second)) } func TestReadPrefaceWithTimeoutFiresOnSlowSender(t *testing.T) { @@ -93,7 +66,7 @@ func TestReadPrefaceWithTimeoutFiresOnSlowSender(t *testing.T) { // Sender writes nothing; the timer race must fire and close c1. start := time.Now() - _, err := readPrefaceWithTimeout(c1, 50*time.Millisecond) + err := readPrefaceWithTimeout(c1, 50*time.Millisecond) elapsed := time.Since(start) require.ErrorIs(t, err, errPrefaceReadTimeout) @@ -106,6 +79,21 @@ func TestReadPrefaceWithTimeoutFiresOnSlowSender(t *testing.T) { require.Error(t, werr) } +func TestReadPrefaceWithTimeoutMismatch(t *testing.T) { + c1, c2 := net.Pipe() + defer c1.Close() + defer c2.Close() + + // Sender writes 24 bytes that do NOT match the HTTP/2 client preface. + bogus := []byte("NOT-A-VALID-PREFACE-XXXX") + require.Equal(t, len("PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"), len(bogus)) + go func() { + _, _ = c2.Write(bogus) + }() + + require.ErrorIs(t, readPrefaceWithTimeout(c1, 5*time.Second), errPrefaceMismatch) +} + func TestPrefaceReadTimeoutEnvOverride(t *testing.T) { t.Setenv(prefaceTimeoutEnvVar, "") require.Equal(t, defaultPrefaceTimeout, prefaceReadTimeout()) From 89d9fb2356f892a595950fd72d04a96485e552e1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 15:48:33 +0000 Subject: [PATCH 6/8] cmd/buildkitd: register meter provider globally for otel.Meter callers newMeterProvider already wires the Prometheus reader and the optional OTLP metric exporter, but the resulting *sdkmetric.MeterProvider was only handed to the otelgrpc stats handler. Code paths that look up a meter via the package-level otel.Meter("...") (e.g. the new frontend/gateway preface metrics) were therefore receiving the no-op global meter, and their recordings were silently dropped before reaching the Prometheus reader. Call otel.SetMeterProvider(mp) right after the provider is constructed so otel.Meter(...) returns the real, configured provider. While here, refresh the now-stale gateway preface comment block: after the SawClientPreface refactor there is no wrapped net.Conn / replay path, the gateway hands the already-drained conn straight to http2.Server.ServeConn. Co-Authored-By: Elton --- cmd/buildkitd/main.go | 8 ++++++++ frontend/gateway/gateway.go | 8 +++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cmd/buildkitd/main.go b/cmd/buildkitd/main.go index 7de17c050559..9827f07b1805 100644 --- a/cmd/buildkitd/main.go +++ b/cmd/buildkitd/main.go @@ -68,6 +68,7 @@ import ( "github.com/sirupsen/logrus" "github.com/urfave/cli" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/prometheus" "go.opentelemetry.io/otel/propagation" sdkmetric "go.opentelemetry.io/otel/sdk/metric" @@ -297,6 +298,13 @@ func main() { } closers = append(closers, mp.Shutdown) + // Register the meter provider globally so that callers using + // otel.Meter(...) (e.g. frontend/gateway preface metrics) see + // the configured Prometheus + OTLP readers instead of the + // no-op default. Without this, otel.Meter(...) returns a + // no-op meter and instrument recordings are silently dropped. + otel.SetMeterProvider(mp) + statsHandler := tracing.ServerStatsHandler( otelgrpc.WithTracerProvider(tp), otelgrpc.WithMeterProvider(mp), diff --git a/frontend/gateway/gateway.go b/frontend/gateway/gateway.go index 88163449fcc2..ded060cae0ae 100644 --- a/frontend/gateway/gateway.go +++ b/frontend/gateway/gateway.go @@ -1735,9 +1735,11 @@ func (lbf *llbBridgeForwarder) cloneRef(id string) (solver.ResultProxy, error) { // and the build fails with `frontend grpc server closed unexpectedly`. // // The gateway now reads the 24-byte client preface itself with a -// configurable, longer-by-default deadline, then replays it under a -// wrapped net.Conn so http2.Server.ServeConn observes the preface -// instantly from memory and proceeds normally. +// configurable, longer-by-default deadline, validates it, and then +// hands the already-drained net.Conn straight to http2.Server.ServeConn +// with ServeConnOpts.SawClientPreface=true. With that flag set, the +// vendored readPreface() returns nil immediately and skips the +// hard-coded 10s prefaceTimeout entirely. const ( // httpClientPreface is the 24-byte HTTP/2 client connection preface, // as defined in RFC 7540 §3.5 and exported in golang.org/x/net/http2 From 96bd5795cc8d79597e1b9c161c0a790d375d1783 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 16:12:22 +0000 Subject: [PATCH 7/8] frontend/gateway: close all conn fds to unblock preface read goroutine The *conn type embeds io.Reader, io.Writer and io.Closer separately. In production newPipe builds them from two os.Pipe() pairs: the Reader and Closer are different ends of *different* pipes, and their other ends are passed to the spawned frontend container's runc process via inherited Stdin/Stdout. The previously promoted Close (from the embedded io.Closer) only closed one fd, which is not sufficient to unblock a Read parked on the Reader fd because the child process holds a duplicate fd of the matching write end. readPrefaceWithTimeout's timeout path relied on conn.Close to unblock the io.ReadFull goroutine; in production it did not, leaving the goroutine parked until the container shut down. The unit tests exercising this path used net.Pipe (which closes both directions on Close) and so masked the production behavior. Define a Close method on *conn that closes Reader and Writer in addition to Closer. Add tests that reproduce the production fd shape using os.Pipe + dup(2) so the leak path is exercised in CI. Co-Authored-By: Elton --- frontend/gateway/dup_unix_test.go | 13 ++++ frontend/gateway/dup_windows_test.go | 11 +++ frontend/gateway/gateway.go | 50 ++++++++++++- frontend/gateway/gateway_test.go | 104 +++++++++++++++++++++++++++ 4 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 frontend/gateway/dup_unix_test.go create mode 100644 frontend/gateway/dup_windows_test.go diff --git a/frontend/gateway/dup_unix_test.go b/frontend/gateway/dup_unix_test.go new file mode 100644 index 000000000000..845c477cc2f5 --- /dev/null +++ b/frontend/gateway/dup_unix_test.go @@ -0,0 +1,13 @@ +//go:build !windows + +package gateway + +import "syscall" + +// dupFD returns a duplicate of the given file descriptor. Used only by +// tests in this package to simulate the runc-spawned child process +// holding an inherited fd of one end of an os.Pipe pair, so that +// closing the parent's end does not cause the other end to see EOF. +func dupFD(fd int) (int, error) { + return syscall.Dup(fd) +} diff --git a/frontend/gateway/dup_windows_test.go b/frontend/gateway/dup_windows_test.go new file mode 100644 index 000000000000..32f4ce7af769 --- /dev/null +++ b/frontend/gateway/dup_windows_test.go @@ -0,0 +1,11 @@ +//go:build windows + +package gateway + +import "errors" + +// dupFD is a stub on windows; tests that call it gate themselves on +// runtime.GOOS != "windows" and do not exercise this path. +func dupFD(fd int) (int, error) { + return 0, errors.New("dupFD not supported on windows") +} diff --git a/frontend/gateway/gateway.go b/frontend/gateway/gateway.go index ded060cae0ae..39960eaed49c 100644 --- a/frontend/gateway/gateway.go +++ b/frontend/gateway/gateway.go @@ -505,6 +505,42 @@ type conn struct { io.Closer } +// Close closes all three underlying file descriptors, not just the +// io.Closer field. This matters because newPipe builds the conn from +// two os.Pipe() pairs: the Reader and Closer are different ends of +// different pipes (pr2 and pw2 respectively), and the other ends +// (pw2 inherited as Stdout, pr1 inherited as Stdin) are passed to the +// spawned frontend container's runc process. Closing only the Closer +// (the previous, promoted-method behavior) does not cause the +// in-process Reader to see EOF, because the frontend process still +// holds a duplicate fd of that pipe end via its inherited Stdout. +// readPrefaceWithTimeout's timeout path relies on Close to unblock +// its io.ReadFull goroutine reading from Reader; closing the Reader +// fd directly causes any in-flight or subsequent Read to return +// immediately with a closed-pipe error, which is the documented +// behavior of os.File.Close while another goroutine is in Read. +// +// Concurrent and repeated calls are safe: os.File handles internal +// synchronisation, and existing call sites (defer lbf.conn.Close, +// the ctx.Done goroutine in serve, and read-error paths) ignore the +// returned error. +func (s *conn) Close() error { + var firstErr error + closeIfCloser := func(v any) { + if c, ok := v.(io.Closer); ok { + if err := c.Close(); err != nil && firstErr == nil { + firstErr = err + } + } + } + closeIfCloser(s.Reader) + closeIfCloser(s.Writer) + if err := s.Closer.Close(); err != nil && firstErr == nil { + firstErr = err + } + return firstErr +} + func (s *conn) LocalAddr() net.Addr { return dummyAddr{} } @@ -1807,7 +1843,7 @@ var ( // readPrefaceWithTimeout reads exactly len(httpClientPreface) bytes from // conn or returns an error after timeout. Mirrors the timer-race // approach in vendored x/net/http2/server.go:readPreface, which is -// required because the gateway's pipe-based net.Conn (see *pipe.conn) +// required because the gateway's pipe-based net.Conn (see *conn above) // overrides the deadline setters to no-op so SetReadDeadline cannot be // used to bound the read. The returned bytes are validated against // httpClientPreface; a mismatch returns errPrefaceMismatch. @@ -1822,8 +1858,16 @@ func readPrefaceWithTimeout(conn net.Conn, timeout time.Duration) error { defer timer.Stop() select { case <-timer.C: - // Close the underlying connection to unblock the goroutine's - // io.ReadFull. errc is buffered so the goroutine will not leak. + // Close the underlying connection to unblock the io.ReadFull + // goroutine reading from conn. *conn.Close (defined above) + // closes the Reader fd directly, which causes the in-flight + // Read to return immediately with a closed-pipe error so the + // goroutine returns and writes to the buffered errc channel. + // errc is sized 1 so even if conn.Close were a no-op (e.g. a + // hypothetical net.Conn implementation that doesn't unblock + // Read on Close), the goroutine would still not leak: it would + // remain parked on the Read until the underlying transport + // closes, then write to the already-buffered channel and exit. _ = conn.Close() return errPrefaceReadTimeout case err := <-errc: diff --git a/frontend/gateway/gateway_test.go b/frontend/gateway/gateway_test.go index 30eb6729c88f..be788abd053d 100644 --- a/frontend/gateway/gateway_test.go +++ b/frontend/gateway/gateway_test.go @@ -1,7 +1,11 @@ package gateway import ( + "errors" + "io" "net" + "os" + "runtime" "testing" "time" @@ -94,6 +98,106 @@ func TestReadPrefaceWithTimeoutMismatch(t *testing.T) { require.ErrorIs(t, readPrefaceWithTimeout(c1, 5*time.Second), errPrefaceMismatch) } +// TestConnCloseUnblocksReaderInPipeMode reproduces the production conn +// shape (Reader and Closer are different ends of different os.Pipe +// pairs, with the "other ends" held by a separate process via +// inherited fds) and asserts that *conn.Close unblocks an in-flight +// Read on the Reader side. Without an explicit Close on the Reader fd, +// closing only the Closer leaves the read parked because the +// duplicate fd (modeled here by a kept-alive pw2Dup) prevents the +// kernel from sending EOF on the read side. This is the goroutine-leak +// failure mode in readPrefaceWithTimeout's timeout path. +func TestConnCloseUnblocksReaderInPipeMode(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("os.Pipe semantics around dup'd fds differ on windows; production runc-spawn path is linux-only") + } + + pr1, pw1, err := os.Pipe() + require.NoError(t, err) + pr2, pw2, err := os.Pipe() + require.NoError(t, err) + + // Simulate the runc-spawned frontend's inherited Stdout: a duplicate + // fd of pw2 held by the "child". In production this is the runc + // process's Stdout; here we hold a separate *os.File pointing at the + // same pipe write end via dup(). While this fd is open, closing pw2 + // alone in the parent does not cause pr2 to see EOF. + pw2Fd, err := dupFD(int(pw2.Fd())) + require.NoError(t, err) + pw2Dup := os.NewFile(uintptr(pw2Fd), "pw2-dup") + defer pw2Dup.Close() + + c := &conn{ + Reader: pr2, + Writer: pw1, + Closer: pw2, + } + + // Sanity: keep pr1 alive so the Writer side has a peer; otherwise + // the parent's first write would EPIPE in unrelated test paths. + _ = pr1 + + // Start a Read that will block until either data arrives on pr2 or + // pr2 is closed. In production this is the io.ReadFull goroutine + // inside readPrefaceWithTimeout. + readDone := make(chan error, 1) + go func() { + buf := make([]byte, 24) + _, err := io.ReadFull(c, buf) + readDone <- err + }() + + // Give the goroutine a moment to actually park on Read. + time.Sleep(50 * time.Millisecond) + + // Close the conn. The fix asserts that this closes the Reader fd + // (pr2) and unblocks io.ReadFull, even though the simulated child + // process (pw2Dup) still holds a writer-side fd open. + require.NoError(t, c.Close()) + + select { + case err := <-readDone: + // io.ReadFull returns either io.ErrUnexpectedEOF or a closed-pipe + // error depending on which side wins the close race; both are + // acceptable for the leak-fix assertion (the goroutine exited). + require.Error(t, err) + require.True(t, + errors.Is(err, io.ErrUnexpectedEOF) || + errors.Is(err, os.ErrClosed) || + errors.Is(err, io.EOF), + "expected close-pipe-style error, got %v", err) + case <-time.After(2 * time.Second): + t.Fatal("io.ReadFull goroutine still blocked 2s after conn.Close; goroutine-leak fix regressed") + } +} + +// TestConnCloseIsIdempotent asserts that calling Close more than once +// (which happens in production: defer lbf.conn.Close, the ctx.Done +// goroutine in serve, and read-error paths can all fire) does not +// panic and is safe. +func TestConnCloseIsIdempotent(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("os.Pipe semantics differ on windows") + } + + pr1, pw1, err := os.Pipe() + require.NoError(t, err) + pr2, pw2, err := os.Pipe() + require.NoError(t, err) + defer pr1.Close() + + c := &conn{ + Reader: pr2, + Writer: pw1, + Closer: pw2, + } + + require.NoError(t, c.Close()) + // Second close should not panic. os.File.Close on an already-closed + // file returns an error that callers (defer/ignore) can drop. + _ = c.Close() +} + func TestPrefaceReadTimeoutEnvOverride(t *testing.T) { t.Setenv(prefaceTimeoutEnvVar, "") require.Equal(t, defaultPrefaceTimeout, prefaceReadTimeout()) From b8ab1833ac5e7e6bad7f834c0043d0b1acc2edff Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 16:17:52 +0000 Subject: [PATCH 8/8] frontend/gateway: silence gopls unusedparams in windows dup stub Co-Authored-By: Elton --- frontend/gateway/dup_windows_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/gateway/dup_windows_test.go b/frontend/gateway/dup_windows_test.go index 32f4ce7af769..80739e25f3ff 100644 --- a/frontend/gateway/dup_windows_test.go +++ b/frontend/gateway/dup_windows_test.go @@ -6,6 +6,6 @@ import "errors" // dupFD is a stub on windows; tests that call it gate themselves on // runtime.GOOS != "windows" and do not exercise this path. -func dupFD(fd int) (int, error) { +func dupFD(_ int) (int, error) { return 0, errors.New("dupFD not supported on windows") }