Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cmd/buildkitd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ import (
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
Expand Down Expand Up @@ -297,6 +298,13 @@ func main() {
}
closers = append(closers, mp.Shutdown)

// Register the meter provider globally so that callers using
// otel.Meter(...) (e.g. frontend/gateway preface metrics) see
// the configured Prometheus + OTLP readers instead of the
// no-op default. Without this, otel.Meter(...) returns a
// no-op meter and instrument recordings are silently dropped.
otel.SetMeterProvider(mp)

statsHandler := tracing.ServerStatsHandler(
otelgrpc.WithTracerProvider(tp),
otelgrpc.WithMeterProvider(mp),
Expand Down
6 changes: 6 additions & 0 deletions executor/runcexecutor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,12 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
trace.SpanFromContext(ctx).AddEvent("Container created")
err = w.run(ctx, id, bundle, process, func() {
startedOnce.Do(func() {
// Mirrors the existing "> creating" log line above. The
// delta between "creating" and "started" is the runc
// bundle setup + exec latency, which is the dominant
// budget consumer for the gateway HTTP/2 preface deadline
// when the storage backend is contended.
bklog.G(ctx).Debugf("> started %s %v", id, meta.Args)
trace.SpanFromContext(ctx).AddEvent("Container started")
if started != nil {
close(started)
Expand Down
13 changes: 13 additions & 0 deletions frontend/gateway/dup_unix_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//go:build !windows

package gateway

import "syscall"

// dupFD returns a duplicate of the given file descriptor. Used only by
// tests in this package to simulate the runc-spawned child process
// holding an inherited fd of one end of an os.Pipe pair, so that
// closing the parent's end does not cause the other end to see EOF.
func dupFD(fd int) (int, error) {
return syscall.Dup(fd)
}
11 changes: 11 additions & 0 deletions frontend/gateway/dup_windows_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//go:build windows

package gateway

import "errors"

// dupFD is a stub on windows; tests that call it gate themselves on
// runtime.GOOS != "windows" and do not exercise this path.
func dupFD(_ int) (int, error) {
return 0, errors.New("dupFD not supported on windows")
}
208 changes: 206 additions & 2 deletions frontend/gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ import (
digest "github.com/opencontainers/go-digest"
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"golang.org/x/net/http2"
"golang.org/x/sync/errgroup"
spb "google.golang.org/genproto/googleapis/rpc/status"
Expand Down Expand Up @@ -502,6 +505,42 @@ type conn struct {
io.Closer
}

// Close closes all three underlying file descriptors, not just the
// io.Closer field. This matters because newPipe builds the conn from
// two os.Pipe() pairs: the Reader and Closer are different ends of
// different pipes (pr2 and pw2 respectively), and the other ends
// (pw2 inherited as Stdout, pr1 inherited as Stdin) are passed to the
// spawned frontend container's runc process. Closing only the Closer
// (the previous, promoted-method behavior) does not cause the
// in-process Reader to see EOF, because the frontend process still
// holds a duplicate fd of that pipe end via its inherited Stdout.
// readPrefaceWithTimeout's timeout path relies on Close to unblock
// its io.ReadFull goroutine reading from Reader; closing the Reader
// fd directly causes any in-flight or subsequent Read to return
// immediately with a closed-pipe error, which is the documented
// behavior of os.File.Close while another goroutine is in Read.
//
// Concurrent and repeated calls are safe: os.File handles internal
// synchronisation, and existing call sites (defer lbf.conn.Close,
// the ctx.Done goroutine in serve, and read-error paths) ignore the
// returned error.
func (s *conn) Close() error {
var firstErr error
closeIfCloser := func(v any) {
if c, ok := v.(io.Closer); ok {
if err := c.Close(); err != nil && firstErr == nil {
firstErr = err
}
}
}
closeIfCloser(s.Reader)
closeIfCloser(s.Writer)
if err := s.Closer.Close(); err != nil && firstErr == nil {
firstErr = err
}
return firstErr
}

func (s *conn) LocalAddr() net.Addr {
return dummyAddr{}
}
Expand Down Expand Up @@ -1720,13 +1759,178 @@ func (lbf *llbBridgeForwarder) cloneRef(id string) (solver.ResultProxy, error) {
return s2, nil
}

// HTTP/2 client preface handling for gateway connections.
//
// The vendored golang.org/x/net/http2 package enforces a hard-coded 10s
// preface timeout in http2.Server.ServeConn (see
// vendor/golang.org/x/net/http2/server.go: const prefaceTimeout). When
// buildkitd spawns a frontend container (e.g. dockerfile-frontend) and
// slow runc bundle setup, overlay-mount, or cold-cache rootfs reads
// delay the frontend's first preface bytes past 10s, ServeConn aborts
// with errPrefaceTimeout, the gateway tears down the gRPC connection,
// and the build fails with `frontend grpc server closed unexpectedly`.
//
// The gateway now reads the 24-byte client preface itself with a
// configurable, longer-by-default deadline, validates it, and then
// hands the already-drained net.Conn straight to http2.Server.ServeConn
// with ServeConnOpts.SawClientPreface=true. With that flag set, the
// vendored readPreface() returns nil immediately and skips the
// hard-coded 10s prefaceTimeout entirely.
const (
// httpClientPreface is the 24-byte HTTP/2 client connection preface,
// as defined in RFC 7540 §3.5 and exported in golang.org/x/net/http2
// as ClientPreface. The format is fixed by spec.
httpClientPreface = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"

// defaultPrefaceTimeout is the default ceiling on how long the
// gateway waits for the frontend container to send its HTTP/2 client
// preface. Significantly longer than the vendored 10s upstream
// prefaceTimeout to absorb slow runc bundle setup and cold-cache
// rootfs reads on contended storage backends.
defaultPrefaceTimeout = 60 * time.Second

// prefaceTimeoutEnvVar lets operators tune the preface wait without a
// binary rebuild. Value is parsed as a Go time.Duration string,
// e.g. "30s", "2m". Invalid or non-positive values fall back to
// defaultPrefaceTimeout.
prefaceTimeoutEnvVar = "BUILDKIT_GATEWAY_PREFACE_TIMEOUT"
)

var (
gatewayMeter = otel.Meter("github.com/moby/buildkit/frontend/gateway")
gatewayMetricsOnce sync.Once
prefaceWaitHistogram metric.Float64Histogram
prefaceErrorCounter metric.Int64Counter
Comment thread
cursor[bot] marked this conversation as resolved.
)

func initGatewayMetrics() {
gatewayMetricsOnce.Do(func() {
if h, err := gatewayMeter.Float64Histogram(
"buildkit_frontend_preface_wait_seconds",
metric.WithDescription("Time the gateway waited for the HTTP/2 client preface from the frontend container after spawning it."),
metric.WithUnit("s"),
); err == nil {
prefaceWaitHistogram = h
}
if c, err := gatewayMeter.Int64Counter(
"buildkit_frontend_preface_error_total",
metric.WithDescription("Number of times the gateway failed to receive the HTTP/2 client preface from the frontend container, partitioned by reason."),
); err == nil {
prefaceErrorCounter = c
}
})
}

// prefaceReadTimeout returns the configured timeout for the gateway's
// HTTP/2 client preface pre-read. Defaults to defaultPrefaceTimeout,
// overridable via the BUILDKIT_GATEWAY_PREFACE_TIMEOUT environment
// variable.
func prefaceReadTimeout() time.Duration {
if v := os.Getenv(prefaceTimeoutEnvVar); v != "" {
if d, err := time.ParseDuration(v); err == nil && d > 0 {
return d
}
bklog.L.Warnf("frontend gateway: invalid %s=%q, falling back to %s", prefaceTimeoutEnvVar, v, defaultPrefaceTimeout)
}
return defaultPrefaceTimeout
}

var (
errPrefaceReadTimeout = errors.New("timed out reading HTTP/2 client preface from frontend container")
errPrefaceMismatch = errors.New("frontend container did not send a valid HTTP/2 client preface")
)

// readPrefaceWithTimeout reads exactly len(httpClientPreface) bytes from
// conn or returns an error after timeout. Mirrors the timer-race
// approach in vendored x/net/http2/server.go:readPreface, which is
// required because the gateway's pipe-based net.Conn (see *conn above)
// overrides the deadline setters to no-op so SetReadDeadline cannot be
// used to bound the read. The returned bytes are validated against
// httpClientPreface; a mismatch returns errPrefaceMismatch.
func readPrefaceWithTimeout(conn net.Conn, timeout time.Duration) error {
buf := make([]byte, len(httpClientPreface))
errc := make(chan error, 1)
go func() {
_, err := io.ReadFull(conn, buf)
errc <- err
}()
timer := time.NewTimer(timeout)
defer timer.Stop()
select {
case <-timer.C:
// Close the underlying connection to unblock the io.ReadFull
// goroutine reading from conn. *conn.Close (defined above)
// closes the Reader fd directly, which causes the in-flight
// Read to return immediately with a closed-pipe error so the
// goroutine returns and writes to the buffered errc channel.
// errc is sized 1 so even if conn.Close were a no-op (e.g. a
// hypothetical net.Conn implementation that doesn't unblock
// Read on Close), the goroutine would still not leak: it would
// remain parked on the Read until the underlying transport
// closes, then write to the already-buffered channel and exit.
_ = conn.Close()
return errPrefaceReadTimeout
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
case err := <-errc:
if err != nil {
return err
}
if string(buf) != httpClientPreface {
return errPrefaceMismatch
}
return nil
}
}

func serve(ctx context.Context, grpcServer *grpc.Server, conn net.Conn) {
initGatewayMetrics()
go func() {
<-ctx.Done()
conn.Close()
}()
bklog.G(ctx).Debugf("serving grpc connection")
(&http2.Server{}).ServeConn(conn, &http2.ServeConnOpts{Handler: grpcServer})

timeout := prefaceReadTimeout()
start := time.Now()
err := readPrefaceWithTimeout(conn, timeout)
elapsed := time.Since(start)

logger := bklog.G(ctx).
WithField("preface_wait_ms", elapsed.Milliseconds()).
WithField("preface_timeout_ms", timeout.Milliseconds())

if err != nil {
reason := "read_error"
switch {
case errors.Is(err, errPrefaceReadTimeout):
reason = "timeout"
case errors.Is(err, errPrefaceMismatch):
reason = "mismatch"
case errors.Is(err, io.EOF), errors.Is(err, io.ErrUnexpectedEOF):
reason = "eof"
Comment thread
cursor[bot] marked this conversation as resolved.
}
if prefaceErrorCounter != nil {
prefaceErrorCounter.Add(ctx, 1, metric.WithAttributes(attribute.String("reason", reason)))
}
logger.WithField("preface_error_reason", reason).
Warnf("frontend serve: failed to read HTTP/2 client preface from frontend container: %v", err)
// ensure conn is closed so the upstream goroutine observes the
// failure as server-closed rather than blocking on Run()
_ = conn.Close()
return
}

if prefaceWaitHistogram != nil {
prefaceWaitHistogram.Record(ctx, elapsed.Seconds())
}
logger.Debugf("frontend serve: received HTTP/2 client preface")

// SawClientPreface tells http2.Server.ServeConn that the 24-byte
// HTTP/2 client preface has already been consumed from conn, so its
// internal readPreface() returns nil immediately and skips the
// hard-coded 10s prefaceTimeout.
(&http2.Server{}).ServeConn(conn, &http2.ServeConnOpts{
Handler: grpcServer,
SawClientPreface: true,
})
}

type markTypeFrontend struct{}
Expand Down
Loading
Loading