Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CubeAPI/src/services/sandboxes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ const RET_CODE_HTTP_OK: i32 = 200;
const RET_CODE_NOT_FOUND: i32 = 130404;
const RET_CODE_CONFLICT: i32 = 130409;
const HOSTDIR_MOUNT_KEY: &str = "host-mount";
/// Annotation carrying create-time env_vars as a JSON object string {"K":"V"}.
/// The cube.master prefix ensures CubeMaster forwards it to Cubelet.
const CREATE_ENV_VARS_ANNOTATION: &str = "cube.master.sandbox.create_env_vars";

#[derive(Clone)]
pub struct SandboxService {
Expand Down Expand Up @@ -135,6 +138,16 @@ impl SandboxService {
meta
});

// Carry create-time env_vars as sandbox runtime metadata for Cubelet:
// once the sandbox is ready Cubelet injects them through envd's native
// POST /init, so later commands.run can read them (same as E2B), without
// writing to rootfs/profile or the OCI container spec. The annotation uses
// the cube.master prefix so CubeMaster forwards it to Cubelet automatically.
if let Some(env_vars) = body.env_vars.as_ref().filter(|m| !m.is_empty()) {
let encoded = serde_json::to_string(env_vars).map_err(internal_error)?;
annotations.insert(CREATE_ENV_VARS_ANNOTATION.to_string(), encoded);
}

let cube_network_config =
build_cube_network_config(body.allow_internet_access, body.network.as_ref())?;

Expand Down
8 changes: 7 additions & 1 deletion CubeMaster/pkg/base/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,13 @@ const (
CubeAnnotationsFallbackToSlowPath = "cube.master.fallback_to_slow_path"
CubeAnnotationsSystemDiskSize = "cube.master.system_disk_size"

CubeAnnotationsAppSnapshotCreate = "cube.master.appsnapshot.create"
CubeAnnotationsAppSnapshotCreate = "cube.master.appsnapshot.create"
// CubeAnnotationSandboxCreateEnvVars carries create-time env_vars as a JSON
// object string. It is written by CubeAPI and consumed by Cubelet (which
// injects them via envd /init). As per-invocation runtime metadata it must be
// stripped on template commit so instance-level secrets are not persisted
// into template snapshots.
CubeAnnotationSandboxCreateEnvVars = "cube.master.sandbox.create_env_vars"
CubeAnnotationAppSnapshotTemplateID = "cube.master.appsnapshot.template.id"
CubeAnnotationAppSnapshotVersion = "cube.master.appsnapshot.version"
CubeAnnotationAppSnapshotTemplateVersion = "cube.master.appsnapshot.template.version"
Expand Down
3 changes: 3 additions & 0 deletions CubeMaster/pkg/templatecenter/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,9 @@ func normalizeStoredTemplateRequest(req *sandboxtypes.CreateCubeSandboxReq) (*sa
// longer exist).
delete(cloned.Annotations, constants.CubeAnnotationRuntimeSnapshotID)
delete(cloned.Annotations, constants.CubeAnnotationRuntimeSnapshotAttachedAt)
// Create-time env_vars are per-instance runtime metadata (may contain
// secrets) and must not be persisted with the template snapshot.
delete(cloned.Annotations, constants.CubeAnnotationSandboxCreateEnvVars)
cloned.Annotations[constants.CubeAnnotationAppSnapshotTemplateID] = templateID
return cloned, nil
}
Expand Down
6 changes: 6 additions & 0 deletions Cubelet/services/cubebox/cube_container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,12 @@ func (l *local) createContainers(ctx context.Context, flowOpts *workflow.CreateC
if err := l.doProbe(param.ctxTmp, param.cntrReq, param.ci); err != nil {
return err
}
// Once the sandbox is ready, inject create-time env_vars into the guest
// envd (via its native /init) so later commands.run / run_code can read
// them. No-op when the annotation is absent.
if err := l.syncCreateEnvToEnvd(param.ctxTmp, sandBox, param.ci); err != nil {
return err
Comment on lines +337 to +338

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance: syncCreateEnvToEnvd is called inside the per-container loop, should be per-sandbox

syncCreateEnvToEnvd calls POST /init on the envd instance inside the guest — and envd is a single instance per sandbox, not per container. Calling it N times for an N-container sandbox is redundant and multiplies latency: with the 15-second retry budget (envdInitMaxWait), a 3-container sandbox could stall for up to 45s in the worst case.

This should be moved outside the for _, param := range params loop, called once after all containers have been created and probed. Since syncCreateEnvToEnvd uses ci.IP as the primary IP source (falling back to sandBox.IP), you'd need to resolve the IP outside the loop — but the first/probe container's IP should be sufficient for the envd connection.

}
err = l.cbriManager.PostCreateContainer(ctx, sandBox, param.ci)
if err != nil {
containerLog.Errorf("post create container failed, err: %v", err)
Expand Down
154 changes: 154 additions & 0 deletions Cubelet/services/cubebox/envd_init.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// Copyright (c) 2024 Tencent Inc.
// SPDX-License-Identifier: Apache-2.0
//

package cubebox

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"strconv"
"time"

"github.com/tencentcloud/CubeSandbox/Cubelet/pkg/log"
cubeboxstore "github.com/tencentcloud/CubeSandbox/Cubelet/pkg/store/cubebox"
)

const (
// createEnvVarsAnnotation is the contract with CubeAPI: it carries the
// create-time env_vars as a JSON object string {"K":"V"}. CubeMaster forwards
// it here automatically via the cube.master prefix passthrough.
createEnvVarsAnnotation = "cube.master.sandbox.create_env_vars"
// envdServerPort is the HTTP port envd listens on inside the guest
// (E2B envd is fixed at 49983).
envdServerPort = 49983

// Retry budget for /init: envd may not be listening yet right after the
// sandbox becomes ready, so we short-poll until it is up or we time out.
envdInitMaxWait = 15 * time.Second
envdInitInterval = 200 * time.Millisecond
envdInitReqTimeout = 3 * time.Second
)

// envdInitBody is the envd POST /init request body (e2b-dev/infra). We only use
// envVars and timestamp here:
// - envVars: stored into envd's global defaults.EnvVars and merged into child
// process environments on Process/Start;
// - timestamp: used by envd to accept only newer requests, and to correct the
// guest clock as a side effect.
type envdInitBody struct {
EnvVars map[string]string `json:"envVars,omitempty"`
Timestamp string `json:"timestamp,omitempty"`
}

// syncCreateEnvToEnvd injects the create-time env_vars (carried via annotation)
// into the guest envd.
//
// It mirrors the E2B orchestrator initEnvd flow: the host side connects directly
// to the guest's ip:49983 and calls the native POST /init. envd stores these
// variables into its global defaults.EnvVars, so processes it later starts
// (commands.run / run_code) inherit them (precedence: global < per-command, where
// per-command can override). envd merges additively and never clears existing
// entries, so any env already present in the runtime is preserved.
//
// Nothing is written to rootfs/profile and nothing goes into the OCI container
// spec, so neither the image/template nor the template snapshot is polluted with
// instance-level secrets. Sandboxes without create env are a no-op.
func (l *local) syncCreateEnvToEnvd(ctx context.Context, sandBox *cubeboxstore.CubeBox,
ci *cubeboxstore.Container) error {
raw, ok := sandBox.Annotations[createEnvVarsAnnotation]
if !ok || raw == "" {
return nil
}

envVars := map[string]string{}
if err := json.Unmarshal([]byte(raw), &envVars); err != nil {
return fmt.Errorf("parse create env_vars annotation failed: %w", err)
}
if len(envVars) == 0 {
return nil
}

ip := ci.IP
if ip == "" || ip == "<nil>" {
ip = sandBox.IP
}
if ip == "" || ip == "<nil>" {
return fmt.Errorf("sync create env to envd: empty sandbox IP")
}

addr := fmt.Sprintf("http://%s/init", net.JoinHostPort(ip, strconv.Itoa(envdServerPort)))
body, err := json.Marshal(envdInitBody{
EnvVars: envVars,
Timestamp: time.Now().UTC().Format(time.RFC3339Nano),
})
if err != nil {
return fmt.Errorf("marshal envd init body failed: %w", err)
}

start := time.Now()
deadline := start.Add(envdInitMaxWait)
client := &http.Client{Timeout: envdInitReqTimeout}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code quality: HTTP client created per invocation, no connection reuse

A new http.Client (with a default http.Transport) is created every time syncCreateEnvToEnvd runs. The default transport maintains an idle connection pool, but since the client is discarded after the function returns, those connections leak until the idle timer GCs them.

Suggestion: Lift client to a package-level or struct-level cached variable:

var envdHTTPClient = &http.Client{
    Timeout: envdInitReqTimeout,
    Transport: &http.Transport{
        MaxIdleConnsPerHost: 4,
        IdleConnTimeout:     30 * time.Second,
        DialContext: (&net.Dialer{
            Timeout:   2 * time.Second,
        }).DialContext,
    },
}


var (
lastErr error
attempt int
)
for {
attempt++
lastErr = postEnvdInit(ctx, client, addr, body)
if lastErr == nil {
log.G(ctx).Infof("envd /init ok: sandbox=%s ip=%s vars=%d attempts=%d cost=%v",
sandBox.SandboxID, ip, len(envVars), attempt, time.Since(start))
return nil
}

if ctx.Err() != nil {
lastErr = ctx.Err()
break
}
if time.Now().After(deadline) {
break
}
select {
case <-ctx.Done():
lastErr = ctx.Err()
case <-time.After(envdInitInterval):
}
if ctx.Err() != nil {
break
}
}
Comment on lines +111 to +126

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code quality: Redundant context-cancellation checks in retry loop

The retry loop checks ctx.Err() at three points (lines 111, 119, 123) with slightly different control flow, making it harder to reason about exit paths. When ctx.Done() fires in the select at line 119, lastErr is set but the loop doesn't break — it falls through to the check at line 123, which then breaks. This works but is roundabout.

Suggestion: Simplify to a for + select pattern:

for time.Since(start) < envdInitMaxWait {
    attempt++
    if err := postEnvdInit(ctx, client, addr, body); err == nil {
        // log success...
        return nil
    }
    lastErr = err
    select {
    case <-ctx.Done():
        lastErr = ctx.Err()
    case <-time.After(envdInitInterval):
    }
}
return fmt.Errorf("envd /init failed: ...")

This eliminates the three-way context check and makes the control flow self-documenting.


return fmt.Errorf("envd /init failed: sandbox=%s ip=%s attempts=%d cost=%v: %w",
sandBox.SandboxID, ip, attempt, time.Since(start), lastErr)
}

// postEnvdInit sends one /init request; only 204/200 is treated as success.
func postEnvdInit(ctx context.Context, client *http.Client, addr string, body []byte) error {
reqCtx, cancel := context.WithTimeout(ctx, envdInitReqTimeout)
defer cancel()

req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, addr, bytes.NewReader(body))
if err != nil {
return fmt.Errorf("build envd init request failed: %w", err)
}
req.Header.Set("Content-Type", "application/json")

resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
io.Copy(io.Discard, resp.Body)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code quality: io.Copy error silently discarded

io.Copy(io.Discard, resp.Body) drains the body for connection reuse, but its error return is ignored. If the body is truncated or the TCP connection is reset mid-read, this error is invisible. Consider logging the error at debug/trace level.

Also, defer resp.Body.Close() runs after the function returns — if you want to see a Close error as well, a named return or explicit close would help, though the body is expected to be small for this endpoint.


if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK {
return fmt.Errorf("envd /init unexpected status %d", resp.StatusCode)
}
return nil
}
Loading