Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/api/src/services/nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ export async function provisionNode(
controlPlaneUrl: `https://api.${env.BASE_DOMAIN}`,
jwksUrl: `https://api.${env.BASE_DOMAIN}/.well-known/jwks.json`,
callbackToken,
provider: targetProvider,
logJournalMaxUse: env.LOG_JOURNAL_MAX_USE,
logJournalKeepFree: env.LOG_JOURNAL_KEEP_FREE,
logJournalMaxRetention: env.LOG_JOURNAL_MAX_RETENTION,
Expand Down
12 changes: 12 additions & 0 deletions packages/cloud-init/src/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ export function validateCloudInitVariables(variables: CloudInitVariables): void
errors.push(`taskMode: must be 'task' or 'conversation' (got ${JSON.stringify(variables.taskMode)})`);
}
}
if (variables.provider !== undefined && variables.provider !== '') {
if (!VALID_CLOUD_PROVIDERS.includes(variables.provider as CloudProvider)) {
errors.push(`provider: must be one of ${VALID_CLOUD_PROVIDERS.join(', ')} (got ${JSON.stringify(variables.provider)})`);
}
}
if (variables.logJournalMaxUse !== undefined && variables.logJournalMaxUse !== '') {
if (!JOURNALD_SIZE_RE.test(variables.logJournalMaxUse)) {
errors.push(`logJournalMaxUse: must match ${JOURNALD_SIZE_RE} (got ${JSON.stringify(variables.logJournalMaxUse)})`);
Expand Down Expand Up @@ -129,6 +134,10 @@ export function validateCloudInitVariables(variables: CloudInitVariables): void
}
}

/**
 * Cloud provider identifiers accepted for the cloud-init `provider` variable.
 *
 * `validateCloudInitVariables` rejects any non-empty `provider` value that is
 * not in this list. Declared `as const` so the literal names can be reused as
 * a union type (see `CloudProvider`).
 */
export const VALID_CLOUD_PROVIDERS = ['hetzner', 'scaleway', 'gcp'] as const;
/** Union of the literal provider names listed in `VALID_CLOUD_PROVIDERS`. */
export type CloudProvider = (typeof VALID_CLOUD_PROVIDERS)[number];

/**
* Variables for cloud-init generation.
*/
Expand All @@ -138,6 +147,8 @@ export interface CloudInitVariables {
controlPlaneUrl: string;
jwksUrl: string;
callbackToken: string;
/** Cloud provider (hetzner, scaleway, gcp). Used for provider-specific apt mirrors. */
provider?: string;
/** journald SystemMaxUse (default: 500M) */
logJournalMaxUse?: string;
/** journald SystemKeepFree (default: 1G) */
Expand Down Expand Up @@ -204,6 +215,7 @@ export function generateCloudInit(
'{{ tls_cert_path }}': variables.originCaCert ? '/etc/sam/tls/origin-ca.pem' : '',
'{{ tls_key_path }}': variables.originCaCert ? '/etc/sam/tls/origin-ca-key.pem' : '',
'{{ cf_ip_fetch_timeout }}': variables.cfIpFetchTimeout ?? '10',
'{{ provider }}': variables.provider ?? '',
};

// Use function replacement to prevent $-pattern interpretation in values.
Expand Down
4 changes: 2 additions & 2 deletions packages/cloud-init/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export { generateCloudInit, HETZNER_USER_DATA_MAX_BYTES, validateCloudInitSize, validateCloudInitVariables } from './generate';
export type { CloudInitVariables, GenerateCloudInitOptions } from './generate';
export { generateCloudInit, HETZNER_USER_DATA_MAX_BYTES, VALID_CLOUD_PROVIDERS, validateCloudInitSize, validateCloudInitVariables } from './generate';
export type { CloudInitVariables, CloudProvider, GenerateCloudInitOptions } from './generate';
export { CLOUD_INIT_TEMPLATE } from './template';
26 changes: 26 additions & 0 deletions packages/cloud-init/src/template.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ write_files:
Environment=VM_AGENT_PORT={{ vm_agent_port }}
Environment=TLS_CERT_PATH={{ tls_cert_path }}
Environment=TLS_KEY_PATH={{ tls_key_path }}
Environment=PROVIDER={{ provider }}
ExecStart=/usr/local/bin/vm-agent
Restart=always
RestartSec=5
Expand Down Expand Up @@ -280,5 +281,30 @@ write_files:
{{ origin_ca_key }}
permissions: '0600'

- path: /etc/apt/apt.conf.d/80-retries
content: |
Acquire::Retries "3";
Acquire::http::Timeout "30";
Acquire::https::Timeout "30";
permissions: '0644'

- path: /etc/sam/apt-mirror-config.sh
permissions: '0755'
content: |
#!/bin/bash
# Provider-specific apt mirror configuration for Docker containers.
# Sourced by the VM agent bootstrap to inject fast mirrors into containers.
# Only overrides for providers with known fast local mirrors.
PROVIDER="{{ provider }}"
case "$PROVIDER" in
hetzner)
APT_MIRROR="mirror.hetzner.com"
;;
*)
APT_MIRROR=""
;;
esac
export APT_MIRROR

final_message: "Simple Agent Manager node {{ node_id }} provisioning started!"
`;
87 changes: 87 additions & 0 deletions packages/cloud-init/tests/generate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,93 @@ describe('regex injection prevention ($-pattern in replacement values)', () => {
});
});

describe('provider field and apt mirror configuration', () => {
  /**
   * Parses the generated cloud-init YAML and returns the `write_files` entry
   * whose `path` matches, or `undefined` when no such entry exists.
   */
  const findWrittenFile = (config: string, path: string) =>
    YAML.parse(config).write_files.find((f: { path: string }) => f.path === path);

  it('substitutes PROVIDER env var in systemd service when provider is set', () => {
    const config = generateCloudInit(baseVariables({ provider: 'hetzner' }));
    expect(config).toContain('Environment=PROVIDER=hetzner');
  });

  it('produces empty PROVIDER env var when provider is undefined', () => {
    const config = generateCloudInit(baseVariables());
    // Anchor to the whole line: a plain substring check for
    // 'Environment=PROVIDER=' would also pass for 'PROVIDER=hetzner' or for an
    // unreplaced '{{ provider }}' placeholder, so it could never fail.
    expect(config).toMatch(/^[ \t]*Environment=PROVIDER=[ \t]*$/m);
    expect(config).not.toContain('PROVIDER=undefined');
  });

  it('includes apt retry configuration in write_files', () => {
    const config = generateCloudInit(baseVariables());

    const aptRetry = findWrittenFile(config, '/etc/apt/apt.conf.d/80-retries');
    expect(aptRetry).toBeDefined();
    expect(aptRetry.content).toContain('Acquire::Retries "3"');
    expect(aptRetry.content).toContain('Acquire::http::Timeout "30"');
    expect(aptRetry.content).toContain('Acquire::https::Timeout "30"');
  });

  it('includes provider-specific apt mirror script in write_files', () => {
    const config = generateCloudInit(baseVariables({ provider: 'hetzner' }));

    const mirrorScript = findWrittenFile(config, '/etc/sam/apt-mirror-config.sh');
    expect(mirrorScript).toBeDefined();
    expect(mirrorScript.permissions).toBe('0755');
    expect(mirrorScript.content).toContain('PROVIDER="hetzner"');
    expect(mirrorScript.content).toContain('APT_MIRROR="mirror.hetzner.com"');
  });

  it('apt mirror script sets empty APT_MIRROR for non-hetzner providers', () => {
    const config = generateCloudInit(baseVariables({ provider: 'scaleway' }));

    const mirrorScript = findWrittenFile(config, '/etc/sam/apt-mirror-config.sh');
    expect(mirrorScript).toBeDefined();
    expect(mirrorScript.content).toContain('PROVIDER="scaleway"');
    // Structural check only: the script text always carries the default
    // `APT_MIRROR=""` branch; which branch runs is decided by the shell `case`.
    expect(mirrorScript.content).toContain('APT_MIRROR=""');
  });

  it('apt mirror script sets empty PROVIDER when provider is omitted', () => {
    const config = generateCloudInit(baseVariables());

    const mirrorScript = findWrittenFile(config, '/etc/sam/apt-mirror-config.sh');
    expect(mirrorScript).toBeDefined();
    expect(mirrorScript.content).toContain('PROVIDER=""');
  });
});

describe('validateCloudInitVariables — provider field', () => {
  /**
   * Wraps a validation call in a thunk so each case can assert on
   * throw / no-throw for a given `provider` value.
   */
  const validating = (provider: string | undefined) => () =>
    validateCloudInitVariables(baseVariables({ provider }));

  it('accepts valid provider values', () => {
    ['hetzner', 'scaleway', 'gcp'].forEach((name) => {
      expect(validating(name)).not.toThrow();
    });
  });

  it('accepts empty string for provider', () => {
    // Empty string is treated the same as "not provided".
    expect(validating('')).not.toThrow();
  });

  it('accepts undefined provider', () => {
    expect(validating(undefined)).not.toThrow();
  });

  it('rejects invalid provider', () => {
    expect(validating('aws')).toThrow('provider');
  });

  it('rejects provider with shell metacharacters', () => {
    // The provider value is substituted into a shell script in the template,
    // so anything outside the allow-list must be refused.
    expect(validating('hetzner; rm -rf /')).toThrow('provider');
  });
});

describe('integrated size validation in generateCloudInit', () => {
it('throws when output exceeds 32KB (default behavior)', () => {
// Create variables that will produce a config exceeding 32KB
Expand Down
108 changes: 105 additions & 3 deletions packages/vm-agent/internal/bootstrap/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,15 @@ func Run(ctx context.Context, cfg *config.Config, reporter *bootlog.Reporter) er
reporter.Log("devcontainer_up", "completed", "Devcontainer ready")
}

// Inject apt retry config (all providers) and mirror config (provider-specific) before package installs.
// Non-fatal: if injection fails, apt will use default settings.
if containerID, findErr := findDevcontainerID(ctx, cfg); findErr == nil {
injectAptRetryConfig(ctx, containerID)
injectAptMirrorConfig(ctx, cfg, containerID)
} else {
slog.Debug("Could not find devcontainer for apt config injection (non-fatal)", "error", findErr)
}

// Ensure gh CLI is available (install if missing from custom devcontainers).
// Non-fatal: workspace still works without gh, just can't create PRs.
reporter.Log("gh_cli", "started", "Checking GitHub CLI availability")
Expand Down Expand Up @@ -362,6 +371,15 @@ func PrepareWorkspace(ctx context.Context, cfg *config.Config, state ProvisionSt
}
}

// Inject apt retry config (all providers) and mirror config (provider-specific) before package installs.
// Non-fatal: if injection fails, apt will use default settings.
if containerID, findErr := findDevcontainerID(ctx, cfg); findErr == nil {
injectAptRetryConfig(ctx, containerID)
injectAptMirrorConfig(ctx, cfg, containerID)
} else {
slog.Debug("Could not find devcontainer for apt config injection (non-fatal)", "error", findErr)
}

// Ensure gh CLI is available (install if missing from custom devcontainers).
reporter.Log("gh_cli", "started", "Checking GitHub CLI availability")
if err := ensureGitHubCLI(ctx, cfg); err != nil {
Expand Down Expand Up @@ -857,11 +875,13 @@ func ensureDevcontainerReady(ctx context.Context, cfg *config.Config, volumeName
slog.Info("Repo has its own devcontainer config, skipping additional-features injection")
}

cmd := exec.CommandContext(ctx, "devcontainer", args...)
buildCtx, buildCancel := devcontainerBuildContext(ctx, cfg)
cmd := exec.CommandContext(buildCtx, "devcontainer", args...)
output, err := cmd.CombinedOutput()
buildCancel() // Release timer immediately; fallback uses parent ctx.
if err != nil {
// Repo config failed — log the error and fall back to default image.
slog.Warn("Devcontainer build failed with repo config, falling back to default image", "error", err, "output", strings.TrimSpace(string(output)))
slog.Warn("Devcontainer build failed with repo config, falling back to default image", "error", err, "output", strings.TrimSpace(string(output)), "timedOut", buildCtx.Err() == context.DeadlineExceeded)
var fallbackErr error
usedFallback, fallbackErr = fallbackToDefaultDevcontainer(ctx, cfg, volumeName, credHelperHostPath, err, output)
if fallbackErr != nil {
Expand All @@ -871,7 +891,9 @@ func ensureDevcontainerReady(ctx context.Context, cfg *config.Config, volumeName
}
} else {
// No config — use default.
_, err := runDevcontainerWithDefault(ctx, cfg, volumeName, credHelperHostPath)
buildCtx, buildCancel := devcontainerBuildContext(ctx, cfg)
defer buildCancel()
_, err := runDevcontainerWithDefault(buildCtx, cfg, volumeName, credHelperHostPath)
if err != nil {
return false, err
}
Expand All @@ -887,6 +909,86 @@ func ensureDevcontainerReady(ctx context.Context, cfg *config.Config, volumeName
return usedFallback, nil
}

// devcontainerBuildContext derives the context used for a devcontainer build.
//
// When cfg.DevcontainerBuildTimeout is positive the returned context carries
// that deadline, so a build cannot hang forever on a stalled network or apt
// operation. Otherwise (e.g. DEVCONTAINER_BUILD_TIMEOUT=0) no deadline is
// added and the returned context only forwards cancellation of parent.
//
// The caller must invoke the returned CancelFunc in both cases.
func devcontainerBuildContext(parent context.Context, cfg *config.Config) (context.Context, context.CancelFunc) {
	timeout := cfg.DevcontainerBuildTimeout
	if timeout <= 0 {
		// No build deadline configured: plain cancellable child context.
		return context.WithCancel(parent)
	}

	slog.Debug("Applying devcontainer build timeout", "timeout", timeout)
	return context.WithTimeout(parent, timeout)
}

// injectAptRetryConfig writes /etc/apt/apt.conf.d/80-retries inside the given
// container (as root, via `docker exec`) so that apt retries failed downloads
// and applies HTTP/HTTPS timeouts.
//
// Best-effort: a failure is logged as a warning and otherwise ignored, in
// which case apt inside the container keeps its default settings.
func injectAptRetryConfig(ctx context.Context, containerID string) {
	// Raw string: the `\n` sequences are expanded by printf inside the container, not by Go.
	const writeRetryConf = `mkdir -p /etc/apt/apt.conf.d && printf 'Acquire::Retries "3";\nAcquire::http::Timeout "30";\nAcquire::https::Timeout "30";\n' > /etc/apt/apt.conf.d/80-retries`

	out, execErr := exec.CommandContext(
		ctx, "docker", "exec", "-u", "root", containerID, "sh", "-c", writeRetryConf,
	).CombinedOutput()

	if execErr == nil {
		slog.Info("Injected apt retry config into container", "containerID", containerID)
		return
	}
	slog.Warn("Failed to inject apt retry config into container (non-fatal)", "error", execErr, "output", strings.TrimSpace(string(out)))
}

// injectAptMirrorConfig points apt inside a running container at the mirror
// associated with cfg.Provider, by rewriting the archive.ubuntu.com and
// security.ubuntu.com hosts in the container's apt source files.
//
// It is a no-op when cfg.Provider is empty, when resolveAptMirror has no
// mirror for the provider, or when the mirror fails hostname validation.
// Best-effort: a failed `docker exec` is logged as a warning and ignored, so
// apt keeps whatever sources the image shipped with.
//
// NOTE(review): only the host part of each URL is replaced; confirm the
// mirror serves the same path layout as archive.ubuntu.com. Only `http://`
// URLs are rewritten.
func injectAptMirrorConfig(ctx context.Context, cfg *config.Config, containerID string) {
	provider := cfg.Provider
	if provider == "" {
		return
	}

	mirror := resolveAptMirror(provider)
	switch {
	case mirror == "":
		return
	case !isValidMirrorHostname(mirror):
		// The mirror is interpolated into a shell script below, so refuse
		// anything that is not a plain hostname.
		slog.Warn("APT_MIRROR value looks unsafe, skipping injection", "mirror", mirror, "provider", provider)
		return
	}

	// One sed program, applied to every apt source file that exists.
	sedProgram := fmt.Sprintf("s|http://archive.ubuntu.com|http://%[1]s|g; s|http://security.ubuntu.com|http://%[1]s|g", mirror)
	sourceFiles := []string{
		"/etc/apt/sources.list",
		"/etc/apt/sources.list.d/ubuntu.sources",
	}
	steps := make([]string, 0, len(sourceFiles))
	for _, path := range sourceFiles {
		// `|| true` keeps the group successful when the file is absent or sed fails.
		steps = append(steps, fmt.Sprintf("{ [ -f %[1]s ] && sed -i '%[2]s' %[1]s || true; }", path, sedProgram))
	}
	innerScript := strings.Join(steps, " && ")

	// containerID is passed as its own argv entry (never shell-interpolated),
	// so it cannot inject into the command line.
	out, execErr := exec.CommandContext(
		ctx, "docker", "exec", "-u", "root", containerID, "sh", "-c", innerScript,
	).CombinedOutput()
	if execErr != nil {
		slog.Warn("Failed to inject apt mirror config into container (non-fatal)", "error", execErr, "output", strings.TrimSpace(string(out)), "provider", provider)
		return
	}
	slog.Info("Injected apt mirror config into container", "provider", provider, "containerID", containerID, "mirror", mirror)
}

// resolveAptMirror maps a cloud provider name to the hostname of its apt
// mirror. Providers without a dedicated mirror (including the empty string)
// yield "".
func resolveAptMirror(provider string) string {
	if provider == "hetzner" {
		return "mirror.hetzner.com"
	}
	// No provider-specific mirror known.
	return ""
}

// validMirrorRe matches strings made only of ASCII letters, digits, dots and
// hyphens that both start and end with a letter or digit. The empty string
// does not match.
var validMirrorRe = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9.\-]*[a-zA-Z0-9])?$`)

// isValidMirrorHostname reports whether mirror contains only safe hostname
// characters (see validMirrorRe). injectAptMirrorConfig uses it as a guard
// before interpolating the mirror into a shell script.
func isValidMirrorHostname(mirror string) bool {
return validMirrorRe.MatchString(mirror)
}

func fallbackToDefaultDevcontainer(
ctx context.Context,
cfg *config.Config,
Expand Down
Loading
Loading