Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions lib/hypervisor/firecracker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,21 @@ type snapshotCreateParams struct {

// snapshotLoadParams is the JSON request body that loadSnapshot sends with
// PUT /snapshot/load. toSnapshotLoadParams fills in exactly one of
// MemFilePath and MemBackend; the other is left at its zero value and is
// dropped from the JSON by omitempty.
type snapshotLoadParams struct {
// MemFilePath is the path of the snapshot's memory file. It is only set
// when no uffd backend is requested.
MemFilePath string `json:"mem_file_path,omitempty"`
// MemBackend, when non-nil, describes the memory backend to restore from
// (a "Uffd" backend in the visible code). It is set instead of MemFilePath.
MemBackend *memBackend `json:"mem_backend,omitempty"`
// SnapshotPath is the path of the snapshot state file. It has no omitempty
// tag, so it is always present in the request.
SnapshotPath string `json:"snapshot_path"`
// EnableDiffSnapshots is always set to true by toSnapshotLoadParams.
EnableDiffSnapshots bool `json:"enable_diff_snapshots,omitempty"`
// ResumeVM is always left false by toSnapshotLoadParams, so it is omitted
// from the JSON. NOTE(review): presumably the VM is resumed by a separate
// call after the load — confirm against the caller.
ResumeVM bool `json:"resume_vm,omitempty"`
// NetworkOverrides is passed through unchanged from the caller; RestoreVM
// supplies the overrides recorded in the instance's restoreMetadata.
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
}

// memBackend selects how firecracker materializes guest memory during
// restore. backend_type "Uffd" hands page-fault handling off to a
// userfaultfd page server reachable at backend_path (Unix domain socket).
//
// It is serialized as the "mem_backend" object of the snapshot load request.
// Neither field has omitempty, so both keys are always emitted.
type memBackend struct {
// BackendType names the backend kind. "Uffd" is the only value set by the
// code visible here.
BackendType string `json:"backend_type"`
// BackendPath locates the backend; for "Uffd" it is the page server's
// socket path.
BackendPath string `json:"backend_path"`
}

type networkOverride struct {
IfaceID string `json:"iface_id"`
HostDevName string `json:"host_dev_name"`
Expand All @@ -103,6 +112,11 @@ type instanceInfo struct {
// restoreMetadata holds the per-instance settings needed to restore a VM from
// a snapshot. PrepareFork updates it and persists it with
// saveRestoreMetadataState when a field changed; RestoreVM reads it back when
// issuing the snapshot load.
type restoreMetadata struct {
// NetworkOverrides is forwarded by RestoreVM to loadSnapshot and ends up in
// the load request's network_overrides.
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
// SnapshotSourceDataDir is presumably the data directory of the instance
// the snapshot was taken from, consumed by withSnapshotSourceDirAlias —
// TODO confirm against that helper.
SnapshotSourceDataDir string `json:"snapshot_source_data_dir,omitempty"`
// UffdSocketPath, when non-empty, makes loadSnapshot send a Uffd
// mem_backend pointing at the page server instead of letting
// firecracker mmap the mem-file directly. PrepareFork records it
// per fork so RestoreVM can pick it up after a hypeman restart.
UffdSocketPath string `json:"uffd_socket_path,omitempty"`
}

func toBootSource(cfg hypervisor.VMConfig) bootSource {
Expand Down Expand Up @@ -212,14 +226,25 @@ func toSnapshotCreateParams(snapshotDir string) snapshotCreateParams {
}
}

func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride) snapshotLoadParams {
return snapshotLoadParams{
MemFilePath: snapshotMemoryPath(snapshotDir),
// toSnapshotLoadParams builds the /snapshot/load request body for the
// snapshot stored in snapshotDir.
//
// When uffdSocketPath is empty, guest memory is loaded from the snapshot's
// memory file (mem_file_path). Otherwise the request carries a "Uffd"
// mem_backend pointing at that socket and mem_file_path is left unset:
// Firecracker rejects load requests that set both, and the page server takes
// the file path through its own configuration.
//
// Diff snapshots are always enabled, ResumeVM is always false, and
// networkOverrides is passed through unchanged.
func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride, uffdSocketPath string) snapshotLoadParams {
	req := snapshotLoadParams{
		SnapshotPath:        snapshotStatePath(snapshotDir),
		NetworkOverrides:    networkOverrides,
		EnableDiffSnapshots: true,
		// ResumeVM is intentionally left at its zero value (false).
	}

	// Default path: no page server, firecracker reads the mem-file itself.
	if uffdSocketPath == "" {
		req.MemFilePath = snapshotMemoryPath(snapshotDir)
		return req
	}

	// Uffd path: MemFilePath must stay empty so it is omitted from the JSON.
	req.MemBackend = &memBackend{
		BackendPath: uffdSocketPath,
		BackendType: "Uffd",
	}
	return req
}

func snapshotStatePath(snapshotDir string) string {
Expand Down
9 changes: 8 additions & 1 deletion lib/hypervisor/firecracker/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,19 @@ func TestSnapshotParamPaths(t *testing.T) {

load := toSnapshotLoadParams("/tmp/snapshot-latest", []networkOverride{
{IfaceID: "eth0", HostDevName: "hype-abc123"},
})
}, "")
assert.Equal(t, "/tmp/snapshot-latest/state", load.SnapshotPath)
assert.Equal(t, "/tmp/snapshot-latest/memory", load.MemFilePath)
assert.Nil(t, load.MemBackend)
assert.True(t, load.EnableDiffSnapshots)
assert.False(t, load.ResumeVM)
require.Len(t, load.NetworkOverrides, 1)

loadUffd := toSnapshotLoadParams("/tmp/snapshot-latest", nil, "/run/uffd/abc.sock")
assert.Equal(t, "", loadUffd.MemFilePath, "mem_file_path must be empty when a uffd backend is set")
require.NotNil(t, loadUffd.MemBackend)
assert.Equal(t, "Uffd", loadUffd.MemBackend.BackendType)
assert.Equal(t, "/run/uffd/abc.sock", loadUffd.MemBackend.BackendPath)
}

func TestToBalloonConfig(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions lib/hypervisor/firecracker/firecracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ func (f *Firecracker) instanceStart(ctx context.Context) error {
return f.postAction(ctx, "InstanceStart")
}

func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides)
func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride, uffdSocketPath string) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides, uffdSocketPath)
if _, err := f.do(ctx, http.MethodPut, "/snapshot/load", params, http.StatusNoContent); err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions lib/hypervisor/firecracker/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ func (s *Starter) PrepareFork(ctx context.Context, req hypervisor.ForkPrepareReq
changed = true
}
}
if meta.UffdSocketPath != req.UffdSocketPath {
meta.UffdSocketPath = req.UffdSocketPath
changed = true
}

if changed {
if err := saveRestoreMetadataState(instanceDir, meta); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion lib/hypervisor/firecracker/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
snapshotSourceAliasMu.Lock()
defer snapshotSourceAliasMu.Unlock()
return withSnapshotSourceDirAlias(meta, filepath.Dir(socketPath), func() error {
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides)
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides, meta.UffdSocketPath)
})
}()
if err != nil {
Expand Down
6 changes: 6 additions & 0 deletions lib/hypervisor/hypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ type ForkPrepareRequest struct {

SerialLogPath string
Network *ForkNetworkConfig

// UffdSocketPath is set when the fork should restore from a userfaultfd
// page-server socket instead of mmap'ing its mem-file directly. The
// hypervisor records this so RestoreVM can attach a uffd memory backend
// in the snapshot/load request. Empty means use the default mmap path.
UffdSocketPath string
}

// ForkPrepareResult describes which optional fork rewrites were actually applied.
Expand Down
6 changes: 6 additions & 0 deletions lib/instances/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ func (m *manager) deleteInstance(
return fmt.Errorf("delete instance data: %w", err)
}

if stored.ForkOfTemplate != "" && m.uffd != nil {
if err := m.uffd.releaseUffdForFork(stored.ForkOfTemplate, id); err != nil {
log.WarnContext(ctx, "failed to release uffd page server for fork", "instance_id", id, "template_id", stored.ForkOfTemplate, "error", err)
}
}

log.InfoContext(ctx, "instance deleted successfully", "instance_id", id)
return nil
}
Expand Down
114 changes: 114 additions & 0 deletions lib/instances/firecracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"strings"
"syscall"
"testing"
"time"

Expand Down Expand Up @@ -551,3 +552,116 @@ func TestFirecrackerSnapshotFeature(t *testing.T) {
forkName: "fc-snapshot-fork",
})
}

// TestFirecrackerForkFromTemplate exercises the full template-driven fork
// path under the state-machine design: a firecracker source goes Running →
// Standby → Template (explicit promote), then a fork:
//
//	(a) reaches Running,
//	(b) has its mem-file hardlinked to the source's snapshot mem-file
//	    (the fan-out optimisation),
//	(c) is counted as a live fork of the template,
//	(d) registers with the per-template uffd page server,
//	(e) on delete, the fork count drops back to 0 and the fork detaches
//	    from uffd.
//
// Cleanups are registered as soon as each instance has been created, before
// waiting for it to reach Running, so a timeout while waiting does not leave
// the instance behind.
func TestFirecrackerForkFromTemplate(t *testing.T) {
	t.Parallel()
	requireFirecrackerIntegrationPrereqs(t)

	mgr, tmpDir := setupTestManagerForFirecracker(t)
	ctx := context.Background()
	p := paths.New(tmpDir)

	imageManager, err := images.NewManager(p, 1, nil)
	require.NoError(t, err)
	createNginxImageAndWait(t, ctx, imageManager)

	systemManager := system.NewManager(p)
	require.NoError(t, systemManager.EnsureSystemFiles(ctx))
	require.NoError(t, mgr.networkManager.Initialize(ctx, nil))

	source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{
		Name:           "fc-tpl-src",
		Image:          integrationTestImageRef(t, "docker.io/library/nginx:alpine"),
		Size:           1024 * 1024 * 1024,
		HotplugSize:    256 * 1024 * 1024,
		OverlaySize:    5 * 1024 * 1024 * 1024,
		Vcpus:          1,
		NetworkEnabled: true,
		Hypervisor:     hypervisor.TypeFirecracker,
	})
	require.NoError(t, err)
	// Capture the ID and register the cleanup before waiting: if the wait
	// below fails the instance still exists and must be deleted.
	// Deletion is best-effort; the test outcome does not depend on it.
	sourceID := source.Id
	t.Cleanup(func() { _ = mgr.DeleteInstance(context.Background(), sourceID) })
	source, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, integrationTestTimeout(20*time.Second))
	require.NoError(t, err)

	// Standby is the precondition for fan-out: it produces the snapshot the
	// fork will descend from.
	source, err = mgr.StandbyInstance(ctx, sourceID, StandbyInstanceRequest{})
	require.NoError(t, err)
	require.Equal(t, StateStandby, source.State)
	require.True(t, source.HasSnapshot)

	// Promote to Template explicitly — only Template sources get fan-out.
	source, err = mgr.PromoteToTemplate(ctx, sourceID)
	require.NoError(t, err)
	require.Equal(t, StateTemplate, source.State)

	forked, err := mgr.ForkInstance(ctx, sourceID, ForkInstanceRequest{
		Name:        "fc-tpl-fork",
		TargetState: StateRunning,
	})
	require.NoError(t, err)
	// As with the source, register the fork's cleanup before waiting on it.
	// Cleanups run last-in-first-out, so the fork is deleted before its
	// template source.
	forkID := forked.Id
	deletedFork := false
	t.Cleanup(func() {
		if !deletedFork {
			_ = mgr.DeleteInstance(context.Background(), forkID)
		}
	})
	forked, err = waitForInstanceState(ctx, mgr, forkID, StateRunning, integrationTestTimeout(30*time.Second))
	require.NoError(t, err)
	require.Equal(t, StateRunning, forked.State)

	// (b) The fork's mem-file must share the source's inode (hardlink), not
	// be a copy. We can't compare paths because the link is by inode; we
	// compare st_ino + st_dev between the two instances' mem-files.
	//
	// Firecracker retains the post-restore snapshot dir as snapshot-base
	// (see restoreRetainedSnapshotBase), so after the Standby -> Running
	// transition the hardlink lives under snapshot-base/, not snapshot-latest/.
	// Hardlinks survive the rename because they bind to the inode.
	forkMemPath := filepath.Join(p.InstanceSnapshotBase(forkID), templateSharedMemFileName)
	srcMemPath := filepath.Join(p.InstanceSnapshotLatest(sourceID), templateSharedMemFileName)
	forkInfo, err := os.Stat(forkMemPath)
	require.NoError(t, err, "fork mem-file should exist at snapshot-base/memory after restore")
	assert.True(t, forkInfo.Mode().IsRegular(), "fork mem-file should be a regular file (hardlink), not a symlink")
	srcInfo, err := os.Stat(srcMemPath)
	require.NoError(t, err)
	// Use checked assertions so an unexpected Sys() type fails the test with
	// a message instead of panicking.
	forkSys, ok := forkInfo.Sys().(*syscall.Stat_t)
	require.True(t, ok, "fork mem-file FileInfo.Sys() should be *syscall.Stat_t, got %T", forkInfo.Sys())
	srcSys, ok := srcInfo.Sys().(*syscall.Stat_t)
	require.True(t, ok, "source mem-file FileInfo.Sys() should be *syscall.Stat_t, got %T", srcInfo.Sys())
	assert.Equal(t, srcSys.Ino, forkSys.Ino, "fork mem-file should share the source's inode (hardlink, not copy)")
	assert.Equal(t, srcSys.Dev, forkSys.Dev, "fork mem-file should be on the same filesystem as source")

	// (c) The source instance is a Template with exactly one live fork.
	sourceMeta, err := mgr.loadMetadata(sourceID)
	require.NoError(t, err)
	assert.True(t, sourceMeta.StoredMetadata.IsTemplate, "source should be a Template")
	forks, err := mgr.countTemplateForks(sourceID)
	require.NoError(t, err)
	assert.Equal(t, 1, forks, "template fork count should be 1 after one fork")

	// (d) The per-template uffd page server should be tracking this fork.
	require.NotNil(t, mgr.uffd)
	assert.True(t, mgr.uffd.hasFork(sourceID, forkID), "uffd tracker should report fork as registered against its template")

	// (e) Deleting the fork drops the refcount and detaches from uffd.
	require.NoError(t, mgr.DeleteInstance(ctx, forkID))
	deletedFork = true

	forksAfter, err := mgr.countTemplateForks(sourceID)
	require.NoError(t, err)
	assert.Equal(t, 0, forksAfter, "template fork count should drop back to 0")
	assert.False(t, mgr.uffd.hasFork(sourceID, forkID), "uffd tracker should no longer track the deleted fork")
}
14 changes: 14 additions & 0 deletions lib/instances/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,19 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
if forkMeta.NetworkEnabled {
netCfg = &hypervisor.ForkNetworkConfig{TAPDevice: network.GenerateTAPName(forkID)}
}
uffdSourceID := ""
if shareMemFile {
uffdSourceID = stored.Id
}
uffdSocketPath, err := m.acquireForkUffdIfApplicable(ctx, uffdSourceID, forkID, stored.HypervisorType)
if err != nil {
return nil, fmt.Errorf("attach uffd page server: %w", err)
}
if uffdSocketPath != "" {
cu.Add(func() {
_ = m.uffd.releaseUffdForFork(uffdSourceID, forkID)
})
}
if _, err := starter.PrepareFork(ctx, hypervisor.ForkPrepareRequest{
SnapshotConfigPath: snapshotConfigPath,
SourceDataDir: stored.DataDir,
Expand All @@ -353,6 +366,7 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
VsockSocket: forkMeta.VsockSocket,
SerialLogPath: m.paths.InstanceAppLog(forkID),
Network: netCfg,
UffdSocketPath: uffdSocketPath,
}); err != nil {
if errors.Is(err, hypervisor.ErrNotSupported) {
return nil, fmt.Errorf("%w: fork is not supported for hypervisor %s", ErrNotSupported, stored.HypervisorType)
Expand Down
6 changes: 6 additions & 0 deletions lib/instances/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ type manager struct {
vmStarters map[hypervisor.Type]hypervisor.VMStarter
defaultHypervisor hypervisor.Type // Default hypervisor type when not specified in request
guestMemoryPolicy guestmemory.Policy

// uffd is the per-template userfaultfd page-server tracker. nil on
// non-Linux hosts; on Linux it is started lazily for forks that
// resolve to a template and torn down once no forks remain.
uffd *uffdTracker
}

// platformStarters is populated by platform-specific init functions.
Expand Down Expand Up @@ -211,6 +216,7 @@ func NewManagerWithConfig(p *paths.Paths, imageManager images.Manager, systemMan
compressionJobs: make(map[string]*compressionJob),
nativeCodecPaths: make(map[string]string),
lifecycleEvents: newLifecycleSubscribersWithBufferSize(managerConfig.LifecycleEventBufferSize),
uffd: newUffdTracker(),
}
m.deleteSnapshotFn = m.deleteSnapshot

Expand Down
Loading
Loading