Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ sudo cargo test --test dm_thin -- --ignored --test-threads=1

- Prefer explicit error handling. Use `?` for propagation, not `.unwrap()`.
- Shell out to platform CLI tools — no fragile C library bindings. Linux: `zfs`/`zpool`/`iptables`. macOS: `hdiutil`/`diskutil`/`cp -c`/`ember-vz`.
- Value clear interfaces, boundaries, and abstractions; avoid leaks between them. Subsystems own their own formats — dm-thin owns its pool/volume names, networking owns its TAP prefix and iptables comment, and so on. Shared types like `GlobalConfig` expose generic identity (e.g. `instance_namespace()`) and stay free of subsystem trivia. If you find yourself reaching across a boundary to format a name, match a string, or branch on another subsystem's mode, that's the cue to move the logic to the side that owns the concept.

## Architecture

Expand Down
24 changes: 21 additions & 3 deletions crates/ember-core/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ pub struct InitConfig {
pub storage_backend: crate::config::StorageKind,
/// Path to the state directory (e.g., `/var/lib/ember` or `~/Library/Application Support/ember`).
pub state_dir: PathBuf,
/// Per-installation namespace embedded in dm-thin pool / device
/// names so `ember init` against a fresh state-dir doesn't trample
/// another install's pool. Mirrors `GlobalConfig::instance_id`.
pub instance_id: String,
/// ZFS pool name. Used on Linux for `zfs create`; ignored on macOS.
pub pool: String,
/// Dataset name within the ZFS pool. Used on Linux; ignored on macOS.
Expand Down Expand Up @@ -208,7 +212,7 @@ pub trait StorageBackend {
/// Mountable device path for a VM's root disk.
///
/// Linux/ZFS: `/dev/zvol/pool/dataset/vms/vm_name`.
/// Linux/dm-thin: `/dev/mapper/ember-vm-<vm_name>`.
/// Linux/dm-thin: `/dev/mapper/ember-<instance_id>-vm-<vm_name>`.
/// macOS/APFS: `<state_dir>/vms/<vm_name>/rootfs.img`.
///
/// Backends that lazily activate kernel state (notably dm-thin: pool
Expand Down Expand Up @@ -314,9 +318,10 @@ pub trait NetworkBackend {

/// Tear down networking for a VM.
///
/// Linux: removes iptables rules, deletes TAP device, releases IP.
/// Linux: removes iptables rules (matched by per-installation
/// comment), deletes TAP device, releases IP.
/// macOS: no-op (vmnet cleans up automatically).
fn teardown(&self, vm: &VmMetadata) -> Result<()>;
fn teardown(&self, vm: &VmMetadata, config: &GlobalConfig) -> Result<()>;

/// Discover the guest's IP address from its MAC address.
///
Expand Down Expand Up @@ -383,6 +388,19 @@ pub trait Platform {
/// Linux: `/var/lib/ember`. macOS: `~/Library/Application Support/ember`.
fn default_state_dir() -> PathBuf;

/// Default IP subnet handed to `GlobalConfig.ip_subnet` at
/// `ember init` when the user doesn't pass `--ip-subnet`.
///
/// Linux carves a `/16` slot inside `10.0.0.0/8` and uses /30
/// blocks per VM (host has full control of routing), scaling to
/// ~16k VMs per install. macOS sub-allocates a `/27` inside
/// vmnet's host-wide `192.168.64.0/24` and uses single-IP
/// allocation (vmnet's shared L2 bridge means /30 P2P links are
/// pointless), giving ~30 VMs per install. A slot collision between
/// two installs is possible (roughly 1/8 per pair among the eight
/// `/27` slots on macOS) and resolvable via the `--ip-subnet` override.
fn default_ip_subnet(instance_id: &str) -> String;

/// Console device name for inittab injection.
///
/// Linux/Firecracker: `"ttyS0"`. macOS/AVF: `"hvc0"`.
Expand Down
141 changes: 141 additions & 0 deletions crates/ember-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,22 @@ pub struct GlobalConfig {
/// Populated during `ember init`; defaults to empty path for backwards compat.
#[serde(default)]
pub state_dir: PathBuf,
/// Per-installation namespace, derived at `ember init` from a hash
/// of the canonicalized state directory (or supplied via
/// `--instance-id`). 4 hex chars; embedded in every host-global
/// resource name (dm-thin pool, TAP devices, iptables comments) so
/// two ember installations on the same host don't clash. An empty
/// value marks a legacy config written before per-installation
/// isolation existed (see `instance_namespace()`); `ember init`
/// always pins a non-empty value.
#[serde(default)]
pub instance_id: String,
/// IPv4 base subnet that VM addresses are allocated from (handed out
/// as /30 links on Linux). On Linux it defaults at init time to
/// `10.{slot}.0.0/16` where `slot` is derived from the instance-id
/// hash, so two installs normally get non-overlapping ranges without
/// the user having to think about it. Overridable via `--ip-subnet`.
#[serde(default = "default_ip_subnet")]
pub ip_subnet: String,
/// Backing path for non-ZFS backends.
///
/// * btrfs: block device or sparse image file containing the btrfs filesystem.
Expand All @@ -92,6 +108,49 @@ pub struct GlobalConfig {
pub dm_thin_mode: Option<DmThinMode>,
}

/// Serde fallback for `GlobalConfig::ip_subnet`.
///
/// Only consulted when deserializing a `config.json` that was written
/// before the `ip_subnet` field existed; fresh installs compute their
/// subnet at init time from the instance-id hash instead.
pub fn default_ip_subnet() -> String {
    String::from("10.100.0.0/16")
}

/// Compute the default `instance_id` for a state directory: four
/// lowercase hex characters taken from a hash of the directory path.
///
/// The path is canonicalized first when that succeeds, so spellings
/// such as `/var/lib/ember` and `/var/lib/ember/` resolve to one id.
/// If canonicalization fails (for example because the directory does
/// not exist yet) the path is hashed exactly as it was given.
///
/// The same input path yields the same id on every call, which lets
/// later invocations find the resources created at init time.
/// Distinct directories usually map to distinct ids, but the space is
/// only 16 bits wide; anyone who hits a collision can pass
/// `--instance-id` explicitly.
pub fn derive_instance_id(state_dir: &std::path::Path) -> String {
    // Prefer the resolved on-disk path; otherwise use the caller's
    // path verbatim.
    let resolved = match state_dir.canonicalize() {
        Ok(real) => real,
        Err(_) => state_dir.to_path_buf(),
    };
    let digest = fnv1a_32(resolved.as_os_str().as_encoded_bytes());
    // Deliberate truncation: only the low 16 bits of the hash are kept.
    let low16 = digest as u16;
    format!("{low16:04x}")
}

/// 32-bit FNV-1a hash of `bytes`.
///
/// Chosen because its output is fixed by the algorithm (unlike
/// `DefaultHasher`, whose output may change between Rust versions)
/// and because it is tiny enough to inline instead of adding a crypto
/// dependency for name derivation that is not security-sensitive.
///
/// Public so platform crates can derive their own scoped values from
/// the instance id (e.g. Linux's `/16`-in-`10.0.0.0/8` subnet slot).
pub fn fnv1a_32(bytes: &[u8]) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;

    // FNV-1a order: XOR the byte in first, then multiply by the prime.
    bytes
        .iter()
        .fold(OFFSET_BASIS, |acc, &byte| (acc ^ u32::from(byte)).wrapping_mul(PRIME))
}

impl GlobalConfig {
/// Full ZFS dataset path for images (e.g. `ember/ember/images`).
pub fn images_dataset(&self) -> String {
Expand All @@ -102,4 +161,86 @@ impl GlobalConfig {
pub fn vms_dataset(&self) -> String {
format!("{}/{}/vms", self.pool, self.dataset)
}

/// Per-installation namespace for scoping host-global resources.
///
/// Returns `Some(id)` when `instance_id` is non-empty and `None`
/// when it is empty, which is how a legacy config from before
/// isolation shows up.
///
/// This is the single generic identity hook on `GlobalConfig`.
/// Each subsystem (dm-thin pool/volume names, TAP device prefix,
/// iptables comment) builds its own scoped strings from this value
/// and handles the `None` case in its own module, so
/// `GlobalConfig` carries no dm-thin or networking trivia and any
/// legacy literals sit beside the code that reads them.
pub fn instance_namespace(&self) -> Option<&str> {
    let id = self.instance_id.as_str();
    (!id.is_empty()).then_some(id)
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Minimal ZFS-backed config whose only interesting field is
    /// `instance_id`.
    fn config_with_id(id: &str) -> GlobalConfig {
        GlobalConfig {
            instance_id: id.to_owned(),
            ip_subnet: String::from("10.100.0.0/16"),
            state_dir: PathBuf::new(),
            storage_backend: StorageKind::Zfs,
            pool: String::from("tank"),
            dataset: String::from("ember"),
            kernel_path: None,
            wan_iface: None,
            storage_path: None,
            dm_thin_block_size: None,
            dm_thin_mode: None,
        }
    }

    #[test]
    fn instance_id_derived_from_state_dir_is_4_hex_chars() {
        let derived = derive_instance_id(Path::new("/var/lib/ember"));
        assert_eq!(derived.len(), 4);
        assert!(derived.bytes().all(|b| b.is_ascii_hexdigit()));
    }

    #[test]
    fn instance_id_is_stable_across_calls() {
        let missing = Path::new("/some/path/that/does/not/exist");
        let first = derive_instance_id(missing);
        let second = derive_instance_id(missing);
        assert_eq!(first, second);
    }

    #[test]
    fn distinct_state_dirs_usually_get_distinct_ids() {
        // Only 16 bits of id space, yet two unrelated paths are
        // expected to land on different values.
        let system_id = derive_instance_id(Path::new("/var/lib/ember"));
        let scratch_id = derive_instance_id(Path::new("/tmp/ember-test"));
        assert_ne!(system_id, scratch_id);
    }

    #[test]
    fn instance_namespace_returns_id_for_new_install() {
        let tagged = config_with_id("a3f4");
        assert_eq!(tagged.instance_namespace(), Some("a3f4"));
    }

    /// A config written by an older ember binary has no `instance_id`
    /// key. Serde supplies the empty-string default, and
    /// `instance_namespace()` maps that to `None`, letting each
    /// subsystem tell legacy and tagged installs apart in its own
    /// module.
    #[test]
    fn legacy_config_has_no_instance_namespace() {
        let legacy_json = r#"{"pool":"tank","dataset":"ember","kernel_path":null}"#;
        let legacy: GlobalConfig = serde_json::from_str(legacy_json).unwrap();
        assert_eq!(legacy.instance_id, "");
        assert_eq!(legacy.instance_namespace(), None);
        // A missing ip_subnet falls back to the historical default.
        assert_eq!(legacy.ip_subnet, "10.100.0.0/16");
    }
}
Loading
Loading