From b2b6acf6f2293184358e4804c8157385168f8c75 Mon Sep 17 00:00:00 2001 From: Ty Smith Date: Fri, 3 Jul 2026 15:43:18 -0700 Subject: [PATCH 1/2] feat(security): harden PAM trust boundary (plan 05) - Sanitize oneshot child environment: env_clear + allow-list (SSH_CONNECTION, SSH_TTY, pinned PATH); blocks LD_PRELOAD/LD_* injection into the root-context child. - Verify config trust before parsing: /etc/facelock/config.toml and all parent dirs must be root-owned and not group/world-writable; fstat on the opened fd avoids TOCTOU. Untrusted config fails closed (PAM_IGNORE). - Verify daemon peer UID: resolve org.facelock.Daemon owner, require UID 0, and pin the Authenticate call to the owner's unique bus name. Removes the single point of failure on the D-Bus policy file. - Make the -2 error contract live (option 4a): the daemon now encodes recoverable errors (rate limited, IR required, camera/storage failure) in-band as AuthResult{model_id:-2,label}; the PAM client classifies them (rate limited -> PAM_AUTH_ERR) instead of silently escalating to a fresh root oneshot attempt. CLI ipc_client decodes -2/-3 sentinels. - Structured timeout classification: match zbus InputOutput(TimedOut), MethodError(NoReply/Timeout/TimedOut), and fdo variants instead of substring matching; bound connection establishment with an overall deadline on a worker thread. - Container tests: group/world-writable config rejection, LD_PRELOAD marker + child env capture, non-root fake daemon peer refusal (test-arch-pam); rate-limit no-escalation assertions (test-arch-integration). - Update docs/contracts.md and docs/security.md. Co-Authored-By: Claude Fable 5 --- crates/facelock-cli/src/commands/daemon.rs | 35 +- crates/facelock-cli/src/ipc_client.rs | 11 + crates/pam-facelock/src/lib.rs | 500 ++++++++++++++++++--- docs/contracts.md | 44 +- docs/security.md | 52 +++ test/Containerfile | 5 +- test/fake-facelock-daemon.py | 53 +++ test/run-container-tests.sh | 98 ++++ test/run-integration-tests.sh | 21 + 9 files changed, 757 insertions(+), 62 deletions(-) create mode 100644 test/fake-facelock-daemon.py diff --git a/crates/facelock-cli/src/commands/daemon.rs b/crates/facelock-cli/src/commands/daemon.rs index 9ce3de1..3f4a88d 100644 --- a/crates/facelock-cli/src/commands/daemon.rs +++ b/crates/facelock-cli/src/commands/daemon.rs @@ -249,6 +249,25 @@ fn now_secs() -> u64 { epoch.elapsed().as_secs() } +/// Encode a recoverable authentication error into the `AuthResult` wire +/// format (`model_id == -2`, `label` = error message) instead of a D-Bus +/// error. +/// +/// A D-Bus error reply makes clients treat the daemon as broken: the PAM +/// module would fall back to a fresh root oneshot attempt, silently +/// escalating past daemon-side state such as rate limiting. In-band encoding +/// lets the PAM client classify the error (rate limited → PAM_AUTH_ERR, +/// everything else → PAM_IGNORE) without retrying. +/// See docs/contracts.md ("Authenticate error encoding"). +fn recoverable_auth_error(message: String) -> AuthResult { + AuthResult { + matched: false, + model_id: -2, + label: message, + similarity: 0.0, + } +} + struct FacelockService { handler: Arc>, /// Timestamp of last D-Bus method call (seconds since daemon start). @@ -381,7 +400,7 @@ impl FacelockService { similarity: 0.0, }) } - DaemonResponse::Error { message } => Err(fdo::Error::Failed(message)), + DaemonResponse::Error { message } => Ok(recoverable_auth_error(message)), other => Err(fdo::Error::Failed(format!( "unexpected response: {other:?}" ))), @@ -1052,6 +1071,20 @@ mod tests { assert_eq!(OBJECT_PATH, "/org/facelock/Daemon"); } + #[test] + fn recoverable_auth_errors_are_encoded_in_band() { + // Recoverable errors (rate limited, IR required, camera/storage + // failures) must travel in the AuthResult wire format with + // model_id == -2, not as D-Bus errors: a D-Bus error would make the + // PAM client fall back to a fresh root oneshot attempt, silently + // bypassing daemon-side state such as rate limiting. + let result = recoverable_auth_error("rate limited".to_string()); + assert!(!result.matched); + assert_eq!(result.model_id, -2); + assert_eq!(result.label, "rate limited"); + assert_eq!(result.similarity, 0.0); + } + #[test] fn root_is_allowed_for_privileged_operations() { assert!(require_root(&caller(0, Some("root")), "Shutdown").is_ok()); diff --git a/crates/facelock-cli/src/ipc_client.rs b/crates/facelock-cli/src/ipc_client.rs index a361ba7..3e30021 100644 --- a/crates/facelock-cli/src/ipc_client.rs +++ b/crates/facelock-cli/src/ipc_client.rs @@ -92,6 +92,17 @@ pub fn send_request(request: &DaemonRequest) -> anyhow::Result { let result: AuthResult = proxy .call("Authenticate", &(user.as_str(),)) .context("D-Bus Authenticate call failed")?; + // Sentinel model_id values (see docs/contracts.md): + // -2 = recoverable daemon error (label carries the message), + // -3 = suppressed (no enrolled models + suppress_unknown). + if !result.matched && result.model_id == -2 { + return Ok(DaemonResponse::Error { + message: result.label, + }); + } + if !result.matched && result.model_id == -3 { + return Ok(DaemonResponse::Suppressed); + } Ok(DaemonResponse::AuthResult(MatchResult { matched: result.matched, model_id: if result.model_id >= 0 { diff --git a/crates/pam-facelock/src/lib.rs b/crates/pam-facelock/src/lib.rs index 7931c0b..f8dfe5b 100644 --- a/crates/pam-facelock/src/lib.rs +++ b/crates/pam-facelock/src/lib.rs @@ -430,65 +430,185 @@ enum AuthResponse { Error { message: String }, } +/// Structured timeout classification for zbus errors. +/// +/// Matches the concrete variants zbus uses for method-call timeouts +/// (`InputOutput` with `ErrorKind::TimedOut` from `method_timeout`) and the +/// D-Bus daemon's own reply-timeout errors, instead of fragile substring +/// matching on the display string. +fn is_timeout_zbus_error(err: &zbus::Error) -> bool { + match err { + zbus::Error::InputOutput(io_err) => io_err.kind() == std::io::ErrorKind::TimedOut, + zbus::Error::MethodError(name, _, _) => matches!( + name.as_str(), + "org.freedesktop.DBus.Error.NoReply" + | "org.freedesktop.DBus.Error.Timeout" + | "org.freedesktop.DBus.Error.TimedOut" + ), + zbus::Error::FDO(fdo_err) => is_timeout_fdo_error(fdo_err), + _ => false, + } +} + +/// Timeout classification for `zbus::fdo::Error` (returned by the +/// `org.freedesktop.DBus` proxy methods used for peer verification). +fn is_timeout_fdo_error(err: &zbus::fdo::Error) -> bool { + matches!( + err, + zbus::fdo::Error::NoReply(_) | zbus::fdo::Error::Timeout(_) | zbus::fdo::Error::TimedOut(_) + ) +} + +fn classify_zbus_error(prefix: &str, err: &zbus::Error) -> String { + if is_timeout_zbus_error(err) { + format!("dbus_timeout: {err}") + } else { + format!("{prefix}: {err}") + } +} + +fn classify_fdo_error(prefix: &str, err: &zbus::fdo::Error) -> String { + if is_timeout_fdo_error(err) { + format!("dbus_timeout: {err}") + } else { + format!("{prefix}: {err}") + } +} + +/// Map a raw D-Bus `Authenticate` reply onto an [`AuthResponse`]. +/// +/// Sentinel `model_id` values (with `matched == false`): +/// - `-2`: recoverable daemon error, `label` carries the error message +/// (rate limited, IR required, camera/storage failure). +/// - `-3`: suppressed (no enrolled models + `suppress_unknown`). +fn parse_auth_reply(matched: bool, model_id: i32, label: String, similarity: f64) -> AuthResponse { + if matched { + return AuthResponse::Matched { + similarity: similarity as f32, + }; + } + match model_id { + -2 => AuthResponse::Error { message: label }, + -3 => AuthResponse::Suppressed, + _ => AuthResponse::NoMatch { + similarity: similarity as f32, + }, + } +} + +/// Verify that the current owner of `org.facelock.Daemon` is running as root +/// (UID 0) and return its unique bus name. +/// +/// Trust boundary: the D-Bus policy file is supposed to prevent unprivileged +/// processes from owning the daemon name, but the PAM module must not have a +/// single point of failure on that file. The caller pins the `Authenticate` +/// call to the returned unique name, so the owner cannot change between the +/// UID check and the method call. +fn verify_daemon_peer( + connection: &zbus::blocking::Connection, +) -> Result { + let dbus_proxy = zbus::blocking::fdo::DBusProxy::new(connection) + .map_err(|e| classify_zbus_error("dbus_proxy_failed", &e))?; + + let bus_name = zbus::names::BusName::try_from(DBUS_BUS_NAME) + .map_err(|e| format!("dbus_proxy_failed: invalid bus name: {e}"))?; + + // Resolve the owner, triggering D-Bus activation first if the daemon is + // not running yet (GetNameOwner alone does not activate services). + let owner = match dbus_proxy.get_name_owner(bus_name.clone()) { + Ok(owner) => owner, + Err(_) => { + let well_known = zbus::names::WellKnownName::try_from(DBUS_BUS_NAME) + .map_err(|e| format!("dbus_proxy_failed: invalid bus name: {e}"))?; + dbus_proxy + .start_service_by_name(well_known, 0) + .map_err(|e| classify_fdo_error("dbus_connect_failed", &e))?; + dbus_proxy + .get_name_owner(bus_name) + .map_err(|e| classify_fdo_error("dbus_connect_failed", &e))? + } + }; + + let owner_uid = dbus_proxy + .get_connection_unix_user(zbus::names::BusName::from(owner.inner().clone())) + .map_err(|e| classify_fdo_error("dbus_call_failed", &e))?; + + if owner_uid != 0 { + // Refuse to trust the reply of a non-root peer. The caller falls + // through (oneshot fallback / password), never PAM_SUCCESS. + return Err(format!( + "dbus_untrusted_peer: {DBUS_BUS_NAME} owned by uid {owner_uid} (require root)" + )); + } + + Ok(owner) +} + /// Authenticate via D-Bus system bus to the facelock daemon. +/// +/// Runs the blocking D-Bus work on a worker thread with an overall deadline, +/// so that even connection establishment against a hung bus cannot stall the +/// PAM stack (zbus `method_timeout` only bounds method calls, not connect). fn daemon_authenticate(config: &PamConfig, user: &str) -> Result { - // Timeout = recognition timeout + buffer for camera open/warmup/model load - let timeout_secs = config.recognition.timeout_secs as u64 + 5; + // Method timeout = recognition timeout + buffer for camera open/warmup/model load + let method_timeout_secs = config.recognition.timeout_secs as u64 + 5; + // Overall deadline adds a further buffer for connection establishment, + // D-Bus activation, and peer verification round-trips. + let overall_timeout = std::time::Duration::from_secs(method_timeout_secs + 5); + + let (tx, rx) = std::sync::mpsc::channel(); + let user_owned = user.to_string(); + std::thread::Builder::new() + .name("pam-facelock-dbus".into()) + .spawn(move || { + let _ = tx.send(daemon_authenticate_blocking( + method_timeout_secs, + &user_owned, + )); + }) + .map_err(|e| format!("dbus_call_failed: worker thread spawn failed: {e}"))?; + + match rx.recv_timeout(overall_timeout) { + Ok(result) => result, + Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { + // The worker is stuck (most likely in connection establishment). + // Abandon it; never block the PAM stack. + Err("dbus_timeout: connection or method call exceeded overall deadline".into()) + } + Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => { + Err("dbus_call_failed: worker thread terminated unexpectedly".into()) + } + } +} +fn daemon_authenticate_blocking( + method_timeout_secs: u64, + user: &str, +) -> Result { let connection = zbus::blocking::connection::Builder::system() .map_err(|e| format!("dbus_connect_failed: {e}"))? - .method_timeout(std::time::Duration::from_secs(timeout_secs)) + .method_timeout(std::time::Duration::from_secs(method_timeout_secs)) .build() .map_err(|e| format!("dbus_connect_failed: {e}"))?; + // Verify the daemon peer is root and pin the call to its unique name. + let daemon_peer = verify_daemon_peer(&connection)?; + let proxy = zbus::blocking::Proxy::new( &connection, - DBUS_BUS_NAME, + zbus::names::BusName::from(daemon_peer.inner().clone()), DBUS_OBJECT_PATH, DBUS_INTERFACE_NAME, ) - .map_err(|e| format!("dbus_proxy_failed: {e}"))?; + .map_err(|e| classify_zbus_error("dbus_proxy_failed", &e))?; // D-Bus method returns (matched: bool, model_id: i32, label: String, similarity: f64) - let reply: (bool, i32, String, f64) = - proxy - .call("Authenticate", &(user,)) - .map_err(|e: zbus::Error| { - let msg = e.to_string(); - // Check if this is a timeout or connection error for fallback - if msg.contains("timed out") || msg.contains("Timeout") { - format!("dbus_timeout: {msg}") - } else { - format!("dbus_call_failed: {msg}") - } - })?; + let reply: (bool, i32, String, f64) = proxy + .call("Authenticate", &(user,)) + .map_err(|e: zbus::Error| classify_zbus_error("dbus_call_failed", &e))?; let (matched, model_id, label, similarity) = reply; - - // Check for D-Bus error responses encoded in the return value - // model_id == -2 with matched == false signals a daemon error, label contains the error message - if !matched && model_id == -2 { - return Ok(AuthResponse::Error { message: label }); - } - - // model_id == -3 with matched == false signals suppress_unknown - // (no enrolled models, config says to let PAM stack fall through) - if !matched && model_id == -3 { - return Ok(AuthResponse::Suppressed); - } - - if matched { - Ok(AuthResponse::Matched { - similarity: similarity as f32, - }) - } else if similarity == 0.0 && model_id == -1 { - // No enrolled faces - Ok(AuthResponse::NoMatch { similarity: 0.0 }) - } else { - Ok(AuthResponse::NoMatch { - similarity: similarity as f32, - }) - } + Ok(parse_auth_reply(matched, model_id, label, similarity)) } // --------------------------------------------------------------------------- @@ -496,13 +616,55 @@ fn daemon_authenticate(config: &PamConfig, user: &str) -> Result Result { - let config_path = DEFAULT_CONFIG_PATH; - - let content = std::fs::read_to_string(config_path).map_err(|e| format!("read config: {e}"))?; + let content = read_trusted_config(DEFAULT_CONFIG_PATH)?; toml::from_str(&content).map_err(|e| format!("parse config: {e}")) } +/// Read a config file only if it is trustworthy: the file and every parent +/// directory must be root-owned and not group- or world-writable. Anything +/// else is rejected and the module fails closed (falls through to password). +/// +/// The file's own check uses `fstat` on the opened descriptor so the +/// validated inode is exactly the one whose contents are read (no TOCTOU +/// between a path-based stat and the read). +fn read_trusted_config(path: &str) -> Result { + use std::io::Read; + + let mut file = std::fs::File::open(path).map_err(|e| format!("read config: {e}"))?; + + #[cfg(unix)] + { + use std::os::unix::fs::{MetadataExt, PermissionsExt}; + + let metadata = file + .metadata() + .map_err(|e| format!("failed to stat config {path}: {e}"))?; + validate_root_owned_nonwritable( + metadata.uid(), + metadata.permissions().mode(), + &format!("config {path}"), + )?; + + let mut current = Path::new(path).parent(); + while let Some(dir) = current { + let meta = std::fs::metadata(dir) + .map_err(|e| format!("failed to stat config parent {}: {e}", dir.display()))?; + validate_root_owned_nonwritable( + meta.uid(), + meta.permissions().mode(), + &format!("config parent dir {}", dir.display()), + )?; + current = dir.parent(); + } + } + + let mut content = String::new(); + file.read_to_string(&mut content) + .map_err(|e| format!("read config: {e}"))?; + Ok(content) +} + fn validate_auth_bin(path: &str) -> Result { if path.is_empty() { return Err("auth_bin must not be empty".into()); @@ -562,6 +724,25 @@ fn validate_root_owned_nonwritable(uid: u32, mode: u32, label: &str) -> Result<( // One-shot authentication (daemonless) // --------------------------------------------------------------------------- +/// Build the sanitized environment for the spawned oneshot child. +/// +/// Allow-list only: `SSH_CONNECTION`/`SSH_TTY` are forwarded because the +/// child re-runs its own SSH-abort check (`facelock auth`). `PATH` is pinned +/// to trusted system directories. Everything else — `LD_*`, `XDG_*`, +/// `DBUS_*`, etc. — is dropped; the notification path constructs its own +/// session-bus address from the target UID and needs no inherited XDG vars. +fn oneshot_child_env( + parent: impl Iterator, +) -> Vec<(std::ffi::OsString, std::ffi::OsString)> { + const KEEP: &[&str] = &["SSH_CONNECTION", "SSH_TTY"]; + + let mut env: Vec<(std::ffi::OsString, std::ffi::OsString)> = parent + .filter(|(key, _)| KEEP.iter().any(|keep| key.as_os_str() == *keep)) + .collect(); + env.push(("PATH".into(), "/usr/bin:/bin".into())); + env +} + /// Run facelock auth as a subprocess for daemonless authentication. /// Exit codes: 0 = matched, 1 = no match, 2+ = error. fn run_oneshot_auth(service: &str, user: &str, config: &PamConfig) -> libc::c_int { @@ -588,6 +769,12 @@ fn run_oneshot_auth(service: &str, user: &str, config: &PamConfig) -> libc::c_in .arg(user) .arg("--config") .arg(DEFAULT_CONFIG_PATH) + // Sanitize the child environment: the PAM caller may be fully root + // (no AT_SECURE), so inherited LD_PRELOAD/LD_* would be honored by + // the dynamic linker in the spawned root child. Start from an empty + // environment and re-add only the vetted allow-list. + .env_clear() + .envs(oneshot_child_env(std::env::vars_os())) .stdin(std::process::Stdio::null()) .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::piped()) @@ -658,6 +845,21 @@ fn run_oneshot_auth(service: &str, user: &str, config: &PamConfig) -> libc::c_in // Core authentication logic // --------------------------------------------------------------------------- +/// Map a recoverable daemon error message onto a PAM return code plus a +/// syslog reason. Every branch degrades (PAM_AUTH_ERR / PAM_IGNORE); a +/// daemon error can never map to PAM_SUCCESS. +fn pam_code_for_daemon_error(message: &str) -> (libc::c_int, &'static str) { + if message.contains("rate_limit") || message.contains("rate limited") { + // Deliberate failure, not fall-through: the user has exhausted the + // face-auth budget; the password modules still run after us. + (PAM_AUTH_ERR, "rate_limited") + } else if message.contains("IR camera required") || message.contains("ir_required") { + (PAM_IGNORE, "ir_required") + } else { + (PAM_IGNORE, "error: internal") + } +} + fn identify(pamh: *mut libc::c_void) -> libc::c_int { // 0. Get PAM service name for logging let service = unsafe { pam_get_service(pamh) }.unwrap_or_else(|| "unknown".to_string()); @@ -757,17 +959,13 @@ fn identify(pamh: *mut libc::c_void) -> libc::c_int { PAM_AUTHINFO_UNAVAIL } Ok(AuthResponse::Error { message }) => { - // Map specific daemon errors to appropriate PAM codes - if message.contains("rate_limit") || message.contains("rate limited") { - log_auth(&service, "rate_limited", &user, LOG_WARNING); - PAM_AUTH_ERR - } else if message.contains("IR camera required") || message.contains("ir_required") { - log_auth(&service, "ir_required", &user, LOG_WARNING); - PAM_IGNORE - } else { - log_auth(&service, "error: internal", &user, LOG_WARNING); - PAM_IGNORE - } + // The daemon answered with a recoverable error (model_id == -2). + // It is running and made a decision, so never fall back to a + // fresh root oneshot attempt (which would bypass daemon-side + // state such as rate limiting). Degrade to password instead. + let (code, reason) = pam_code_for_daemon_error(&message); + log_auth(&service, reason, &user, LOG_WARNING); + code } Err(e) => { // D-Bus connection/call failed -- fall back to oneshot mode @@ -946,4 +1144,194 @@ db_path = "/tmp/test.db" .unwrap_err(); assert!(err.contains("owned by root")); } + + // --- Plan 05: oneshot child environment sanitization --- + + fn env_pairs(pairs: &[(&str, &str)]) -> Vec<(std::ffi::OsString, std::ffi::OsString)> { + pairs + .iter() + .map(|(k, v)| (std::ffi::OsString::from(k), std::ffi::OsString::from(v))) + .collect() + } + + #[test] + fn oneshot_child_env_drops_ld_preload_and_keeps_ssh_vars() { + let parent = env_pairs(&[ + ("LD_PRELOAD", "/tmp/evil.so"), + ("LD_LIBRARY_PATH", "/tmp"), + ("SSH_CONNECTION", "192.0.2.1 1111 192.0.2.2 22"), + ("SSH_TTY", "/dev/pts/3"), + ("DBUS_SESSION_BUS_ADDRESS", "unix:path=/tmp/bus"), + ("XDG_RUNTIME_DIR", "/run/user/1000"), + ("PATH", "/tmp/evil:/usr/bin"), + ]); + let child = oneshot_child_env(parent.into_iter()); + + let keys: Vec<&str> = child.iter().map(|(k, _)| k.to_str().unwrap()).collect(); + assert!(!keys.contains(&"LD_PRELOAD")); + assert!(!keys.contains(&"LD_LIBRARY_PATH")); + assert!(!keys.contains(&"DBUS_SESSION_BUS_ADDRESS")); + assert!(!keys.contains(&"XDG_RUNTIME_DIR")); + assert!(keys.contains(&"SSH_CONNECTION")); + assert!(keys.contains(&"SSH_TTY")); + } + + #[test] + fn oneshot_child_env_pins_path() { + let parent = env_pairs(&[("PATH", "/tmp/evil:/usr/bin")]); + let child = oneshot_child_env(parent.into_iter()); + let path = child + .iter() + .find(|(k, _)| k == "PATH") + .map(|(_, v)| v.to_str().unwrap().to_string()) + .unwrap(); + assert_eq!(path, "/usr/bin:/bin"); + } + + #[test] + fn oneshot_child_env_without_ssh_vars_is_path_only() { + let parent = env_pairs(&[("HOME", "/root"), ("TERM", "xterm")]); + let child = oneshot_child_env(parent.into_iter()); + assert_eq!(child.len(), 1); + assert_eq!(child[0].0, "PATH"); + } + + // --- Plan 05: config trust validation --- + + #[test] + fn read_trusted_config_missing_file_is_an_error() { + let err = read_trusted_config("/definitely/missing/facelock-config.toml").unwrap_err(); + assert!(err.contains("read config")); + } + + #[cfg(unix)] + #[test] + fn read_trusted_config_rejects_untrusted_file() { + // Create a config in a tmp dir. When running unprivileged the file is + // not root-owned; when running as root the world-writable tmp parent + // fails the parent-directory check. Either way it must be rejected. + let dir = std::env::temp_dir().join(format!("pam-facelock-test-{}", std::process::id())); + std::fs::create_dir_all(&dir).unwrap(); + let path = dir.join("config.toml"); + std::fs::write(&path, "[daemon]\nmode = \"daemon\"\n").unwrap(); + + let result = read_trusted_config(path.to_str().unwrap()); + std::fs::remove_dir_all(&dir).ok(); + + let err = result.unwrap_err(); + assert!( + err.contains("owned by root") || err.contains("group- or world-writable"), + "unexpected error: {err}" + ); + } + + // --- Plan 05: daemon reply decoding (dead -2 contract made live) --- + + #[test] + fn parse_auth_reply_decodes_recoverable_error() { + match parse_auth_reply(false, -2, "rate limited".into(), 0.0) { + AuthResponse::Error { message } => assert_eq!(message, "rate limited"), + _ => panic!("expected Error"), + } + } + + #[test] + fn parse_auth_reply_decodes_suppressed() { + assert!(matches!( + parse_auth_reply(false, -3, String::new(), 0.0), + AuthResponse::Suppressed + )); + } + + #[test] + fn parse_auth_reply_decodes_match() { + assert!(matches!( + parse_auth_reply(true, 4, "me".into(), 0.93), + AuthResponse::Matched { .. } + )); + } + + #[test] + fn parse_auth_reply_decodes_no_match() { + match parse_auth_reply(false, -1, String::new(), 0.4) { + AuthResponse::NoMatch { similarity } => assert!((similarity - 0.4).abs() < 1e-6), + _ => panic!("expected NoMatch"), + } + } + + #[test] + fn parse_auth_reply_error_sentinel_requires_unmatched() { + // A matched=true reply always wins over sentinel model ids: the + // daemon never sends this, but decoding must not invent an error. + assert!(matches!( + parse_auth_reply(true, -2, "x".into(), 0.9), + AuthResponse::Matched { .. } + )); + } + + // --- Plan 05: daemon error → PAM code routing --- + + #[test] + fn daemon_error_rate_limited_maps_to_auth_err() { + let (code, reason) = pam_code_for_daemon_error("rate limited"); + assert_eq!(code, PAM_AUTH_ERR); + assert_eq!(reason, "rate_limited"); + } + + #[test] + fn daemon_error_ir_required_maps_to_ignore() { + let (code, reason) = + pam_code_for_daemon_error("IR camera required for authentication. ..."); + assert_eq!(code, PAM_IGNORE); + assert_eq!(reason, "ir_required"); + } + + #[test] + fn daemon_error_never_maps_to_success() { + for message in [ + "rate limited", + "IR camera required", + "camera error: device busy", + "storage error: disk io", + "", + ] { + let (code, _) = pam_code_for_daemon_error(message); + assert_ne!(code, PAM_SUCCESS, "fail-open for message: {message}"); + } + } + + // --- Plan 05: structured timeout classification --- + + #[test] + fn timeout_classification_matches_io_timed_out() { + let err = zbus::Error::InputOutput(std::sync::Arc::new(std::io::Error::new( + std::io::ErrorKind::TimedOut, + "timeout waiting for reply", + ))); + assert!(is_timeout_zbus_error(&err)); + } + + #[test] + fn timeout_classification_matches_fdo_variants() { + for fdo_err in [ + zbus::fdo::Error::NoReply("no reply".into()), + zbus::fdo::Error::Timeout("timeout".into()), + zbus::fdo::Error::TimedOut("timed out".into()), + ] { + let err = zbus::Error::FDO(Box::new(fdo_err)); + assert!(is_timeout_zbus_error(&err), "not classified: {err}"); + } + } + + #[test] + fn timeout_classification_rejects_other_errors() { + assert!(!is_timeout_zbus_error(&zbus::Error::Failure( + "something timed out-ish but is not a timeout variant".into() + ))); + let io_err = zbus::Error::InputOutput(std::sync::Arc::new(std::io::Error::new( + std::io::ErrorKind::ConnectionRefused, + "refused", + ))); + assert!(!is_timeout_zbus_error(&io_err)); + } } diff --git a/docs/contracts.md b/docs/contracts.md index 4839e43..cf8c88a 100644 --- a/docs/contracts.md +++ b/docs/contracts.md @@ -145,16 +145,52 @@ Method authorization contract: ### Response types `AuthResult`, `Enrolled`, `Models`, `Removed`, `Frame`, `DetectFrame`, `Devices`, `Ok`, `Error` +### Authenticate error encoding + +`Authenticate` returns `AuthResult (matched: b, model_id: i, label: s, similarity: d)`. +Sentinel `model_id` values (only meaningful with `matched == false`): + +| model_id | Meaning | +|----------|---------| +| >= 0 | Matched model id (with `matched == true`) | +| -1 | No match / no enrolled faces | +| -2 | Recoverable daemon error; `label` carries the error message (rate limited, IR required, camera/storage failure) | +| -3 | Suppressed: no enrolled models and `security.suppress_unknown = true` | + +Recoverable errors travel **in-band** (model_id `-2`), not as D-Bus errors, so +clients can distinguish "the daemon decided auth cannot proceed" from "the +daemon is unavailable". D-Bus errors remain for authorization failures, +daemon-busy, and transport problems. In particular, a rate-limited state is a +daemon decision and must never make the PAM client retry via a root oneshot. + +### Daemon peer verification (PAM client) + +Before trusting an `Authenticate` reply, the PAM module resolves the owner of +`org.facelock.Daemon` (`GetNameOwner`, activating the service first if +needed), requires the owner UID to be 0 (`GetConnectionUnixUser`), and pins +the method call to the owner's unique bus name. A non-root owner is refused: +the module falls through (oneshot fallback / password), never `PAM_SUCCESS`. + ## PAM Semantics | Outcome | PAM Code | |---------|----------| | Face matched | `PAM_SUCCESS` (0) | | No match | `PAM_AUTH_ERR` (7) | -| Daemon unavailable / error | `PAM_IGNORE` (25) | -| Timeout | `PAM_AUTH_ERR` (7) | - -PAM module never blocks indefinitely. All operations have timeouts. +| Rate limited (daemon, model_id -2) | `PAM_AUTH_ERR` (7) — no oneshot fallback | +| IR required / internal daemon error (model_id -2) | `PAM_IGNORE` (25) — no oneshot fallback | +| Suppressed (model_id -3) | `PAM_AUTHINFO_UNAVAIL` (9) | +| Daemon unavailable / untrusted (non-root) peer | oneshot fallback, else `PAM_IGNORE` (25) | +| Config missing, unparseable, or untrusted (not root-owned / group- or world-writable, incl. parents) | `PAM_IGNORE` (25) | +| Timeout (structured zbus timeout or overall deadline) | `PAM_AUTH_ERR` (7) | + +PAM module never blocks indefinitely. All operations have timeouts, including +D-Bus connection establishment (overall deadline on a worker thread). + +The oneshot fallback spawns `facelock auth` with a sanitized environment: +`env_clear()` plus an allow-list of `SSH_CONNECTION`, `SSH_TTY`, and a pinned +`PATH=/usr/bin:/bin`. No other variables (`LD_*`, `XDG_*`, `DBUS_*`, ...) are +inherited. Stdin is `/dev/null`. ### Syslog Format diff --git a/docs/security.md b/docs/security.md index 1fcd844..a0f6351 100644 --- a/docs/security.md +++ b/docs/security.md @@ -200,6 +200,31 @@ The daemon must also verify the caller UID via `GetConnectionUnixUser` on every - `ReleaseCamera`: root or the Unix user that owns the active preview camera session - `ListDevices`: root or a caller in the `facelock` group +#### A2. PAM Peer-UID Verification (Required) + +The trust check runs in both directions: before trusting an `Authenticate` +reply, the PAM module resolves the owner of `org.facelock.Daemon` via +`GetNameOwner`, verifies that owner's UID is 0 via `GetConnectionUnixUser`, +and pins the method call to the owner's *unique* bus name so the owner cannot +change between check and call. If the name is owned by a non-root process +(e.g. because the bus policy file was misconfigured or replaced), the module +refuses the reply and degrades — it never returns `PAM_SUCCESS` on the word +of an unverified peer. This removes the single point of failure on the +`org.facelock.Daemon.conf` policy file. + +#### A3. In-Band Recoverable Error Encoding (Required) + +Recoverable authentication errors (rate limited, IR required, camera or +storage failure) are returned in-band as `AuthResult { model_id: -2, label: + }` rather than as D-Bus errors. A D-Bus error reply is +indistinguishable from "daemon broken", which would make the PAM module fall +back to a fresh root oneshot attempt — silently escalating past daemon-side +state such as the rate-limit window. With in-band encoding the PAM module +classifies the error itself: rate limited maps to `PAM_AUTH_ERR` (face-auth +budget exhausted, password modules still run), everything else to +`PAM_IGNORE`. D-Bus errors remain reserved for authorization failures and +transport-level problems, which do fall back to the oneshot path. + #### B. D-Bus Message Size Limits (Enforced by Bus) The D-Bus bus daemon enforces message size limits (typically 128MB by default, configurable in the bus configuration). This prevents oversized messages from consuming daemon memory without requiring application-level size checks. @@ -228,6 +253,33 @@ Implementation note: ### 5. PAM Module Hardening +#### A0. Config File Trust (Required) + +The PAM module runs in a root context, so `/etc/facelock/config.toml` is an +attack vector: a writable config could redirect `auth_bin`, disable +anti-spoofing knobs, or change the daemon mode. Before parsing, the module +verifies that the config file **and every parent directory** are root-owned +and not group- or world-writable. The file check uses `fstat` on the opened +descriptor so the validated inode is exactly the one read (no TOCTOU). An +untrusted config is treated like a missing config: the module logs the +reason and returns `PAM_IGNORE` (fail closed, fall through to password). + +#### A1. Sanitized Oneshot Child Environment (Required) + +The oneshot fallback spawns `facelock auth` as root. When the PAM caller is +fully root (euid == uid, no `AT_SECURE`), the dynamic linker honors +`LD_PRELOAD`/`LD_*` from the inherited environment — allowing arbitrary code +injection into a root process. The module therefore spawns the child with +`env_clear()` and an allow-list: + +- `SSH_CONNECTION` / `SSH_TTY` — forwarded so the child's own SSH-abort + check keeps working +- `PATH=/usr/bin:/bin` — pinned, never inherited + +Everything else (`LD_*`, `XDG_*`, `DBUS_*`, ...) is dropped. The desktop +notification path constructs its own session-bus address from the target UID +and does not need inherited XDG variables. The child's stdin is `/dev/null`. + #### A. Audit Logging (Required) Log all authentication attempts with outcomes: diff --git a/test/Containerfile b/test/Containerfile index e833130..385d973 100644 --- a/test/Containerfile +++ b/test/Containerfile @@ -2,7 +2,7 @@ FROM archlinux:latest # Install dependencies -RUN pacman -Syu --noconfirm pam sudo base-devel libxkbcommon just dbus onnxruntime-cpu protobuf && pacman -Scc --noconfirm +RUN pacman -Syu --noconfirm pam sudo base-devel libxkbcommon just dbus onnxruntime-cpu protobuf python python-gobject sqlite && pacman -Scc --noconfirm # Build pamtester from upstream source RUN curl -sL https://sourceforge.net/projects/pamtester/files/pamtester/0.1.2/pamtester-0.1.2.tar.gz/download | tar xz -C /tmp && \ @@ -39,6 +39,9 @@ COPY test/container-config.toml /etc/facelock/config.toml # Facelock-test PAM config for pamtester COPY test/pam.d/facelock-test /etc/pam.d/facelock-test +# Fake daemon harness for the PAM peer-UID container test (plan 05) +COPY test/fake-facelock-daemon.py /fake-facelock-daemon.py + # Copy test scripts COPY test/run-container-tests.sh /run-tests.sh COPY test/run-integration-tests.sh /run-integration-tests.sh diff --git a/test/fake-facelock-daemon.py b/test/fake-facelock-daemon.py new file mode 100644 index 0000000..8401d0e --- /dev/null +++ b/test/fake-facelock-daemon.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +"""Fake org.facelock.Daemon that always replies matched=true. + +Container-test harness for the PAM peer-UID check (plan 05): it is run as an +unprivileged user (with a deliberately loosened test bus policy) to prove +that pam_facelock refuses to trust an Authenticate reply from a bus-name +owner that is not running as root — even when that reply claims a match. +""" + +import gi + +gi.require_version("Gio", "2.0") +from gi.repository import Gio, GLib # noqa: E402 + +INTROSPECTION_XML = """ + + + + + + + + +""" + + +def on_method_call(_conn, _sender, _path, _iface, method, _params, invocation): + if method == "Authenticate": + # matched=true, model_id=1, label="fake", similarity=0.999 + invocation.return_value( + GLib.Variant("((bisd))", ((True, 1, "fake", 0.999),)) + ) + else: + invocation.return_dbus_error( + "org.freedesktop.DBus.Error.UnknownMethod", f"no such method: {method}" + ) + + +def main(): + node = Gio.DBusNodeInfo.new_for_xml(INTROSPECTION_XML) + conn = Gio.bus_get_sync(Gio.BusType.SYSTEM, None) + conn.register_object( + "/org/facelock/Daemon", node.interfaces[0], on_method_call, None, None + ) + Gio.bus_own_name_on_connection( + conn, "org.facelock.Daemon", Gio.BusNameOwnerFlags.NONE, None, None + ) + print("fake facelock daemon ready", flush=True) + GLib.MainLoop().run() + + +if __name__ == "__main__": + main() diff --git a/test/run-container-tests.sh b/test/run-container-tests.sh index 423e75b..93d9e8b 100755 --- a/test/run-container-tests.sh +++ b/test/run-container-tests.sh @@ -101,6 +101,104 @@ run_test "Oneshot mode: no enrolled faces returns quickly" \ "sed -i '/^\[daemon\]/a mode = \"oneshot\"' /etc/facelock/config.toml; timeout 3 pamtester facelock-test testuser authenticate 2>&1; rc=\$?; sed -i '/^mode = \"oneshot\"/d' /etc/facelock/config.toml; test \$rc -ne 124" \ 0 +# --- Plan 05: PAM trust-boundary hardening (all camera-free) --- + +# (a) A group/world-writable config must be rejected: the module ignores it +# and fails closed (PAM_IGNORE -> pam_deny). The 'Identifying face' prompt +# only appears once the config is accepted, so its absence plus an auth +# failure proves the module rejected the file instead of trusting it. +run_test "Group-writable config rejected, fails closed" \ + "chmod 664 /etc/facelock/config.toml; pamtester facelock-test testuser authenticate < /dev/null > /tmp/gw-out 2>&1; rc=\$?; chmod 644 /etc/facelock/config.toml; test \$rc -ne 0 && ! grep -q 'Identifying face' /tmp/gw-out" \ + 0 + +run_test "World-writable config rejected, fails closed" \ + "chmod 666 /etc/facelock/config.toml; pamtester facelock-test testuser authenticate < /dev/null > /tmp/ww-out 2>&1; rc=\$?; chmod 644 /etc/facelock/config.toml; test \$rc -ne 0 && ! grep -q 'Identifying face' /tmp/ww-out" \ + 0 + +run_test "Config accepted again after restoring 644" \ + "pamtester facelock-test testuser authenticate 2>&1 | grep -q 'Identifying face'" \ + 0 + +# (b) env_clear: LD_PRELOAD must never reach the spawned oneshot child while +# SSH_CONNECTION must survive. A constructor-marker .so logs every process it +# is loaded into; a root-owned capture stub stands in for the auth binary so +# the exact child environment can be asserted. +cat > /tmp/preload-marker.c <<'EOF' +#define _GNU_SOURCE +#include +#include +__attribute__((constructor)) static void mark(void) { + char exe[512] = {0}; + ssize_t n = readlink("/proc/self/exe", exe, sizeof(exe) - 1); + FILE *f = fopen("/tmp/preload-log", "a"); + if (f) { fprintf(f, "%s\n", n > 0 ? exe : "?"); fclose(f); } +} +EOF +gcc -shared -fPIC -o /tmp/preload-marker.so /tmp/preload-marker.c +printf '#!/bin/bash\nenv > /tmp/oneshot-child-env\nexit 2\n' > /usr/local/bin/facelock-env-capture +chmod 755 /usr/local/bin/facelock-env-capture +rm -f /tmp/preload-log /tmp/oneshot-child-env + +sed -i '/^\[daemon\]/a mode = "oneshot"\nauth_bin = "/usr/local/bin/facelock-env-capture"' /etc/facelock/config.toml +sed -i '/^\[security\]/a abort_if_ssh = false' /etc/facelock/config.toml +env LD_PRELOAD=/tmp/preload-marker.so SSH_CONNECTION='192.0.2.1 1111 192.0.2.2 22' \ + pamtester facelock-test testuser authenticate < /dev/null > /dev/null 2>&1 || true +sed -i '/^mode = "oneshot"/d;/^auth_bin = /d;/^abort_if_ssh = false/d' /etc/facelock/config.toml + +run_test "env_clear: marker was active in the PAM process" \ + "grep -q pamtester /tmp/preload-log" \ + 0 + +run_test "env_clear: LD_PRELOAD marker not loaded by oneshot child" \ + "test -f /tmp/oneshot-child-env && ! grep -q '^LD_PRELOAD=' /tmp/oneshot-child-env && ! grep -q bash /tmp/preload-log" \ + 0 + +run_test "env_clear: SSH_CONNECTION survives to oneshot child" \ + "grep -q '^SSH_CONNECTION=192.0.2.1' /tmp/oneshot-child-env" \ + 0 + +run_test "env_clear: oneshot child PATH pinned to /usr/bin:/bin" \ + "grep -qx 'PATH=/usr/bin:/bin' /tmp/oneshot-child-env" \ + 0 + +# (c) Peer-UID check: a non-root process owning org.facelock.Daemon and +# replying matched=true must never produce PAM_SUCCESS. A deliberately +# loosened bus policy simulates a broken/compromised policy file. +cat > /usr/share/dbus-1/system.d/zz-facelock-peer-test.conf <<'EOF' + + + + + + + + + +EOF +mkdir -p /run/dbus +dbus-uuidgen --ensure=/etc/machine-id > /dev/null 2>&1 || true +dbus-daemon --system --fork --nopidfile +runuser -u testuser -- python3 /fake-facelock-daemon.py > /tmp/fake-daemon.log 2>&1 & +FAKE_PID=$! +for _ in $(seq 1 40); do + dbus-send --system --print-reply --dest=org.freedesktop.DBus \ + /org/freedesktop/DBus org.freedesktop.DBus.NameHasOwner \ + string:org.facelock.Daemon 2>/dev/null | grep -q 'boolean true' && break + sleep 0.25 +done + +run_test "Peer-UID harness: fake non-root daemon replies matched=true" \ + "dbus-send --system --print-reply --dest=org.facelock.Daemon /org/facelock/Daemon org.facelock.Daemon.Authenticate string:testuser | grep -q 'boolean true'" \ + 0 + +run_test "Peer-UID: non-root daemon owner yields no PAM_SUCCESS" \ + "! timeout 15 pamtester facelock-test testuser authenticate < /dev/null" \ + 0 + +kill "$FAKE_PID" 2>/dev/null || true +rm -f /usr/share/dbus-1/system.d/zz-facelock-peer-test.conf + echo "" echo "=== Results: $PASS passed, $FAIL failed ===" diff --git a/test/run-integration-tests.sh b/test/run-integration-tests.sh index 903df87..f3e93ca 100755 --- a/test/run-integration-tests.sh +++ b/test/run-integration-tests.sh @@ -127,6 +127,27 @@ run_test_contains "Authenticate enrolled face (CLI)" \ run_test "Authenticate enrolled face (PAM)" \ "timeout --foreground $LIVE_TIMEOUT pamtester facelock-test testuser authenticate" +# --- Plan 05: rate-limited daemon state must never escalate to a fresh oneshot --- + +# Force a rate-limited state by inserting failed attempts directly into the +# shared SQLite window (default: 5 attempts / 60s). Requires enrolled models +# (rate limiting is checked after the has-models pre-check). +run_test "Rate limit: seed failed attempts" \ + "sqlite3 /tmp/facelock-test.db \"INSERT INTO rate_limit (user, attempt_time) SELECT 'testuser', strftime('%s','now') FROM (VALUES (1),(2),(3),(4),(5),(6));\"" + +run_test_contains "Rate limit: daemon encodes recoverable error in-band (model_id=-2)" \ + "dbus-send --system --print-reply --dest=org.facelock.Daemon /org/facelock/Daemon org.facelock.Daemon.Authenticate string:testuser" \ + "int32 -2" + +# With the in-band encoding the PAM module classifies the error itself +# (rate limited -> PAM_AUTH_ERR) instead of retrying as a root oneshot. +# A marker auth_bin proves no oneshot child is ever spawned. +run_test "Rate limit: PAM fails without oneshot escalation" \ + "printf '#!/bin/bash\ntouch /tmp/oneshot-invoked\nexit 2\n' > /usr/local/bin/oneshot-marker && chmod 755 /usr/local/bin/oneshot-marker && sed -i '/^\[daemon\]/a auth_bin = \"/usr/local/bin/oneshot-marker\"' /etc/facelock/config.toml && rm -f /tmp/oneshot-invoked; timeout 30 pamtester facelock-test testuser authenticate < /dev/null; rc=\$?; sed -i '/^auth_bin = /d' /etc/facelock/config.toml; test \$rc -ne 0 && test ! -f /tmp/oneshot-invoked" + +run_test "Rate limit: clear seeded attempts" \ + "sqlite3 /tmp/facelock-test.db \"DELETE FROM rate_limit WHERE user = 'testuser';\"" + # Clean up run_test "Clear enrolled models" \ "facelock clear --user testuser --yes" From f6888da2ea3135e62ed80289442cdb2583b8c489 Mon Sep 17 00:00:00 2001 From: Ty Smith Date: Fri, 3 Jul 2026 15:50:06 -0700 Subject: [PATCH 2/2] test: resolve actual db_path for integration rate-limit seeding The container config has no [storage] section, so the daemon uses the default /var/lib/facelock/facelock.db; derive the path from the config instead of hardcoding /tmp/facelock-test.db. Co-Authored-By: Claude Fable 5 --- test/run-integration-tests.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/run-integration-tests.sh b/test/run-integration-tests.sh index f3e93ca..5293f9a 100755 --- a/test/run-integration-tests.sh +++ b/test/run-integration-tests.sh @@ -129,11 +129,15 @@ run_test "Authenticate enrolled face (PAM)" \ # --- Plan 05: rate-limited daemon state must never escalate to a fresh oneshot --- +# Resolve the database the daemon actually uses (config db_path or default). +FACELOCK_DB="$(sed -n 's/^[[:space:]]*db_path[[:space:]]*=[[:space:]]*"\(.*\)"/\1/p' /etc/facelock/config.toml | head -1)" +FACELOCK_DB="${FACELOCK_DB:-/var/lib/facelock/facelock.db}" + # Force a rate-limited state by inserting failed attempts directly into the # shared SQLite window (default: 5 attempts / 60s). Requires enrolled models # (rate limiting is checked after the has-models pre-check). run_test "Rate limit: seed failed attempts" \ - "sqlite3 /tmp/facelock-test.db \"INSERT INTO rate_limit (user, attempt_time) SELECT 'testuser', strftime('%s','now') FROM (VALUES (1),(2),(3),(4),(5),(6));\"" + "sqlite3 $FACELOCK_DB \"INSERT INTO rate_limit (user, attempt_time) SELECT 'testuser', strftime('%s','now') FROM (VALUES (1),(2),(3),(4),(5),(6));\"" run_test_contains "Rate limit: daemon encodes recoverable error in-band (model_id=-2)" \ "dbus-send --system --print-reply --dest=org.facelock.Daemon /org/facelock/Daemon org.facelock.Daemon.Authenticate string:testuser" \ @@ -146,7 +150,7 @@ run_test "Rate limit: PAM fails without oneshot escalation" \ "printf '#!/bin/bash\ntouch /tmp/oneshot-invoked\nexit 2\n' > /usr/local/bin/oneshot-marker && chmod 755 /usr/local/bin/oneshot-marker && sed -i '/^\[daemon\]/a auth_bin = \"/usr/local/bin/oneshot-marker\"' /etc/facelock/config.toml && rm -f /tmp/oneshot-invoked; timeout 30 pamtester facelock-test testuser authenticate < /dev/null; rc=\$?; sed -i '/^auth_bin = /d' /etc/facelock/config.toml; test \$rc -ne 0 && test ! -f /tmp/oneshot-invoked" run_test "Rate limit: clear seeded attempts" \ - "sqlite3 /tmp/facelock-test.db \"DELETE FROM rate_limit WHERE user = 'testuser';\"" + "sqlite3 $FACELOCK_DB \"DELETE FROM rate_limit WHERE user = 'testuser';\"" # Clean up run_test "Clear enrolled models" \