From 70ba62cddd80388eb7d37759dd912a903d5a0f4e Mon Sep 17 00:00:00 2001 From: Junha Park <0xjunha@gmail.com> Date: Mon, 11 May 2026 21:15:41 +0900 Subject: [PATCH 1/3] fix(cli): harden auto-refresh service --- crates/cli/src/refresh.rs | 267 +++++++++++++++++++++- crates/cli/src/service/macos.rs | 165 +++++++++++++- crates/cli/src/tests/service_watch.rs | 313 +++++++++++++++++++++++++- 3 files changed, 728 insertions(+), 17 deletions(-) diff --git a/crates/cli/src/refresh.rs b/crates/cli/src/refresh.rs index 5282cf0..59b4b97 100644 --- a/crates/cli/src/refresh.rs +++ b/crates/cli/src/refresh.rs @@ -1,10 +1,10 @@ use std::{ env, fs, fs::{File, OpenOptions}, - io::{self, IsTerminal, Write}, + io::{self, IsTerminal, Seek, SeekFrom, Write}, path::{Path, PathBuf}, sync::mpsc, - time::{Duration, Instant}, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; use anyhow::{Context, Result, bail}; @@ -15,7 +15,8 @@ use darc_core::{ }; use darc_paths::current_utc_timestamp; use fs2::FileExt; -use serde::Serialize; +use serde::{Deserialize, Serialize}; +use serde_json::Value as JsonValue; use crate::args::{ProviderArg, RefreshArgs}; use crate::output::{HumanStyle, print_field, print_line, print_project_warning, print_section}; @@ -132,6 +133,7 @@ impl RefreshProgressPrinter { pub(crate) const DEFAULT_WATCH_DEBOUNCE: Duration = Duration::from_secs(30); pub(crate) const DEFAULT_WATCH_MIN_INTERVAL: Duration = Duration::from_secs(60); pub(crate) const DEFAULT_WATCH_RECONCILE_INTERVAL: Duration = Duration::from_secs(600); +pub(crate) const REFRESH_LOCK_SCHEMA: &str = "darc.refresh.lock.v1"; /// Stores one parsed refresh invocation for one-shot and watch modes. #[derive(Debug, Clone)] @@ -164,6 +166,7 @@ pub(crate) struct WatchSettings { /// Stores the latest foreground or service refresh state. 
#[derive(Debug, Default, Clone)] pub(crate) struct WatchState { + pub(crate) watch_identity: Option, pub(crate) last_event_at: Option, pub(crate) last_refresh_reason: Option, pub(crate) last_refresh_started_at: Option, @@ -180,6 +183,8 @@ pub(crate) struct WatchStatus<'a> { pub(crate) root: String, pub(crate) mode: &'a str, pub(crate) running: bool, + pub(crate) watch_pid: Option, + pub(crate) watch_token: Option<&'a str>, pub(crate) debounce: Option, pub(crate) min_interval: Option, pub(crate) reconcile_interval: Option, @@ -192,6 +197,68 @@ pub(crate) struct WatchStatus<'a> { pub(crate) last_error: Option<&'a str>, } +/// Identifies one running watch loop instance in the status file. +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] +pub(crate) struct WatchIdentity { + pub(crate) pid: u32, + pub(crate) token: String, +} + +impl WatchIdentity { + /// Builds identity metadata for the current watch process. + pub(crate) fn current() -> Self { + let pid = std::process::id(); + let nonce = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_nanos()) + .unwrap_or_default(); + Self { + pid, + token: format!("{pid}:{nonce}"), + } + } + + /// Returns whether a status file still belongs to this watch instance. + pub(crate) fn matches_status(&self, status: &JsonValue) -> bool { + status + .get("watch_pid") + .and_then(JsonValue::as_u64) + .is_some_and(|pid| pid == u64::from(self.pid)) + && status + .get("watch_token") + .and_then(JsonValue::as_str) + .is_some_and(|token| token == self.token.as_str()) + } +} + +/// Stores the active refresh lock holder for diagnostics. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub(crate) struct RefreshLockInfo { + pub(crate) schema: String, + pub(crate) pid: u32, + pub(crate) started_at: String, +} + +impl RefreshLockInfo { + /// Builds lock metadata for the current process. 
+ pub(crate) fn current() -> Self { + Self { + schema: REFRESH_LOCK_SCHEMA.to_owned(), + pid: std::process::id(), + started_at: current_utc_timestamp(), + } + } +} + +/// Describes whether the refresh lock file is currently held. +#[cfg(any(target_os = "macos", test))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum RefreshLockSnapshot { + Missing, + Available { stale_info: Option }, + Held { holder: Option }, +} + /// Holds an advisory refresh lock until dropped. pub(crate) struct RefreshLock { pub(crate) file: File, @@ -199,10 +266,30 @@ pub(crate) struct RefreshLock { impl Drop for RefreshLock { fn drop(&mut self) { + let _ = clear_refresh_lock_info(&mut self.file); let _ = self.file.unlock(); } } +/// Marks watch status as stopped when the foreground loop exits. +pub(crate) struct WatchStatusGuard { + pub(crate) root: PathBuf, + pub(crate) identity: WatchIdentity, +} + +impl WatchStatusGuard { + /// Builds one status guard for a running watch loop. + pub(crate) fn new(root: PathBuf, identity: WatchIdentity) -> Self { + Self { root, identity } + } +} + +impl Drop for WatchStatusGuard { + fn drop(&mut self) { + let _ = mark_watch_status_stopped_if_current(&self.root, &self.identity); + } +} + /// Represents filesystem watcher notifications consumed by the refresh loop. 
#[cfg_attr(not(target_os = "macos"), allow(dead_code))] pub(crate) enum WatchSignal { @@ -356,7 +443,11 @@ pub(crate) fn run_refresh_watch( ); } - let mut state = WatchState::default(); + let watch_identity = WatchIdentity::current(); + let mut state = WatchState { + watch_identity: Some(watch_identity.clone()), + ..WatchState::default() + }; write_watch_status( &request.root, &state, @@ -364,6 +455,7 @@ pub(crate) fn run_refresh_watch( "refresh-watch", Some(&settings), )?; + let _status_guard = WatchStatusGuard::new(request.root.clone(), watch_identity); run_refresh_cycle(&request, &mut state, &settings, "initial")?; let mut dirty_since: Option = None; @@ -372,8 +464,7 @@ pub(crate) fn run_refresh_watch( let timeout = watch_loop_timeout(dirty_since, last_refresh_at, &settings); match rx.recv_timeout(timeout) { Ok(WatchSignal::Changed) => { - state.last_event_at = Some(current_utc_timestamp()); - dirty_since.get_or_insert_with(Instant::now); + record_watch_change(&mut state, &mut dirty_since, Instant::now()); write_watch_status( &request.root, &state, @@ -401,6 +492,16 @@ pub(crate) fn run_refresh_watch( } } +/// Records one filesystem change and restarts the debounce quiet period. +pub(crate) fn record_watch_change( + state: &mut WatchState, + dirty_since: &mut Option, + now: Instant, +) { + state.last_event_at = Some(current_utc_timestamp()); + *dirty_since = Some(now); +} + /// Runs one watched refresh cycle and records status without terminating on refresh failure. 
pub(crate) fn run_refresh_cycle( request: &RefreshRunRequest, @@ -655,7 +756,7 @@ pub(crate) fn acquire_refresh_lock(root: &Path) -> Result { fs::create_dir_all(&run_dir) .with_context(|| format!("failed to create {}", run_dir.display()))?; let lock_path = run_dir.join("refresh.lock"); - let file = OpenOptions::new() + let mut file = OpenOptions::new() .read(true) .write(true) .create(true) @@ -663,14 +764,79 @@ pub(crate) fn acquire_refresh_lock(root: &Path) -> Result { .open(&lock_path) .with_context(|| format!("failed to open {}", lock_path.display()))?; file.try_lock_exclusive().with_context(|| { + let holder = read_refresh_lock_info(&lock_path) + .ok() + .flatten() + .map(|info| format!(" by process {} since {}", info.pid, info.started_at)) + .unwrap_or_default(); format!( - "another Darc refresh is already running ({})", + "another Darc refresh is already running{holder} ({})", lock_path.display() ) })?; + write_refresh_lock_info(&mut file, &RefreshLockInfo::current())?; Ok(RefreshLock { file }) } +/// Inspects the shared refresh lock without taking ownership of it. 
+#[cfg(any(target_os = "macos", test))] +pub(crate) fn inspect_refresh_lock(root: &Path) -> Result { + let lock_path = root.join("run/refresh.lock"); + if !lock_path.exists() { + return Ok(RefreshLockSnapshot::Missing); + } + let file = OpenOptions::new() + .read(true) + .write(true) + .open(&lock_path) + .with_context(|| format!("failed to open {}", lock_path.display()))?; + let info = read_refresh_lock_info(&lock_path)?; + match file.try_lock_exclusive() { + Ok(()) => { + file.unlock() + .with_context(|| format!("failed to unlock {}", lock_path.display()))?; + Ok(RefreshLockSnapshot::Available { stale_info: info }) + } + Err(error) if error.kind() == io::ErrorKind::WouldBlock => { + Ok(RefreshLockSnapshot::Held { holder: info }) + } + Err(error) => Err(error) + .with_context(|| format!("failed to inspect refresh lock {}", lock_path.display())), + } +} + +/// Writes refresh lock holder metadata into an acquired lock file. +pub(crate) fn write_refresh_lock_info(file: &mut File, info: &RefreshLockInfo) -> Result<()> { + file.set_len(0).context("failed to truncate refresh lock")?; + file.seek(SeekFrom::Start(0)) + .context("failed to rewind refresh lock")?; + serde_json::to_writer_pretty(&mut *file, info).context("failed to serialize refresh lock")?; + file.write_all(b"\n") + .context("failed to write refresh lock newline")?; + file.flush().context("failed to flush refresh lock") +} + +/// Reads refresh lock holder metadata when the lock file contains it. +pub(crate) fn read_refresh_lock_info(lock_path: &Path) -> Result> { + let content = fs::read_to_string(lock_path) + .with_context(|| format!("failed to read {}", lock_path.display()))?; + if content.trim().is_empty() { + return Ok(None); + } + let Ok(info) = serde_json::from_str::(&content) else { + return Ok(None); + }; + Ok((info.schema == REFRESH_LOCK_SCHEMA).then_some(info)) +} + +/// Clears refresh lock holder metadata before releasing the lock. 
+pub(crate) fn clear_refresh_lock_info(file: &mut File) -> Result<()> { + file.set_len(0).context("failed to clear refresh lock")?; + file.seek(SeekFrom::Start(0)) + .context("failed to rewind refresh lock")?; + file.flush().context("failed to flush refresh lock") +} + /// Writes the current continuous refresh status JSON. pub(crate) fn write_watch_status( root: &Path, @@ -682,12 +848,15 @@ pub(crate) fn write_watch_status( let run_dir = root.join("run"); fs::create_dir_all(&run_dir) .with_context(|| format!("failed to create {}", run_dir.display()))?; + let watch_identity = state.watch_identity.as_ref(); let status = WatchStatus { schema: "darc.watch.status.v1", generated_at: current_utc_timestamp(), root: root.display().to_string(), mode, running, + watch_pid: watch_identity.map(|identity| identity.pid), + watch_token: watch_identity.map(|identity| identity.token.as_str()), debounce: settings.map(|settings| format_duration(settings.debounce)), min_interval: settings.map(|settings| format_duration(settings.min_interval)), reconcile_interval: settings.map(|settings| format_duration(settings.reconcile_interval)), @@ -705,6 +874,88 @@ pub(crate) fn write_watch_status( .with_context(|| format!("failed to write {}", status_path.display())) } +/// Marks an existing watch status file as stopped while preserving its last refresh details. +#[cfg(any(target_os = "macos", test))] +pub(crate) fn mark_watch_status_stopped(root: &Path) -> Result<()> { + mark_watch_status_stopped_matching(root, None) +} + +/// Marks watch status stopped only when it still belongs to this watch instance. +pub(crate) fn mark_watch_status_stopped_if_current( + root: &Path, + identity: &WatchIdentity, +) -> Result<()> { + mark_watch_status_stopped_matching(root, Some(identity)) +} + +/// Marks watch status stopped after optionally checking status ownership. 
+pub(crate) fn mark_watch_status_stopped_matching( + root: &Path, + expected_identity: Option<&WatchIdentity>, +) -> Result<()> { + let run_dir = root.join("run"); + fs::create_dir_all(&run_dir) + .with_context(|| format!("failed to create {}", run_dir.display()))?; + let status_path = run_dir.join("status.json"); + let mut status = stopped_watch_status_value(root, &status_path)?; + if expected_identity.is_some_and(|identity| !identity.matches_status(&status)) { + return Ok(()); + } + let object = status.as_object_mut().expect("stopped status is an object"); + object.insert( + "generated_at".to_owned(), + JsonValue::String(current_utc_timestamp()), + ); + object.insert("running".to_owned(), JsonValue::Bool(false)); + let content = serde_json::to_vec_pretty(&status).context("failed to serialize watch status")?; + fs::write(&status_path, content) + .with_context(|| format!("failed to write {}", status_path.display())) +} + +/// Returns the status object to update when marking a watch stopped. +pub(crate) fn stopped_watch_status_value(root: &Path, status_path: &Path) -> Result { + let content = match fs::read_to_string(status_path) { + Ok(content) => content, + Err(error) if error.kind() == io::ErrorKind::NotFound => { + return Ok(minimal_stopped_watch_status(root)); + } + Err(error) => { + return Err(error).with_context(|| format!("failed to read {}", status_path.display())); + } + }; + let Ok(status) = serde_json::from_str::(&content) else { + return Ok(minimal_stopped_watch_status(root)); + }; + if status.is_object() { + Ok(status) + } else { + Ok(minimal_stopped_watch_status(root)) + } +} + +/// Builds a minimal stopped watch status for missing or malformed status files. 
+pub(crate) fn minimal_stopped_watch_status(root: &Path) -> JsonValue { + serde_json::json!({ + "schema": "darc.watch.status.v1", + "generated_at": current_utc_timestamp(), + "root": root.display().to_string(), + "mode": "refresh-watch", + "running": false, + "watch_pid": null, + "watch_token": null, + "debounce": null, + "min_interval": null, + "reconcile_interval": null, + "poll": null, + "last_event_at": null, + "last_refresh_reason": null, + "last_refresh_started_at": null, + "last_refresh_completed_at": null, + "last_refresh_succeeded": null, + "last_error": null, + }) +} + /// Installs native macOS watchers for the selected paths. #[cfg(target_os = "macos")] pub(crate) fn install_native_watchers( diff --git a/crates/cli/src/service/macos.rs b/crates/cli/src/service/macos.rs index bb9eeea..3e5db3a 100644 --- a/crates/cli/src/service/macos.rs +++ b/crates/cli/src/service/macos.rs @@ -7,14 +7,14 @@ use std::{ io::{self, IsTerminal}, path::PathBuf, process::Command, - time::{Duration, Instant}, + time::{Duration, Instant, SystemTime}, }; +#[cfg(any(target_os = "macos", test))] +use anyhow::Result; #[cfg(target_os = "macos")] -use anyhow::Context; -#[cfg(target_os = "macos")] -use anyhow::{Result, bail}; -#[cfg(target_os = "macos")] +use anyhow::{Context, bail}; +#[cfg(any(target_os = "macos", test))] use serde_json::Value as JsonValue; #[cfg(target_os = "macos")] @@ -25,6 +25,13 @@ use crate::args::ServiceCommands; use crate::output::HumanStyle; #[cfg(target_os = "macos")] use crate::output::{print_field, print_line, print_section}; +#[cfg(target_os = "macos")] +use crate::refresh::inspect_refresh_lock; +#[cfg(any(target_os = "macos", test))] +use crate::refresh::{ + DEFAULT_WATCH_RECONCILE_INTERVAL, RefreshLockInfo, RefreshLockSnapshot, + mark_watch_status_stopped, parse_duration, +}; /// Renders automatic background refresh setup progress for interactive terminals. 
#[cfg(any(target_os = "macos", test))] @@ -116,7 +123,7 @@ pub(crate) fn run_refresh_auto(root: &Path) -> Result<()> { Ok(()) } -#[cfg(target_os = "macos")] +#[cfg(any(target_os = "macos", test))] pub(crate) const MACOS_SERVICE_LABEL: &str = "com.0xjunha.darc.refresh"; #[cfg(target_os = "macos")] pub(crate) const MACOS_SERVICE_UNLOAD_TIMEOUT: Duration = Duration::from_secs(2); @@ -133,6 +140,18 @@ pub(crate) enum MacosServiceStartOutcome { Restarted, } +/// Summarizes the watch process state from launchd and Darc status facts. +#[cfg(any(target_os = "macos", test))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum MacosWatchProcessState { + Running, + Starting, + StaleLaunchdRunning, + StaleLaunchdStopped, + Stopped, + Unknown, +} + #[cfg(any(target_os = "macos", test))] impl MacosServiceStartOutcome { /// Returns the status text for `darc refresh --auto`. @@ -320,6 +339,7 @@ pub(crate) fn stop_macos_service(root: &Path) -> Result<()> { print_section(style, "Service"); print_field(style, 2, "Status", style.muted("not running")); } + mark_macos_service_stopped(root)?; remove_macos_runtime_plist(root)?; Ok(()) } @@ -353,12 +373,29 @@ pub(crate) fn print_macos_service_status(root: &Path) -> Result<()> { println!(); print_section(style, "Watch Status"); + print_field( + style, + 2, + "Refresh lock", + format_refresh_lock_snapshot(style, &inspect_refresh_lock(root)?), + ); let status_path = root.join("run/status.json"); if status_path.exists() { let content = fs::read_to_string(&status_path) .with_context(|| format!("failed to read {}", status_path.display()))?; let status: JsonValue = serde_json::from_str(&content).context("failed to parse watch status JSON")?; + let status_stale = macos_watch_status_age(&status_path)? 
+ .is_some_and(|age| macos_watch_status_stale(&status, age)); + print_field( + style, + 2, + "Watch process", + format_macos_watch_process_state( + style, + macos_watch_process_state(running, Some(&status), status_stale), + ), + ); print_field(style, 2, "Status file", style.path(status_path.display())); print_field( style, @@ -416,6 +453,15 @@ pub(crate) fn print_macos_service_status(root: &Path) -> Result<()> { json_error_or_dash(style, &status["last_error"]), ); } else { + print_field( + style, + 2, + "Watch process", + format_macos_watch_process_state( + style, + macos_watch_process_state(running, None, false), + ), + ); print_field( style, 2, @@ -486,7 +532,7 @@ pub(crate) fn macos_runtime_plist_path(root: &Path) -> PathBuf { } /// Builds the LaunchAgent plist XML. -#[cfg(target_os = "macos")] +#[cfg(any(target_os = "macos", test))] pub(crate) fn macos_launch_agent_plist( root: &Path, executable: &Path, @@ -513,6 +559,13 @@ pub(crate) fn macos_launch_agent_plist( RunAtLoad <{run_at_load}/> + KeepAlive + + SuccessfulExit + + + ThrottleInterval + 30 StandardOutPath {stdout} StandardErrorPath @@ -691,7 +744,7 @@ pub(crate) fn current_uid() -> Result { } /// Escapes one value for XML text content. -#[cfg(target_os = "macos")] +#[cfg(any(target_os = "macos", test))] pub(crate) fn xml_escape(value: &str) -> String { value .replace('&', "&") @@ -701,6 +754,102 @@ pub(crate) fn xml_escape(value: &str) -> String { .replace('\'', "'") } +/// Marks the latest watch status stopped after an explicit service stop. +#[cfg(any(target_os = "macos", test))] +pub(crate) fn mark_macos_service_stopped(root: &Path) -> Result<()> { + mark_watch_status_stopped(root) +} + +/// Resolves the watch process state from launchd and the latest status file. 
+#[cfg(any(target_os = "macos", test))] +pub(crate) fn macos_watch_process_state( + launchd_running: bool, + status: Option<&JsonValue>, + status_stale: bool, +) -> MacosWatchProcessState { + let status_running = status + .and_then(|status| status.get("running")) + .and_then(JsonValue::as_bool); + match (launchd_running, status_running) { + (true, Some(true)) if status_stale => MacosWatchProcessState::StaleLaunchdRunning, + (true, Some(true)) => MacosWatchProcessState::Running, + (true, Some(false)) if status_stale => MacosWatchProcessState::StaleLaunchdRunning, + (true, Some(false)) => MacosWatchProcessState::Starting, + (false, Some(true)) => MacosWatchProcessState::StaleLaunchdStopped, + (false, Some(false)) => MacosWatchProcessState::Stopped, + (true, None) if status_stale => MacosWatchProcessState::StaleLaunchdRunning, + (true, None) => MacosWatchProcessState::Unknown, + (false, None) => MacosWatchProcessState::Stopped, + } +} + +/// Returns whether a watch status file is too old for its reconcile cadence. +#[cfg(any(target_os = "macos", test))] +pub(crate) fn macos_watch_status_stale(status: &JsonValue, age: std::time::Duration) -> bool { + let interval = status + .get("reconcile_interval") + .and_then(JsonValue::as_str) + .and_then(|value| parse_duration(value).ok()) + .unwrap_or(DEFAULT_WATCH_RECONCILE_INTERVAL); + let stale_after = interval.checked_mul(2).unwrap_or(interval); + age > stale_after +} + +/// Returns how long ago the watch status file was updated. +#[cfg(target_os = "macos")] +pub(crate) fn macos_watch_status_age(status_path: &Path) -> Result> { + let modified = status_path + .metadata() + .with_context(|| format!("failed to stat {}", status_path.display()))? + .modified() + .with_context(|| format!("failed to read modified time for {}", status_path.display()))?; + Ok(SystemTime::now().duration_since(modified).ok()) +} + +/// Formats the watch process state for service status output. 
+#[cfg(any(target_os = "macos", test))] +pub(crate) fn format_macos_watch_process_state( + style: HumanStyle, + state: MacosWatchProcessState, +) -> String { + match state { + MacosWatchProcessState::Running => style.ok("running"), + MacosWatchProcessState::Starting => style.warn("launchd running; status stopped"), + MacosWatchProcessState::StaleLaunchdRunning => style.warn("stale; launchd running"), + MacosWatchProcessState::StaleLaunchdStopped => style.warn("stale; launchd not running"), + MacosWatchProcessState::Stopped => style.muted("stopped"), + MacosWatchProcessState::Unknown => style.muted("unknown"), + } +} + +/// Formats the refresh lock state for service status output. +#[cfg(any(target_os = "macos", test))] +pub(crate) fn format_refresh_lock_snapshot( + style: HumanStyle, + snapshot: &RefreshLockSnapshot, +) -> String { + match snapshot { + RefreshLockSnapshot::Missing => style.muted("none"), + RefreshLockSnapshot::Available { stale_info: None } => style.ok("available"), + RefreshLockSnapshot::Available { + stale_info: Some(info), + } => style.warn(format!( + "available; stale holder metadata: {}", + format_refresh_lock_holder(info) + )), + RefreshLockSnapshot::Held { holder: None } => style.warn("held"), + RefreshLockSnapshot::Held { holder: Some(info) } => { + style.warn(format!("held by {}", format_refresh_lock_holder(info))) + } + } +} + +/// Formats one refresh lock holder for diagnostics. +#[cfg(any(target_os = "macos", test))] +pub(crate) fn format_refresh_lock_holder(info: &RefreshLockInfo) -> String { + format!("pid {} since {}", info.pid, info.started_at) +} + /// Formats a boolean as a styled yes or no. 
#[cfg(target_os = "macos")] pub(crate) fn yes_no(style: HumanStyle, value: bool) -> String { diff --git a/crates/cli/src/tests/service_watch.rs b/crates/cli/src/tests/service_watch.rs index db05954..2a488ad 100644 --- a/crates/cli/src/tests/service_watch.rs +++ b/crates/cli/src/tests/service_watch.rs @@ -240,6 +240,18 @@ fn watched_refresh_runs_when_debounce_and_min_interval_are_ready() { ); } +#[test] +fn watch_change_resets_debounce_to_latest_event() { + let now = Instant::now(); + let mut state = super::WatchState::default(); + let mut dirty_since = Some(now - Duration::from_secs(120)); + + super::record_watch_change(&mut state, &mut dirty_since, now); + + assert_eq!(dirty_since, Some(now)); + assert!(state.last_event_at.is_some()); +} + #[test] fn reconcile_refresh_preempts_stale_dirty_event() { let settings = sample_watch_settings( @@ -457,6 +469,7 @@ fn write_watch_status_records_settings_and_refresh_state() -> Result<()> { Duration::from_secs(600), ); let state = super::WatchState { + watch_identity: None, last_event_at: Some("2026-04-30T04:00:00Z".to_owned()), last_refresh_reason: Some("change".to_owned()), last_refresh_started_at: Some("2026-04-30T04:00:30Z".to_owned()), @@ -472,6 +485,8 @@ fn write_watch_status_records_settings_and_refresh_state() -> Result<()> { assert_eq!(status["root"], root.display().to_string()); assert_eq!(status["mode"], "refresh-watch"); assert_eq!(status["running"], true); + assert!(status["watch_pid"].is_null()); + assert!(status["watch_token"].is_null()); assert_eq!(status["debounce"], "30s"); assert_eq!(status["min_interval"], "1m"); assert_eq!(status["reconcile_interval"], "10m"); @@ -506,7 +521,6 @@ fn write_watch_status_keeps_settings_optional_for_legacy_compatibility() -> Resu Ok(()) } -#[cfg(target_os = "macos")] #[test] fn macos_launch_agent_plist_uses_refresh_watch_all_and_escapes_paths() { let root = PathBuf::from("/tmp/darc & root"); @@ -523,10 +537,307 @@ fn 
macos_launch_agent_plist_uses_refresh_watch_all_and_escapes_paths() { assert!(plist.contains("--root")); assert!(plist.contains("/tmp/darc & root")); assert!(plist.contains("RunAtLoad\n ")); + assert!(plist.contains("KeepAlive")); + assert!(plist.contains("SuccessfulExit\n ")); + assert!(plist.contains("ThrottleInterval\n 30")); assert!(plist.contains("/tmp/darc & root/log/refresh-watch.out.log")); assert!(plist.contains("/tmp/darc & root/log/refresh-watch.err.log")); } +#[test] +fn watch_status_stop_marker_preserves_refresh_details() -> Result<()> { + let root = unique_test_dir("watch-status-stop"); + let settings = sample_watch_settings( + Duration::from_secs(30), + Duration::from_secs(60), + Duration::from_secs(600), + ); + let state = super::WatchState { + watch_identity: None, + last_event_at: Some("2026-04-30T04:00:00Z".to_owned()), + last_refresh_reason: Some("change".to_owned()), + last_refresh_started_at: Some("2026-04-30T04:00:30Z".to_owned()), + last_refresh_completed_at: Some("2026-04-30T04:00:31Z".to_owned()), + last_refresh_succeeded: Some(false), + last_error: Some("synthetic failure".to_owned()), + }; + + super::write_watch_status(&root, &state, true, "refresh-watch", Some(&settings))?; + super::mark_watch_status_stopped(&root)?; + let status: Value = serde_json::from_str(&fs::read_to_string(root.join("run/status.json"))?)?; + + assert_eq!(status["running"], false); + assert_eq!(status["last_refresh_reason"], "change"); + assert_eq!(status["last_error"], "synthetic failure"); + Ok(()) +} + +#[test] +fn watch_status_stop_marker_rewrites_malformed_status() -> Result<()> { + let root = unique_test_dir("watch-status-stop-malformed"); + let status_path = root.join("run/status.json"); + fs::create_dir_all(status_path.parent().unwrap())?; + write_file(&status_path, "not json")?; + + super::mark_watch_status_stopped(&root)?; + let status: Value = serde_json::from_str(&fs::read_to_string(status_path)?)?; + + assert_eq!(status["schema"], 
"darc.watch.status.v1"); + assert_eq!(status["root"], root.display().to_string()); + assert_eq!(status["mode"], "refresh-watch"); + assert_eq!(status["running"], false); + assert!(status["last_refresh_reason"].is_null()); + Ok(()) +} + +#[test] +fn watch_status_stop_marker_rewrites_non_object_status() -> Result<()> { + let root = unique_test_dir("watch-status-stop-non-object"); + let status_path = root.join("run/status.json"); + fs::create_dir_all(status_path.parent().unwrap())?; + write_file(&status_path, "[]")?; + + super::mark_watch_status_stopped(&root)?; + let status: Value = serde_json::from_str(&fs::read_to_string(status_path)?)?; + + assert_eq!(status["running"], false); + assert_eq!(status["root"], root.display().to_string()); + Ok(()) +} + +#[test] +fn watch_status_stop_marker_creates_missing_run_dir() -> Result<()> { + let root = unique_test_dir("watch-status-stop-missing-run"); + + super::mark_watch_status_stopped(&root)?; + let status_path = root.join("run/status.json"); + let status: Value = serde_json::from_str(&fs::read_to_string(status_path)?)?; + + assert_eq!(status["running"], false); + assert_eq!(status["root"], root.display().to_string()); + Ok(()) +} + +#[test] +fn watch_status_guard_stops_matching_watch_identity() -> Result<()> { + let root = unique_test_dir("watch-status-stop-matching-identity"); + let identity = super::WatchIdentity { + pid: 123, + token: "watch-a".to_owned(), + }; + let state = super::WatchState { + watch_identity: Some(identity.clone()), + last_refresh_reason: Some("change".to_owned()), + ..super::WatchState::default() + }; + + super::write_watch_status(&root, &state, true, "refresh-watch", None)?; + super::mark_watch_status_stopped_if_current(&root, &identity)?; + let status: Value = serde_json::from_str(&fs::read_to_string(root.join("run/status.json"))?)?; + + assert_eq!(status["running"], false); + assert_eq!(status["watch_pid"].as_u64(), Some(u64::from(identity.pid))); + assert_eq!( + status["watch_token"].as_str(), + 
Some(identity.token.as_str()) + ); + assert_eq!(status["last_refresh_reason"], "change"); + Ok(()) +} + +#[test] +fn watch_status_guard_does_not_stop_newer_watch_identity() -> Result<()> { + let root = unique_test_dir("watch-status-stop-newer-identity"); + let old_identity = super::WatchIdentity { + pid: 123, + token: "watch-old".to_owned(), + }; + let new_identity = super::WatchIdentity { + pid: 123, + token: "watch-new".to_owned(), + }; + let state = super::WatchState { + watch_identity: Some(new_identity.clone()), + last_refresh_reason: Some("reconcile".to_owned()), + ..super::WatchState::default() + }; + + super::write_watch_status(&root, &state, true, "refresh-watch", None)?; + super::mark_watch_status_stopped_if_current(&root, &old_identity)?; + let status: Value = serde_json::from_str(&fs::read_to_string(root.join("run/status.json"))?)?; + + assert_eq!(status["running"], true); + assert_eq!( + status["watch_token"].as_str(), + Some(new_identity.token.as_str()) + ); + assert_eq!(status["last_refresh_reason"], "reconcile"); + Ok(()) +} + +#[test] +fn refresh_lock_records_holder_metadata_and_clears_on_drop() -> Result<()> { + let root = unique_test_dir("refresh-lock-metadata"); + let lock_path = root.join("run/refresh.lock"); + + let lock = super::acquire_refresh_lock(&root)?; + let info = super::read_refresh_lock_info(&lock_path)?.unwrap(); + + assert_eq!(info.schema, super::REFRESH_LOCK_SCHEMA); + assert_eq!(info.pid, std::process::id()); + assert!(!info.started_at.is_empty()); + + drop(lock); + + assert!(fs::read_to_string(&lock_path)?.trim().is_empty()); + assert_eq!( + super::inspect_refresh_lock(&root)?, + super::RefreshLockSnapshot::Available { stale_info: None } + ); + Ok(()) +} + +#[test] +fn refresh_lock_inspection_reports_stale_metadata() -> Result<()> { + let root = unique_test_dir("refresh-lock-stale-metadata"); + let lock_path = root.join("run/refresh.lock"); + fs::create_dir_all(lock_path.parent().unwrap())?; + let stale_info = 
super::RefreshLockInfo { + schema: super::REFRESH_LOCK_SCHEMA.to_owned(), + pid: 42, + started_at: "2026-04-30T04:00:00Z".to_owned(), + }; + write_file(&lock_path, &serde_json::to_string_pretty(&stale_info)?)?; + + assert_eq!( + super::inspect_refresh_lock(&root)?, + super::RefreshLockSnapshot::Available { + stale_info: Some(stale_info) + } + ); + Ok(()) +} + +#[test] +fn service_status_helpers_flag_stale_watch_status() { + let running_status = serde_json::json!({ "running": true }); + let stopped_status = serde_json::json!({ "running": false }); + let style = super::HumanStyle::new(false, false, None); + + assert_eq!( + super::macos_watch_process_state(false, Some(&running_status), false), + super::MacosWatchProcessState::StaleLaunchdStopped + ); + assert_eq!( + super::macos_watch_process_state(true, Some(&running_status), false), + super::MacosWatchProcessState::Running + ); + assert_eq!( + super::macos_watch_process_state(true, Some(&stopped_status), false), + super::MacosWatchProcessState::Starting + ); + assert_eq!( + super::macos_watch_process_state(true, Some(&stopped_status), true), + super::MacosWatchProcessState::StaleLaunchdRunning + ); + assert_eq!( + super::macos_watch_process_state(true, Some(&running_status), true), + super::MacosWatchProcessState::StaleLaunchdRunning + ); + assert_eq!( + super::macos_watch_process_state(false, Some(&stopped_status), true), + super::MacosWatchProcessState::Stopped + ); + assert_eq!( + super::format_macos_watch_process_state( + style, + super::MacosWatchProcessState::StaleLaunchdRunning + ), + "stale; launchd running" + ); + assert_eq!( + super::format_macos_watch_process_state( + style, + super::MacosWatchProcessState::StaleLaunchdStopped + ), + "stale; launchd not running" + ); +} + +#[test] +fn service_status_helpers_flag_old_watch_status_as_stale() { + let status = serde_json::json!({ "running": true, "reconcile_interval": "10m" }); + + assert!(!super::macos_watch_status_stale( + &status, + 
Duration::from_secs(1_200) + )); + assert!(super::macos_watch_status_stale( + &status, + Duration::from_secs(1_201) + )); +} + +#[test] +fn service_stop_marker_marks_running_status_stopped() -> Result<()> { + let root = unique_test_dir("service-stop-status"); + let state = super::WatchState { + last_refresh_reason: Some("change".to_owned()), + last_refresh_succeeded: Some(true), + ..super::WatchState::default() + }; + + super::write_watch_status(&root, &state, true, "refresh-watch", None)?; + super::mark_macos_service_stopped(&root)?; + let status: Value = serde_json::from_str(&fs::read_to_string(root.join("run/status.json"))?)?; + + assert_eq!(status["running"], false); + assert_eq!(status["last_refresh_reason"], "change"); + assert_eq!( + super::macos_watch_process_state(false, Some(&status), false), + super::MacosWatchProcessState::Stopped + ); + Ok(()) +} + +#[test] +fn service_stop_marker_tolerates_malformed_status() -> Result<()> { + let root = unique_test_dir("service-stop-status-malformed"); + let status_path = root.join("run/status.json"); + fs::create_dir_all(status_path.parent().unwrap())?; + write_file(&status_path, "{")?; + + super::mark_macos_service_stopped(&root)?; + let status: Value = serde_json::from_str(&fs::read_to_string(status_path)?)?; + + assert_eq!(status["running"], false); + assert_eq!( + super::macos_watch_process_state(false, Some(&status), false), + super::MacosWatchProcessState::Stopped + ); + Ok(()) +} + +#[test] +fn service_status_helpers_format_stale_lock_metadata() { + let style = super::HumanStyle::new(false, false, None); + let info = super::RefreshLockInfo { + schema: super::REFRESH_LOCK_SCHEMA.to_owned(), + pid: 42, + started_at: "2026-04-30T04:00:00Z".to_owned(), + }; + + let formatted = super::format_refresh_lock_snapshot( + style, + &super::RefreshLockSnapshot::Available { + stale_info: Some(info), + }, + ); + + assert!(formatted.contains("stale holder metadata")); + assert!(formatted.contains("pid 42 since 
2026-04-30T04:00:00Z")); +} + #[cfg(target_os = "macos")] #[test] fn write_macos_service_plist_creates_runtime_dirs_and_file() -> Result<()> { From f9acf4087fa35f5a2ebdc80d090d88f8dd76f92f Mon Sep 17 00:00:00 2001 From: Junha Park <0xjunha@gmail.com> Date: Mon, 11 May 2026 21:15:56 +0900 Subject: [PATCH 2/3] docs(cli): document auto-refresh hardening --- CHANGELOG.md | 1 + docs/service.md | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9531b73..0d261ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable Darc release changes should be summarized here. - Clarify README guidance for agent setup and prompt-driven prior-session investigations. - Show release dates in changelog version headings and have release preparation add them automatically. - Speed up regex search for queries with a required literal prefix. +- Harden auto-refresh restart, debounce, stale lock, and service status reporting. ## [0.1.4] - 2026-05-08 diff --git a/docs/service.md b/docs/service.md index 9e3d3d0..1f840b4 100644 --- a/docs/service.md +++ b/docs/service.md @@ -13,7 +13,8 @@ darc refresh --auto This is equivalent to `darc service enable` followed by `darc service start`: it enables auto-start on future logins and starts or restarts the background refresh service now. If auto-refresh is already running, Darc stops the existing -LaunchAgent and starts the updated one. +LaunchAgent and starts the updated one. The LaunchAgent also asks launchd to restart the watcher after a failed exit, +with launchd throttling repeated restarts. The foreground command is: @@ -21,8 +22,9 @@ The foreground command is: darc refresh --watch --all ``` -This is the process used by the background service. It watches configured Claude and Codex source roots, debounces file -events, periodically reconciles missed events, and runs the same refresh path as `darc refresh --all`. +This is the process used by the background service. 
It watches configured Claude and Codex source roots, waits for the +debounce quiet period after the latest file event, periodically reconciles missed events, and runs the same refresh path +as `darc refresh --all`. For Codex sessions, Darc reads Codex's own log files and matches sessions from recorded metadata. It does not probe arbitrary historical `cwd` directories from those logs during background refresh; older Codex logs without @@ -50,8 +52,9 @@ darc service disable LaunchAgent auto-start file. - `stop` unloads the LaunchAgent in the current login session without removing the auto-start file. - `restart` stops and starts the LaunchAgent. -- `status` reports whether the LaunchAgent file exists, whether launchd has it loaded, the active watch settings, and - the latest Darc watch status. +- `status` reports whether the LaunchAgent file exists, whether launchd has it loaded, active or stale refresh lock + metadata, whether the latest watch status still matches launchd, the active watch settings, and the latest Darc watch + status. - `disable` unloads the LaunchAgent and removes the auto-start file. Linux systemd user units and Windows service or Task Scheduler support are not implemented yet. @@ -90,3 +93,7 @@ Runtime files live under the Darc root: ~/.darc/log/refresh-watch.out.log ~/.darc/log/refresh-watch.err.log ``` + +`refresh.lock` contains holder metadata only while a refresh owns the advisory lock. A leftover lock file without an +active lock is treated as available; if it still contains valid holder metadata, `darc service status` reports that +metadata as stale. 
From deb297f4d727201acf08997bc1afce058ffc5c52 Mon Sep 17 00:00:00 2001 From: Junha Park <0xjunha@gmail.com> Date: Mon, 11 May 2026 21:47:34 +0900 Subject: [PATCH 3/3] fix(cli): avoid restarting bootstrapped refresh service --- CHANGELOG.md | 1 + crates/cli/src/service/macos.rs | 12 +++++++----- crates/cli/src/tests/service_watch.rs | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d261ed..f95307e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ All notable Darc release changes should be summarized here. - Show release dates in changelog version headings and have release preparation add them automatically. - Speed up regex search for queries with a required literal prefix. - Harden auto-refresh restart, debounce, stale lock, and service status reporting. +- Avoid killing freshly bootstrapped auto-refresh services during start. ## [0.1.4] - 2026-05-08 diff --git a/crates/cli/src/service/macos.rs b/crates/cli/src/service/macos.rs index 3e5db3a..f8e942f 100644 --- a/crates/cli/src/service/macos.rs +++ b/crates/cli/src/service/macos.rs @@ -253,10 +253,10 @@ pub(crate) fn start_macos_service(root: &Path) -> Result<()> { #[cfg(target_os = "macos")] pub(crate) fn start_macos_service_impl(root: &Path) -> Result { let launch_agent_path = macos_launch_agent_path()?; - let plist_path = if launch_agent_path.exists() { - launch_agent_path + let (plist_path, needs_kickstart) = if launch_agent_path.exists() { + (launch_agent_path, false) } else { - write_macos_runtime_plist(root)? + (write_macos_runtime_plist(root)?, true) }; let domain = macos_launch_domain()?; let target = macos_launch_target_for_domain(&domain); @@ -272,7 +272,9 @@ pub(crate) fn start_macos_service_impl(root: &Path) -> Result Vec /// Builds the launchctl command needed to kickstart the service target. 
#[cfg(any(target_os = "macos", test))]
 pub(crate) fn macos_service_kickstart_launchctl_args(target: &str) -> Vec<String> {
-    vec!["kickstart".to_owned(), "-k".to_owned(), target.to_owned()]
+    vec!["kickstart".to_owned(), target.to_owned()]
 }
 
 /// Formats the foreground command used by the background refresh service.
diff --git a/crates/cli/src/tests/service_watch.rs b/crates/cli/src/tests/service_watch.rs
index 2a488ad..3c28387 100644
--- a/crates/cli/src/tests/service_watch.rs
+++ b/crates/cli/src/tests/service_watch.rs
@@ -125,7 +125,7 @@ fn macos_service_launchctl_args_cover_restart_sequence() {
     );
     assert_eq!(
         super::macos_service_kickstart_launchctl_args(target),
-        vec!["kickstart".to_owned(), "-k".to_owned(), target.to_owned()]
+        vec!["kickstart".to_owned(), target.to_owned()]
     );
 }