diff --git a/Cargo.toml b/Cargo.toml index 6649f616..4056d070 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,14 @@ repository = "https://github.com/console-rs/console" documentation = "https://docs.rs/console" readme = "README.md" rust-version = "1.71" -include = ["CHANGELOG.md", "Cargo.toml", "LICENSE", "README.md", "src/**/*.rs"] +include = [ + "CHANGELOG.md", + "Cargo.toml", + "LICENSE", + "README.md", + "src/**/*.rs", + "tests/data/sample_zellij_session.log", +] [features] default = ["unicode-width", "ansi-parsing", "std"] diff --git a/src/ansi.rs b/src/ansi.rs index 09239be4..467e6bd4 100644 --- a/src/ansi.rs +++ b/src/ansi.rs @@ -6,6 +6,142 @@ use core::{ str::CharIndices, }; +#[derive(Clone, Copy, Debug)] +struct OscSequence; + +impl EscSequence for OscSequence { + const START: char = ']'; + + fn on_escape(next: Option) -> EscAction { + if matches!(next, Some('\\')) { + EscAction { + consume_next: true, + end: true, + } + } else { + EscAction { + consume_next: false, + end: false, + } + } + } + + fn on_char(c: char) -> EscAction { + EscAction { + consume_next: false, + end: c == '\u{07}', + } + } +} + +#[derive(Clone, Copy, Debug)] +struct DcsSequence; + +impl EscSequence for DcsSequence { + const START: char = 'P'; + + fn on_escape(next: Option) -> EscAction { + match next { + Some('\\') => EscAction { + consume_next: true, + end: true, + }, + None => EscAction { + consume_next: false, + end: true, + }, + Some('\u{1b}') => EscAction { + consume_next: true, + end: false, + }, + _ => EscAction { + consume_next: false, + end: false, + }, + } + } + + fn on_char(_c: char) -> EscAction { + EscAction { + consume_next: false, + end: false, + } + } +} + +trait EscSequence { + fn on_escape(next: Option) -> EscAction; + fn on_char(c: char) -> EscAction; + const START: char; +} + +struct EscAction { + consume_next: bool, + end: bool, +} + +fn consume_end_exclusive( + it: &mut Peekable>, + start: usize, +) -> usize { + let mut end = start + 1; + let Some((idx, start_char)) = it.next() else { + return end; + }; + if start_char != S::START { + return end; + } + end = idx + 1; + + while let Some((idx, c)) = it.next() { + end = idx + c.len_utf8(); + match c { + '\u{9c}' => return end, + '\u{1b}' => { + let action = S::on_escape(it.peek().map(|(_, next)| *next)); + if action.consume_next { + if let Some((next_idx, _)) = it.peek() { + end = *next_idx + 1; + it.next(); + } + } + if action.end { + return end; + } + } + _ => {} + } + if S::on_char(c).end { + return end; + } + } + + end +} + +fn find_dfa_end_exclusive_after_entry(it: &mut Peekable>) -> Option { + let mut state = State::S1; + let mut maybe_end = None; + + loop { + let item = it.peek(); + + if let Some((idx, c)) = item { + state.transition(*c); + + if state.is_final() { + maybe_end = Some(*idx); + } + } + + if state.is_trapped() || item.is_none() { + return maybe_end.map(|end| end + 1); + } + + it.next(); + } +} + #[derive(Debug, Clone, Copy, Default)] enum State { #[default] @@ -143,42 +279,36 @@ impl FusedIterator for Matches<'_> {} fn find_ansi_code_exclusive(it: &mut Peekable) -> Option<(usize, usize)> { 'outer: loop { - if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? { - let start = *start; - let mut state = State::default(); - let mut maybe_end = None; - - loop { - let item = it.peek(); - - if let Some((idx, c)) = item { - state.transition(*c); - - if state.is_final() { - maybe_end = Some(*idx); + let (start, entry) = *it.peek()?; + match entry { + '\u{1b}' => { + it.next(); + match it.peek() { + Some((_, OscSequence::START)) => { + return Some((start, consume_end_exclusive::(it, start))) } - } - - // The match is greedy so run till we hit the trap state no matter what. A valid - // match is just one that was final at some point - if state.is_trapped() || item.is_none() { - match maybe_end { - Some(end) => { - // All possible final characters are a single byte so it's safe to make - // the end exclusive by just adding one - return Some((start, end + 1)); + Some((_, DcsSequence::START)) => { + return Some((start, consume_end_exclusive::(it, start))) + } + _ => { + if let Some(end) = find_dfa_end_exclusive_after_entry(it) { + return Some((start, end)); } - // The character we are peeking right now might be the start of a match so - // we want to continue the loop without popping off that char - None => continue 'outer, + continue 'outer; } } - + } + '\u{9b}' => { + it.next(); + if let Some(end) = find_dfa_end_exclusive_after_entry(it) { + return Some((start, end)); + } + continue 'outer; + } + _ => { it.next(); } } - - it.next(); } } @@ -296,12 +426,16 @@ mod tests { use proptest::prelude::*; use regex::Regex; - // The manual dfa `State` is a handwritten translation from the previously used regex. That - // regex is kept here and used to ensure that the new matches are the same as the old + // The manual dfa `State` is a handwritten translation of the following regex. static STRIP_ANSI_RE: Lazy = Lazy::new(|| { - Regex::new( - r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])", - ) + Regex::new(concat!( + r"(?s)(?:", + r"\x1b\].*?(?:\x07|\x9c|\x1b\\|\z)|", + r"\x1bP(?:[^\x1b\x9c]|\x1b\x1b|\x1b[^\x1b\\])*?(?:\x9c|\x1b\\|\x1b\z|\z)|", + r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?", + r"[0-9A-PRZcf-nqry=><])", + r")", + )) .unwrap() }); @@ -415,6 +549,28 @@ mod tests { } } + #[test] + fn strip_osc8_hyperlink_st() { + let s = "\x1b]8;;file:///tmp/test\x1b\\hello\x1b]8;;\x1b\\"; + assert_eq!(strip_ansi_codes(s).as_ref(), "hello"); + } + + #[test] + fn strip_osc8_hyperlink_bel() { + let s = "\x1b]8;;file:///tmp/test\x07hello\x1b]8;;\x07"; + assert_eq!(strip_ansi_codes(s).as_ref(), "hello"); + } + + #[test] + fn strip_tmux_passthrough_dcs() { + let open = "\x1bPtmux;\x1b\x1b\x1b]8;;file:///tmp/test\x1b\x1b\\\x1b\\"; + let close = "\x1bPtmux;\x1b\x1b\x1b]8;;\x1b\x1b\\\x1b\\"; + assert_eq!( + strip_ansi_codes(&format!("{open}hello{close}")).as_ref(), + "hello" + ); + } + #[test] fn test_ansi_iter_re_vt100() { let s = "\x1b(0lpq\x1b)Benglish";