diff --git a/CONTEXT.md b/CONTEXT.md index abcc53e..e19ac7a 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -117,7 +117,7 @@ Origin prefixes exist because some agents (observed with GPT-class models in Cod Two levels of fidelity for handling `Bash` proposals. -- **Tier 1** — *implemented today.* Static regex parsing of the shell command for redirections (`>`, `>>`), atomic-replace (`mv X.tmp X`), `cp`, `tee`, and `sed -i` targets. Sets a [change](#change) with a `bash_*` [origin prefix](#origin-prefix); does **not** open a [preview](#preview). The user sees the file was touched via the neo-tree [indicator](#indicator) but reviews the actual content via their normal diff workflow after the fact. +- **Tier 1** — *implemented today.* Static parsing of the shell command for the targets of redirections (`>`, `>>`), atomic-replace (`mv X.tmp X`), `cp`, `tee`, `sed -i`, and other in-place editors (`perl -i`, `ruby -i`, `gawk -i inplace`) — plus their PowerShell equivalents on Windows (`Remove-Item`, `Set-Content`, `Out-File`, `Move-Item`, `Copy-Item`). Implemented in `pre_tool/shell_detect.lua`. Sets a [change](#change) with a `bash_*` [origin prefix](#origin-prefix); does **not** open a [preview](#preview). The user sees the file was touched via the neo-tree [indicator](#indicator) but reviews the actual content via their normal diff workflow after the fact. - **Tier 2** — *not implemented.* Would compute and display real content diffs for shell-writes. Open design question; sandboxing was rejected (see [ADR-0001](docs/adr/0001-origin-prefixed-status-values.md)). The name exists so deferred work has a label, not a commitment. 
## Source path / File path / Display path diff --git a/lua/code-preview/pre_tool/shell_detect.lua b/lua/code-preview/pre_tool/shell_detect.lua index 838f227..31676b7 100644 --- a/lua/code-preview/pre_tool/shell_detect.lua +++ b/lua/code-preview/pre_tool/shell_detect.lua @@ -323,6 +323,127 @@ local function detect_sed_i(cmd) return out end +-- ── In-place file editors: perl / ruby / awk ───────────────────── +-- +-- Like `sed -i`, these rewrite their trailing file(s) in place (Tier-1 +-- indicator only, no diff). They get their own quote-aware path rather than a +-- redirect/`each_subcommand`-style scan because an in-place script routinely +-- contains `;` and `|` (`perl -pi -e 's/a/b/; s/c/d/'`) that the char-walk +-- scanners would mis-cut. We require the in-place flag so read-only one-liners +-- (`perl -ne 'print' f`, `awk '{print}' f`) are never flagged. + +-- Quote-aware POSIX tokeniser. Single/double-quoted regions span whitespace and +-- separators and stay attached to their word, so a quoted script is one token. +-- Shell separators (; | || & &&) are emitted as their own tokens so the caller +-- can split into command segments without being fooled by quotes. 
-- Split a shell command line into raw words and separator tokens.
--
-- Words keep their quotes and backslashes (nothing is unquoted here), so a
-- quoted script such as 's/a/b/; s/c/d/' comes back as ONE token. The
-- separators `;`, `|`, `||`, `&`, `&&` are returned as their own tokens, and
-- an unquoted newline is reported as `;`.
--
-- Quoting rules applied:
--   * '...'  runs to the next single quote; backslash is literal inside.
--   * "..."  runs to the next double quote that is not backslash-escaped.
--   * \x     outside quotes keeps `x` attached to the current word.
--   * \<newline> outside quotes is a line continuation and is skipped.
--
-- @param s  command line (string)
-- @return   array of token strings
local function posix_tokenise(s)
  local toks, i, n = {}, 1, #s
  while i <= n do
    local c = s:sub(i, i)
    local nxt = s:sub(i + 1, i + 1)
    if c == "\\" and (nxt == "\n" or nxt == "\r") then
      -- Line continuation: joins the two lines, so it must not become `;`.
      i = i + 2
      if nxt == "\r" and s:sub(i, i) == "\n" then i = i + 1 end
    elseif c == "\n" or c == "\r" then
      toks[#toks + 1] = ";" -- newline is a command separator
      i = i + 1
    elseif c:match("%s") then
      i = i + 1
    elseif c == ";" then
      toks[#toks + 1] = ";"
      i = i + 1
    elseif c == "|" or c == "&" then
      if nxt == c then
        toks[#toks + 1] = c .. c -- `||` / `&&`
        i = i + 2
      else
        toks[#toks + 1] = c
        i = i + 1
      end
    else
      local start = i
      while i <= n do
        local ch = s:sub(i, i)
        if ch == "\\" then
          local after = s:sub(i + 1, i + 1)
          -- Leave a continuation for the outer loop to skip.
          if after == "\n" or after == "\r" then break end
          i = i + 2 -- the escaped character stays part of this word
        elseif ch == "'" then
          i = i + 1
          while i <= n and s:sub(i, i) ~= "'" do i = i + 1 end
          i = i + 1 -- past the closing quote (or end of string)
        elseif ch == '"' then
          i = i + 1
          while i <= n and s:sub(i, i) ~= '"' do
            -- `\"` must not terminate the quoted region.
            if s:sub(i, i) == "\\" then i = i + 1 end
            i = i + 1
          end
          i = i + 1 -- past the closing quote (or end of string)
        elseif ch:match("%s") or ch == ";" or ch == "|" or ch == "&" then
          break
        else
          i = i + 1
        end
      end
      toks[#toks + 1] = s:sub(start, i - 1)
    end
  end
  return toks
end

-- Tokens that end one command segment and start the next.
local INPLACE_SEPARATORS = { [";"] = true, ["|"] = true, ["||"] = true, ["&"] = true, ["&&"] = true }

-- gawk options whose operand is a separate word that is neither the program
-- nor a file (`-v x=1`, `-F ,`, `-i lib`, `-l ext`).
local AWK_OPERAND_FLAGS = {
  ["-v"] = true, ["--assign"] = true,
  ["-F"] = true, ["--field-separator"] = true,
  ["-i"] = true, ["--include"] = true,
  ["-l"] = true, ["--load"] = true,
}

-- gawk options that supply the program themselves, so no positional program
-- follows and every remaining positional is a file.
local AWK_PROGRAM_FLAGS = {
  ["-f"] = true, ["--file"] = true,
  ["-e"] = true, ["--source"] = true,
  ["-E"] = true, ["--exec"] = true,
}

-- True for perl/ruby switches that carry an attached operand (`-Ilib`,
-- `-Mstrict`, `-mFoo`, `-rjson`). Their operand text may contain "i" or end
-- in "e", so they must never be read as an in-place or script switch.
local function has_attached_operand(t)
  return t:match("^%-[IMmr]") ~= nil
end

-- A perl/ruby in-place flag: `-i`, a switch cluster containing `i` (`-pi`,
-- `-0pi`, `-ni`, `-pie`), or either of those followed by a backup suffix
-- (`-i.bak`, `-pi.bak`, `-i~`).
local function is_perl_inplace_flag(t)
  if has_attached_operand(t) then return false end
  if t:match("^%-%w*i%w*$") then return true end -- -i / -pi / -0pi / -pie
  return t:match("^%-%w*i[%.~]%S*$") ~= nil -- -i.bak / -pi.bak / -i~
end

-- A perl/ruby switch cluster ending in e/E (`-e`, `-pe`, `-pie`, `-E`): the
-- NEXT token is its operand (the script text), never a file.
local function is_script_flag(t)
  return not has_attached_operand(t) and t:match("^%-%w*[eE]$") ~= nil
end

-- Last path component of `t`, accepting both `/` and `\` as separators.
local function basename(t) return (t:gsub(".*[/\\]", "")) end

-- Copy of `seg` without shell redirections. A token that starts with an
-- optional fd number and `<`/`>` is dropped; when it is only the operator
-- (`>`, `>>`, `2>`, `<`) the following token is its target and is dropped too.
-- Quoted scripts are unaffected because their token starts with the quote.
local function strip_redirections(seg)
  local kept, j = {}, 1
  while j <= #seg do
    local t = seg[j]
    if t:match("^%d*[<>]") then
      if t:match("^%d*[<>]+$") then j = j + 1 end
    else
      kept[#kept + 1] = t
    end
    j = j + 1
  end
  return kept
end

-- perl / ruby: files are the positionals after the first script, provided an
-- in-place flag is present somewhere in the segment. Every `-e` cluster
-- swallows the token after it, so a second `-e 'script'` is not a file.
local function perl_like_targets(seg, first)
  local has_inplace, seen_script, files = false, false, {}
  local j = first
  while j <= #seg do
    local t = seg[j]
    if t:match("^%-") then
      -- One cluster can be both (`-pie`), so test the two independently.
      if is_perl_inplace_flag(t) then has_inplace = true end
      if is_script_flag(t) then
        seen_script = true
        j = j + 1 -- skip the script text
      end
    elseif seen_script then
      files[#files + 1] = t
    end
    j = j + 1
  end
  if has_inplace and seen_script then return files end
  return {}
end

-- awk / gawk: in-place mode is requested with `-i inplace` (also accepted
-- here: `--include inplace`, `-iinplace`, `--include=inplace`). After it, the
-- first positional is the awk program unless a -f/-e/-E style option already
-- supplied one; the remaining positionals are files.
local function gawk_targets(seg, first)
  local inplace_at
  for j = first, #seg do
    local t = seg[j]
    if (t == "-i" or t == "--include") and seg[j + 1] == "inplace" then
      inplace_at = j + 1
      break
    elseif t == "-iinplace" or t == "--include=inplace" then
      inplace_at = j
      break
    end
  end
  if not inplace_at then return {} end

  local files, seen_program = {}, false
  local j = inplace_at + 1
  while j <= #seg do
    local t = seg[j]
    if t:match("^%-") then
      -- Options are only interpreted before the program; a dash token after
      -- it is skipped without consuming anything.
      if not seen_program then
        if AWK_PROGRAM_FLAGS[t] then
          seen_program = true
          j = j + 1 -- skip the program file / program text
        elseif AWK_OPERAND_FLAGS[t] then
          j = j + 1 -- skip the option's operand (e.g. `x=1` after `-v`)
        elseif t:match("^%-[feE].") or t:match("^%-%-file=")
          or t:match("^%-%-source=") or t:match("^%-%-exec=") then
          seen_program = true -- attached form: `-fprog.awk`, `--file=prog.awk`
        end
      end
    elseif not seen_program then
      seen_program = true -- the awk program
    else
      files[#files + 1] = t
    end
    j = j + 1
  end
  return files
end

-- File targets for one separator-free command segment.
--
-- Leading `VAR=value` assignments and a leading `sudo` (plus any dash flags
-- right after it) are skipped to find the executable. Only perl, ruby, awk
-- and gawk are recognised; anything else yields an empty list.
--
-- @param seg  array of tokens with no separator tokens in it
-- @return     array of raw file tokens (quotes, if any, are still attached)
local function inplace_targets(seg)
  seg = strip_redirections(seg)
  local idx = 1
  while seg[idx] and seg[idx]:match("^[%a_][%w_]*=") do idx = idx + 1 end
  if seg[idx] == "sudo" then
    idx = idx + 1
    while seg[idx] and seg[idx]:match("^%-") do idx = idx + 1 end
  end
  local exe = basename(seg[idx] or "")

  if exe == "perl" or exe == "ruby" then
    return perl_like_targets(seg, idx + 1)
  elseif exe == "awk" or exe == "gawk" then
    return gawk_targets(seg, idx + 1)
  end
  return {}
end

-- Collect the files rewritten in place by perl / ruby / gawk commands.
--
-- The command is tokenised quote-aware, cut into segments at shell
-- separators, and each segment is inspected on its own. Read-only one-liners
-- (no in-place flag) contribute nothing.
--
-- @param cmd  shell command line; a non-string yields an empty list
-- @return     array of raw file tokens in command order
local function detect_inplace_edit(cmd)
  if type(cmd) ~= "string" then return {} end
  local out, seg = {}, {}
  local function flush()
    for _, f in ipairs(inplace_targets(seg)) do out[#out + 1] = f end
    seg = {}
  end
  for _, t in ipairs(posix_tokenise(cmd)) do
    if INPLACE_SEPARATORS[t] then flush() else seg[#seg + 1] = t end
  end
  flush()
  return out
end

-- Wiring (unchanged by this block): M.detect_write_paths appends every token
-- returned by detect_inplace_edit(cmd) to its `raw` list, right after the
-- `sed -i` targets.
for _, sub in ipairs(each_subcommand(cmd)) do local _, ps_write = detect_ps(sub) diff --git a/tests/plugin/pre_tool_shell_detect_spec.lua b/tests/plugin/pre_tool_shell_detect_spec.lua index a14f9c1..21063e6 100644 --- a/tests/plugin/pre_tool_shell_detect_spec.lua +++ b/tests/plugin/pre_tool_shell_detect_spec.lua @@ -89,6 +89,37 @@ describe("shell_detect.detect_write_paths (POSIX)", function() end end) +-- In-place editors (perl/ruby/awk) — write the trailing file(s) in place, like +-- sed -i. Require the in-place flag so read-only one-liners aren't flagged. +describe("shell_detect.detect_write_paths (in-place editors)", function() + local cases = { + -- The real codex sample: -0pi cluster, multi-statement substitution. + { name = "perl -0pi real sample", cmd = [[perl -0pi -e 's/(\n)/$1\n/' README.md]], expect = { CWD .. "/README.md" } }, + { name = "perl -pi -e", cmd = "perl -pi -e 's/a/b/' foo.txt", expect = { CWD .. "/foo.txt" } }, + { name = "perl -i.bak backup", cmd = "perl -i.bak -pe 's/a/b/' foo.txt", expect = { CWD .. "/foo.txt" } }, + { name = "perl -pie bundled e", cmd = "perl -pie 's/a/b/' foo.txt", expect = { CWD .. "/foo.txt" } }, + -- `;` inside the single-quoted script must not split the command. + { name = "perl multi-statement", cmd = "perl -pi -e 's/a/b/; s/c/d/' foo.txt", expect = { CWD .. "/foo.txt" } }, + { name = "perl multiple files", cmd = "perl -pi -e 's/a/b/' a.txt b.txt", expect = { CWD .. "/a.txt", CWD .. "/b.txt" } }, + { name = "perl absolute target", cmd = "perl -pi -e 's/a/b/' /etc/hosts", expect = { "/etc/hosts" } }, + { name = "sudo perl", cmd = "sudo perl -pi -e 's/a/b/' /etc/hosts", expect = { "/etc/hosts" } }, + -- Segment splitting: the cd is a separate command; only perl writes. + { name = "perl after && chain", cmd = "cd sub && perl -pi -e 's/a/b/' f.txt", expect = { CWD .. "/f.txt" } }, + { name = "ruby -i -pe", cmd = [[ruby -i -pe 'gsub(/a/,"b")' foo.txt]], expect = { CWD .. 
"/foo.txt" } }, + { name = "gawk -i inplace", cmd = "gawk -i inplace '{print}' data.txt", expect = { CWD .. "/data.txt" } }, + -- Negatives: no in-place flag → read-only → nothing flagged. + { name = "perl read-only -ne", cmd = "perl -ne 'print' foo.txt", expect = {} }, + { name = "awk read-only", cmd = "awk '{print}' data.txt", expect = {} }, + { name = "perl -e no file", cmd = "perl -e 'print 1'", expect = {} }, + } + for _, c in ipairs(cases) do + it(c.name, function() + if IS_WIN then return pending("POSIX path semantics: Unix-only") end + assert.are.same(sorted(c.expect), sorted(shell_detect.detect_write_paths(c.cmd, CWD))) + end) + end +end) + describe("shell_detect.detect combined (POSIX)", function() it("returns both rm and write paths", function() if IS_WIN then return pending("POSIX path semantics: Unix-only") end