Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CONTEXT.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ Origin prefixes exist because some agents (observed with GPT-class models in Cod

Two levels of fidelity for handling `Bash` proposals.

- **Tier 1** — *implemented today.* Static regex parsing of the shell command for redirections (`>`, `>>`), atomic-replace (`mv X.tmp X`), `cp`, `tee`, and `sed -i` targets. Sets a [change](#change) with a `bash_*` [origin prefix](#origin-prefix); does **not** open a [preview](#preview). The user sees the file was touched via the neo-tree [indicator](#indicator) but reviews the actual content via their normal diff workflow after the fact.
- **Tier 1** — *implemented today.* Static parsing of the shell command for write targets: redirections (`>`, `>>`), atomic-replace (`mv X.tmp X`), `cp`, `tee`, `sed -i`, and other in-place editors (`perl -i`, `ruby -i`, `gawk -i inplace`) — plus PowerShell equivalents on Windows (`Remove-Item`, `Set-Content`, `Out-File`, `Move-Item`, `Copy-Item`). Implemented in `pre_tool/shell_detect.lua`. Sets a [change](#change) with a `bash_*` [origin prefix](#origin-prefix); does **not** open a [preview](#preview). The user sees the file was touched via the neo-tree [indicator](#indicator) but reviews the actual content via their normal diff workflow after the fact.
- **Tier 2** — *not implemented.* Would compute and display real content diffs for shell-writes. Open design question; sandboxing was rejected (see [ADR-0001](docs/adr/0001-origin-prefixed-status-values.md)). The name exists so deferred work has a label, not a commitment.

## Source path / File path / Display path
Expand Down
122 changes: 122 additions & 0 deletions lua/code-preview/pre_tool/shell_detect.lua
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,127 @@ local function detect_sed_i(cmd)
return out
end

-- ── In-place file editors: perl / ruby / awk ─────────────────────
--
-- Like `sed -i`, these rewrite their trailing file(s) in place (Tier-1
-- indicator only, no diff). They get their own quote-aware path rather than a
-- redirect/`each_subcommand`-style scan because an in-place script routinely
-- contains `;` and `|` (`perl -pi -e 's/a/b/; s/c/d/'`) that the char-walk
-- scanners would mis-cut. We require the in-place flag so read-only one-liners
-- (`perl -ne 'print' f`, `awk '{print}' f`) are never flagged.

-- Quote-aware POSIX tokeniser.
--
-- Splits a shell command string into a list of raw tokens:
--   * Single- and double-quoted regions span whitespace and separators and
--     stay attached to their word, so a quoted script is one token. Quotes
--     are NOT stripped; tokens are returned exactly as written.
--   * Shell separators (; | || & &&) are emitted as their own tokens so the
--     caller can split into command segments without being fooled by quotes.
--     A bare newline / carriage return is emitted as ";".
--   * Backslash escapes follow POSIX quoting: outside quotes and inside
--     double quotes a backslash protects the next character (so `\"` does not
--     close a double-quoted script and `my\ file` is one word); inside single
--     quotes a backslash is literal.
--   * A backslash immediately followed by a newline is a line continuation:
--     it ends the current word (if any) and produces no token.
--
-- An unterminated quote simply runs to the end of the string.
--
-- @param s  shell command text
-- @return   array of token strings (possibly empty)
local function posix_tokenise(s)
  local toks, i, n = {}, 1, #s

  -- Length of a backslash line continuation starting at `pos` (2 for "\\\n",
  -- 3 for "\\\r\n"), or 0 when there is none.
  local function continuation_len(pos)
    if s:sub(pos, pos) ~= "\\" then return 0 end
    if s:sub(pos + 1, pos + 1) == "\n" then return 2 end
    if s:sub(pos + 1, pos + 2) == "\r\n" then return 3 end
    return 0
  end

  while i <= n do
    local c = s:sub(i, i)
    local cont = continuation_len(i)
    if cont > 0 then
      i = i + cont -- joined lines: not a separator, not part of any word
    elseif c == "\n" or c == "\r" then
      toks[#toks + 1] = ";"; i = i + 1 -- newline is a command separator
    elseif c:match("%s") then
      i = i + 1
    elseif c == ";" then
      toks[#toks + 1] = ";"; i = i + 1
    elseif c == "|" then
      if s:sub(i + 1, i + 1) == "|" then toks[#toks + 1] = "||"; i = i + 2
      else toks[#toks + 1] = "|"; i = i + 1 end
    elseif c == "&" then
      if s:sub(i + 1, i + 1) == "&" then toks[#toks + 1] = "&&"; i = i + 2
      else toks[#toks + 1] = "&"; i = i + 1 end
    else
      local start = i
      while i <= n do
        local ch = s:sub(i, i)
        if ch == "\\" then
          -- A continuation ends the word; any other escape keeps the
          -- following character (space, quote, separator) inside the word.
          if continuation_len(i) > 0 then break end
          i = i + 2
        elseif ch == "'" then
          -- Single quotes: everything up to the next ' is literal.
          i = i + 1
          while i <= n and s:sub(i, i) ~= "'" do i = i + 1 end
          i = i + 1 -- past the closing quote (or end of string)
        elseif ch == '"' then
          -- Double quotes: a backslash protects the next character, so an
          -- escaped \" does not terminate the region.
          i = i + 1
          while i <= n and s:sub(i, i) ~= '"' do
            if s:sub(i, i) == "\\" then i = i + 2 else i = i + 1 end
          end
          i = i + 1 -- past the closing quote (or end of string)
        elseif ch:match("%s") or ch == ";" or ch == "|" or ch == "&" then
          break
        else
          i = i + 1
        end
      end
      toks[#toks + 1] = s:sub(start, i - 1)
    end
  end
  return toks
end

-- Set of tokens that end one command segment and start the next.
local INPLACE_SEPARATORS = {}
for _, sep in ipairs({ ";", "|", "||", "&", "&&" }) do
  INPLACE_SEPARATORS[sep] = true
end

-- A perl/ruby in-place flag: `-i`, a switch cluster containing `i` (`-pi`,
-- `-0pi`, `-ni`, `-pie`), or either of those with a backup suffix attached
-- (`-i.bak`, `-pi.bak`, `-i~`).
--
-- Switches that carry their argument in the same word are excluded, because
-- that argument may contain an "i" without requesting in-place mode:
--   -M<module> / -m<module>  (perl module import, e.g. -MList::Util, -mInline)
--   -I<dir>                  (include path, e.g. -Ilib)
--   -r<library>              (ruby require, e.g. -rfileutils)
--
-- @param t  one raw command token
-- @return   true when the token turns on in-place editing, false otherwise
local function is_perl_inplace_flag(t)
  if t:match("^%-[MmIr]") then return false end
  -- In-place switch (alone or at the end of a cluster) with a backup suffix.
  if t:match("^%-%w*i[%.~]%S*$") then return true end -- -i.bak / -pi.bak / -i~
  return t:match("^%-%w*i%w*$") ~= nil -- -i / -pi / -0pi / -pie
end

-- Final path component of `t`: whatever follows the last `/` or `\`
-- (the whole string when it contains neither, "" when it ends in one).
local function basename(t)
  local tail = t:match("[^/\\]*$")
  return tail
end

-- File targets for one separator-free command segment.
--
-- `seg` is an array of raw tokens making up a single command (no `;`, `|`,
-- `&` separators inside). An optional leading `sudo` is skipped, then the
-- executable's basename selects the grammar:
--   * perl / ruby: requires both an in-place flag and an `-e`/`-E` script
--     switch; every non-option word after the script is a target.
--   * awk / gawk: requires `-i inplace`; the first positional word is the awk
--     program (unless it was supplied through `-f` / `-e`), the rest are
--     targets.
-- Shell redirections that trail the command (`> log`, `2>/dev/null`, `< in`)
-- are never reported as targets.
--
-- @param seg  array of token strings
-- @return     array of raw file tokens (empty when nothing is edited in place)
local function inplace_targets(seg)
  local idx = 1
  if seg[idx] == "sudo" then idx = idx + 1 end
  local exe = basename(seg[idx] or "")

  -- A redirection word: optional fd number, then `<` or `>` (`>`, `>>`, `2>`,
  -- `>out`, `2>/dev/null`). Quoted words start with a quote and never match.
  local function is_redirection(t) return t:match("^%d*[<>]") ~= nil end
  -- A redirection operator on its own: its target is the NEXT token.
  local function is_bare_redirection(t) return t:match("^%d*[<>]+$") ~= nil end

  if exe == "perl" or exe == "ruby" then
    -- The `-e`/`-E` switch may be bundled (`-pe`, `-pie`); detect any flag
    -- cluster ending in e/E, and the in-place flag anywhere in the segment.
    local has_inplace, script_idx = false, nil
    for j = idx + 1, #seg do
      local t = seg[j]
      if not script_idx and t:match("^%-%w*[eE]$") and not t:match("^%-M") then
        script_idx = j
      end
      if is_perl_inplace_flag(t) then has_inplace = true end
    end
    if not (has_inplace and script_idx) then return {} end
    local files, skip_next = {}, false
    for j = script_idx + 2, #seg do -- skip the -e flag and its script
      local t = seg[j]
      if skip_next then
        skip_next = false -- target of a bare redirection operator
      elseif is_redirection(t) then
        skip_next = is_bare_redirection(t)
      elseif not t:match("^%-") then
        files[#files + 1] = t
      end
    end
    return files

  elseif exe == "awk" or exe == "gawk" then
    -- gawk in-place mode is `-i inplace`. Options that take a separate
    -- argument (-v assignment, -F separator, -i include, -f progfile,
    -- -e source) must consume that argument, otherwise e.g. the value of
    -- `-v inplace::suffix=.bak` is mistaken for the program and the real
    -- program is reported as a file.
    local files = {}
    local has_inplace, seen_program, skip_next = false, false, false
    for j = idx + 1, #seg do
      local t = seg[j]
      if skip_next then
        skip_next = false -- option argument or redirection target
      elseif t == "-i" then
        if seg[j + 1] == "inplace" then has_inplace = true end
        skip_next = true
      elseif t == "-f" or t == "-e" then
        seen_program = true -- program comes from the option's argument
        skip_next = true
      elseif t == "-v" or t == "-F" then
        skip_next = true
      elseif is_redirection(t) then
        skip_next = is_bare_redirection(t)
      elseif t:match("^%-") then
        -- Any other flag, including attached forms (-F:, -vx=1). An attached
        -- -f<progfile> still supplies the program.
        if t:match("^%-f.") then seen_program = true end
      elseif not seen_program then
        seen_program = true -- the awk program text
      else
        files[#files + 1] = t
      end
    end
    if not has_inplace then return {} end
    return files
  end
  return {}
end

-- Collect the files rewritten in place by any perl/ruby/awk command in `cmd`.
--
-- The command line is tokenised, cut into segments at shell separators, and
-- each non-empty segment is handed to `inplace_targets`; the per-segment
-- results are concatenated in order.
--
-- @param cmd  full shell command text
-- @return     array of raw file tokens (possibly empty)
local function detect_inplace_edit(cmd)
  local targets = {}
  local segment = {}

  -- Resolve the segment gathered so far and start a fresh one.
  local function harvest()
    if #segment > 0 then
      local found = inplace_targets(segment)
      for k = 1, #found do
        targets[#targets + 1] = found[k]
      end
    end
    segment = {}
  end

  local tokens = posix_tokenise(cmd)
  for k = 1, #tokens do
    local tok = tokens[k]
    if INPLACE_SEPARATORS[tok] then
      harvest()
    else
      segment[#segment + 1] = tok
    end
  end
  harvest() -- the last segment has no trailing separator
  return targets
end

-- ── PowerShell command grammar ───────────────────────────────────
--
-- PowerShell cmdlets are PascalCase Verb-Noun (`Remove-Item`) with aliases
Expand Down Expand Up @@ -580,6 +701,7 @@ function M.detect_write_paths(cmd, cwd)
for _, p in ipairs(detect_mv_cp(cmd)) do raw[#raw + 1] = p end
for _, p in ipairs(detect_tee(cmd)) do raw[#raw + 1] = p end
for _, p in ipairs(detect_sed_i(cmd)) do raw[#raw + 1] = p end
for _, p in ipairs(detect_inplace_edit(cmd)) do raw[#raw + 1] = p end
-- PowerShell write / move / copy targets (raw tokens).
for _, sub in ipairs(each_subcommand(cmd)) do
local _, ps_write = detect_ps(sub)
Expand Down
31 changes: 31 additions & 0 deletions tests/plugin/pre_tool_shell_detect_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,37 @@ describe("shell_detect.detect_write_paths (POSIX)", function()
end
end)

-- In-place editors (perl/ruby/awk) — write the trailing file(s) in place, like
-- sed -i. Require the in-place flag so read-only one-liners aren't flagged.
describe("shell_detect.detect_write_paths (in-place editors)", function()
  -- Each scenario: a command line and the absolute paths it must report.
  local scenarios = {
    -- The real codex sample: -0pi cluster, multi-statement substitution.
    {
      name = "perl -0pi real sample",
      cmd = [[perl -0pi -e 's/(<!-- a -->\n)/$1<!-- b -->\n/' README.md]],
      expect = { CWD .. "/README.md" },
    },
    { name = "perl -pi -e", cmd = "perl -pi -e 's/a/b/' foo.txt", expect = { CWD .. "/foo.txt" } },
    { name = "perl -i.bak backup", cmd = "perl -i.bak -pe 's/a/b/' foo.txt", expect = { CWD .. "/foo.txt" } },
    { name = "perl -pie bundled e", cmd = "perl -pie 's/a/b/' foo.txt", expect = { CWD .. "/foo.txt" } },
    -- `;` inside the single-quoted script must not split the command.
    { name = "perl multi-statement", cmd = "perl -pi -e 's/a/b/; s/c/d/' foo.txt", expect = { CWD .. "/foo.txt" } },
    {
      name = "perl multiple files",
      cmd = "perl -pi -e 's/a/b/' a.txt b.txt",
      expect = { CWD .. "/a.txt", CWD .. "/b.txt" },
    },
    { name = "perl absolute target", cmd = "perl -pi -e 's/a/b/' /etc/hosts", expect = { "/etc/hosts" } },
    { name = "sudo perl", cmd = "sudo perl -pi -e 's/a/b/' /etc/hosts", expect = { "/etc/hosts" } },
    -- Segment splitting: the cd is a separate command; only perl writes.
    { name = "perl after && chain", cmd = "cd sub && perl -pi -e 's/a/b/' f.txt", expect = { CWD .. "/f.txt" } },
    { name = "ruby -i -pe", cmd = [[ruby -i -pe 'gsub(/a/,"b")' foo.txt]], expect = { CWD .. "/foo.txt" } },
    { name = "gawk -i inplace", cmd = "gawk -i inplace '{print}' data.txt", expect = { CWD .. "/data.txt" } },
    -- Negatives: no in-place flag → read-only → nothing flagged.
    { name = "perl read-only -ne", cmd = "perl -ne 'print' foo.txt", expect = {} },
    { name = "awk read-only", cmd = "awk '{print}' data.txt", expect = {} },
    { name = "perl -e no file", cmd = "perl -e 'print 1'", expect = {} },
  }

  for k = 1, #scenarios do
    local scenario = scenarios[k]
    it(scenario.name, function()
      if IS_WIN then
        return pending("POSIX path semantics: Unix-only")
      end
      assert.are.same(
        sorted(scenario.expect),
        sorted(shell_detect.detect_write_paths(scenario.cmd, CWD))
      )
    end)
  end
end)

describe("shell_detect.detect combined (POSIX)", function()
it("returns both rm and write paths", function()
if IS_WIN then return pending("POSIX path semantics: Unix-only") end
Expand Down
Loading