From 9bfb5e735c5d2f8f06772f4d1434f9bcfa253d44 Mon Sep 17 00:00:00 2001 From: jamestexas <18285880+jamestexas@users.noreply.github.com> Date: Tue, 19 May 2026 21:26:55 -0600 Subject: [PATCH] security(smell-rules): validate ScopeColumn at load time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit External smell rules from $MACHE_SMELL_RULES_DIR are appended to the registry and their ScopeColumn value is interpolated unescaped into runSmellRule's SQL (`"AND " + rule.ScopeColumn + " = ?"`). The trust boundary is operator-controlled, so this isn't a vulnerability — but the cost of a load-time whitelist is one regex-shaped check and the value is "a typo or malicious external rule can't smuggle a `;` terminator, `--` line comment, or unexpected characters into the SQL composition path." Whitelist mirrors the character set the built-in ScopeColumn values actually use (identifiers, `.`, `,`, `(`, `)`, `'`, space) — proven by TestValidateScopeColumn_AcceptsBuiltinShapes which iterates the registry. Rejection coverage in TestValidateScopeColumn_RejectsInjectionShapes spans `;`, `--`, `/*`, `*`, `=`, backtick, double-quote, newline. End-to-end TestLoadExternalSmellRules_RejectsInjectableScopeColumn proves a malicious JSON rule never reaches runSmellRule. --- cmd/serve_find_smells_load.go | 42 ++++++++++++++ cmd/serve_find_smells_load_test.go | 88 ++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) diff --git a/cmd/serve_find_smells_load.go b/cmd/serve_find_smells_load.go index 143a5de2..210aab88 100644 --- a/cmd/serve_find_smells_load.go +++ b/cmd/serve_find_smells_load.go @@ -124,6 +124,17 @@ func LoadExternalSmellRules(dir string) ([]SmellRule, error) { // runSmellRule splices the scope clause via fmt.Sprintf(query, scope). // An unescaped `%f` would be treated as the float verb and produce // `%!f(string=...)` corruption at runtime. Reject at load time. +// +// ScopeColumn is interpolated unescaped into the SQL (runSmellRule +// builds `"AND " + rule.ScopeColumn + " = ?"`), so an external rule +// could in principle break out of the scope clause with `;`, `--`, +// or unexpected characters. The trust boundary here is whoever +// controls $MACHE_SMELL_RULES_DIR (operator), so this is defense in +// depth rather than a vulnerability — but the cost is one regex-shaped +// check, and the value is "a typo in an external rule can't silently +// corrupt the query." The whitelist mirrors what the built-in +// ScopeColumn values use (identifiers, dots, COALESCE/NULLIF call +// shape, comma, paren, single quote, space). func validateSmellRule(r SmellRule) error { if strings.TrimSpace(r.ID) == "" { return fmt.Errorf("rule ID is required") @@ -140,5 +151,36 @@ func validateSmellRule(r SmellRule) error { if strings.Contains(formatted, "%!") { return fmt.Errorf("rule %q: Query has unescaped '%%' chars (other than the single '%%s' placeholder); SQL '%%' must be escaped as '%%%%' (e.g. LIKE '%%%%foo%%%%')", r.ID) } + if err := validateScopeColumn(r.ScopeColumn); err != nil { + return fmt.Errorf("rule %q: %w", r.ID, err) + } + return nil +} + +// validateScopeColumn rejects ScopeColumn values that contain SQL +// statement terminators, comment sequences, or characters outside +// the small set the built-in registry uses. Empty is allowed (the +// rule opts out of source_id scoping). See validateSmellRule's +// comment for the threat model. +func validateScopeColumn(s string) error { + if s == "" { + return nil + } + if strings.Contains(s, ";") { + return fmt.Errorf("ScopeColumn must not contain ';' (statement terminator)") + } + if strings.Contains(s, "--") { + return fmt.Errorf("ScopeColumn must not contain '--' (SQL line comment)") + } + for _, r := range s { + switch { + case 'a' <= r && r <= 'z': + case 'A' <= r && r <= 'Z': + case '0' <= r && r <= '9': + case r == '_' || r == '.' || r == ',' || r == '(' || r == ')' || r == '\'' || r == ' ': + default: + return fmt.Errorf("ScopeColumn contains disallowed character %q; allowed: identifiers, '.', ',', '(', ')', \"'\", and space", r) + } + } return nil } diff --git a/cmd/serve_find_smells_load_test.go b/cmd/serve_find_smells_load_test.go index 2ac58c2e..f4c19bd5 100644 --- a/cmd/serve_find_smells_load_test.go +++ b/cmd/serve_find_smells_load_test.go @@ -210,6 +210,94 @@ func TestLoadExternalSmellRules_FilePassedAsDirRejected(t *testing.T) { assert.Contains(t, err.Error(), "not a directory") } +// TestValidateScopeColumn_AcceptsBuiltinShapes pins that every +// ScopeColumn value used by built-in rules passes the load-time +// validator. If a future contributor tightens the whitelist in a way +// that breaks a real shape, this catches it before the loader starts +// rejecting legitimate external rules that copy the built-in pattern. +func TestValidateScopeColumn_AcceptsBuiltinShapes(t *testing.T) { + for _, r := range smellRegistry { + if r.ScopeColumn == "" { + continue + } + t.Run(r.ID, func(t *testing.T) { + require.NoError(t, validateScopeColumn(r.ScopeColumn), + "built-in ScopeColumn %q must pass validation", r.ScopeColumn) + }) + } +} + +// TestValidateScopeColumn_RejectsInjectionShapes asserts the +// security hardening: even though the trust boundary for external +// rules is "operator controls the rules dir", a malicious or buggy +// rule file cannot smuggle a `;`-terminated second statement, a +// `--` line comment, or any character outside the small whitelist +// that the built-in registry actually uses. +func TestValidateScopeColumn_RejectsInjectionShapes(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + {"semicolon terminator", "n.source_file; DROP TABLE nodes", "statement terminator"}, + {"line comment", "n.source_file --", "line comment"}, + {"slash-star block comment", "n.source_file /* foo */", "disallowed character"}, + {"asterisk wildcard", "n.*", "disallowed character"}, + {"equals comparator", "n.source_file = 'x'", "disallowed character"}, + {"backtick", "`n.source_file`", "disallowed character"}, + {"double quote identifier", `"n.source_file"`, "disallowed character"}, + {"newline injection", "n.source_file\nDROP TABLE", "disallowed character"}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + err := validateScopeColumn(c.in) + require.Error(t, err) + assert.Contains(t, err.Error(), c.want) + }) + } +} + +// TestLoadExternalSmellRules_RejectsInjectableScopeColumn is the +// end-to-end version: a JSON file whose ScopeColumn carries a `;` +// must not load. Closes the loop on the user's "ensure we are +// making our queries safe / not making them injectable" directive +// for the only surface where external (non-source-tree) input +// reaches runSmellRule's SQL composition path. +func TestLoadExternalSmellRules_RejectsInjectableScopeColumn(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "evil.json"), []byte(`{ + "ID": "evil_scope", + "Description": "Tries to escape the scope clause", + "Query": "SELECT 0,0,0,0,0,0,0 FROM nodes %s", + "ScopeColumn": "n.source_file; DROP TABLE nodes; --" + }`), 0o644)) + + _, err := LoadExternalSmellRules(dir) + require.Error(t, err) + assert.Contains(t, err.Error(), "ScopeColumn") + assert.Contains(t, err.Error(), "statement terminator") +} + +// TestLoadExternalSmellRules_AcceptsBuiltinShapedScopeColumn pins +// that a rule using the same ScopeColumn shapes the built-ins use +// (dotted identifiers, COALESCE/NULLIF) loads cleanly. Without +// this, the hardening could quietly grow stricter than the +// built-ins and lock out reasonable external rules. +func TestLoadExternalSmellRules_AcceptsBuiltinShapedScopeColumn(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "coalesce.json"), []byte(`{ + "ID": "external_coalesce_scope", + "Description": "Uses the same COALESCE shape several built-ins use", + "Query": "SELECT 0,0,0,0,0,0,0 FROM nodes n %s", + "ScopeColumn": "COALESCE(NULLIF(n.source_file, ''), '')" + }`), 0o644)) + + rules, err := LoadExternalSmellRules(dir) + require.NoError(t, err) + require.Len(t, rules, 1) + assert.Equal(t, "external_coalesce_scope", rules[0].ID) +} + // snapshotSmellRegistry captures the current registry so a test // that mutates it via appendExternalRulesFromEnv can restore the // original state. The package-level smellRegistry is the source