Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/wardline/scanner/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,15 @@ def _analyze_inner(self, files: Sequence[Path], config: WardlineConfig, *, root:
if self._cache is not None:
result = resolve_project_taints(
modules=modules,
provider_fingerprint=self._provider.fingerprint(),
provider_fingerprint=parse_stage.provider_fingerprint,
summary_cache=self._cache,
dirty_modules=frozenset(dirty_modules),
config=config,
)
else:
result = resolve_project_taints(
modules=modules,
provider_fingerprint=self._provider.fingerprint(),
provider_fingerprint=parse_stage.provider_fingerprint,
config=config,
)

Expand Down
26 changes: 23 additions & 3 deletions src/wardline/scanner/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections.abc import Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any, cast

from wardline.core.finding import Finding, Kind, Location, Severity
from wardline.core.qualname import module_dotted_name
Expand Down Expand Up @@ -59,6 +59,15 @@ class ParseProjectOutput:
files: list[ParsedFile]
parse_findings: list[Finding]
dirty_modules: frozenset[str]
provider_fingerprint: str


def _provider_fingerprint_for_project(provider: TaintSourceProvider, project_modules: frozenset[str]) -> str:
    """Return the fingerprint to use for *provider* in this project scan.

    If the provider exposes a callable ``fingerprint_for_project`` attribute,
    it is called with *project_modules* and its result is coerced to ``str``.
    Otherwise the provider's plain ``fingerprint()`` is returned.
    """
    hook = getattr(provider, "fingerprint_for_project", None)
    if not callable(hook):
        # No project-aware hook (missing or not callable): use the base fingerprint.
        return provider.fingerprint()
    # The hook is looked up dynamically, so it is treated as ``Any`` for type checkers.
    project_aware = cast(Any, hook)
    return str(project_aware(project_modules))


def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutput:
Expand All @@ -68,6 +77,17 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu
parse_findings: list[Finding] = []
dirty_modules: set[str] = set()
root = stage_input.root.resolve()
project_modules = frozenset(
module
for path in stage_input.files
Comment on lines +80 to +82

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Detect excluded shadow packages before trusting decorators

Because `project_modules` is derived only from `stage_input.files`, a repo can hide a local `wardline/decorators/__init__.py` with an exclude pattern (or otherwise omit it from discovery) while `app.py` is still scanned. At runtime, `from wardline.decorators import trusted` in `app.py` resolves to that local package, but the scanner never sees the shadow here and still anchors `@trusted` as Wardline's builtin marker, preserving the spoofing false-green this patch is intended to remove.

Useful? React with 👍 / 👎.

if (
module := module_dotted_name(
path.relative_to(root).as_posix() if path.is_relative_to(root) else path.as_posix()
)
Comment on lines +84 to +86

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Account for configured source roots when detecting shadows

When a project uses a non-`src` source root such as `source_roots: ["lib"]`, a local `lib/wardline/decorators/__init__.py` is discovered as module `lib.wardline.decorators` here, so `_project_shadows_wardline()` returns false and `from wardline.decorators import trusted` in `lib/app.py` is still anchored as Wardline's real `@trusted`. That leaves the spoofing false-green this change is meant to close for any configured import root other than `./src`.

Useful? React with 👍 / 👎.

)
is not None
)
provider_fingerprint = _provider_fingerprint_for_project(stage_input.provider, project_modules)

for path in stage_input.files:
relpath = path.relative_to(root).as_posix() if path.is_relative_to(root) else path.as_posix()
Expand All @@ -90,7 +110,6 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu
source = path.read_text(encoding="utf-8")
source_bytes = source.encode("utf-8")

provider_fingerprint = stage_input.provider.fingerprint()
from wardline.scanner.taint.project_resolver import _RESOLVER_VERSION
from wardline.scanner.taint.summary import SUMMARY_SCHEMA_VERSION, compute_cache_key

Expand All @@ -116,7 +135,7 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu
)
seeds = seed_function_taints(
entities,
ctx=SeedContext(module=module, alias_map=alias_map),
ctx=SeedContext(module=module, alias_map=alias_map, project_modules=project_modules),
provider=stage_input.provider,
)
for ent in entities:
Expand Down Expand Up @@ -205,6 +224,7 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu
files=parsed_files,
parse_findings=parse_findings,
dirty_modules=frozenset(dirty_modules),
provider_fingerprint=provider_fingerprint,
)


Expand Down
56 changes: 47 additions & 9 deletions src/wardline/scanner/taint/decorator_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from wardline.scanner.taint.provider import SeedContext

_VOCAB_PREFIX = "wardline.decorators"
_WARDLINE_ROOT = "wardline"
_TAINTSTATE_FQN = "wardline.core.taints.TaintState"


Expand Down Expand Up @@ -78,6 +79,26 @@ def _resolve_decorator_fqn(deco: ast.expr, alias_map: Mapping[str, str]) -> str
return _resolve_dotted_fqn(func, alias_map)


def _project_shadows_wardline(project_modules: frozenset[str]) -> bool:
    """Report whether the scanned project defines its own ``wardline`` package/module.

    Builtin Wardline decorator declarations are meant to refer to the installed
    marker package rather than to code supplied by the project under scan. When
    the project contains a module named ``wardline`` or any module beneath it,
    Python import resolution can bind ``wardline.decorators`` to
    attacker-controlled code, so callers use this result to fail closed on
    builtin matching for the whole scan.
    """
    nested_prefix = _WARDLINE_ROOT + "."
    for name in project_modules:
        # Either the root module itself or anything nested under it counts as a shadow.
        if name == _WARDLINE_ROOT or name.startswith(nested_prefix):
            return True
    return False


def _is_builtin_decorator_fqn(fqn: str, canonical_name: str, module_prefix: str) -> bool:
    """Return whether *fqn* is exactly one of the two accepted builtin export paths.

    The accepted paths are ``<module_prefix>.<canonical_name>`` and
    ``<module_prefix>.trust.<canonical_name>``; any other dotted path, including
    deeper or differently nested ones, is rejected.
    """
    package_export = f"{module_prefix}.{canonical_name}"
    trust_module_export = f"{module_prefix}.trust.{canonical_name}"
    return fqn in (package_export, trust_module_export)


def _level_token(value: ast.expr, alias_map: Mapping[str, str]) -> str | None:
"""Extract a TaintState name token from a keyword-argument value node.

Expand Down Expand Up @@ -179,7 +200,7 @@ def taint_for(self, entity: Entity, ctx: SeedContext) -> SeedResult:
candidates: list[FunctionTaint] = []
unprovable: list[str] = []
for deco in entity.node.decorator_list:
ft, unprov = self._match(deco, ctx.alias_map)
ft, unprov = self._match(deco, ctx.alias_map, ctx.project_modules)
if ft is not None:
candidates.append(ft)
elif unprov is not None:
Expand Down Expand Up @@ -213,7 +234,21 @@ def fingerprint(self) -> str:
return f"decorator-vocab:{REGISTRY_VERSION}"
return f"decorator-vocab:{REGISTRY_VERSION}+grammar:{_grammar_digest(self._boundary_types)}"

def _match(self, deco: ast.expr, alias_map: Mapping[str, str]) -> tuple[FunctionTaint | None, str | None]:
def fingerprint_for_project(self, project_modules: frozenset[str]) -> str:
    """Return this provider's fingerprint extended with project-level shadow state.

    Builtin seeds depend on whether the scanned project shadows ``wardline``.
    That fact is appended to the base fingerprint as ``wardline-shadowed=0`` or
    ``wardline-shadowed=1`` so that summary-cache keys differ between shadowed
    and unshadowed scan roots and a warm cache cannot reuse trusted summaries
    across them.
    """
    base = self.fingerprint()
    shadow_flag = 1 if _project_shadows_wardline(project_modules) else 0
    return f"{base}:wardline-shadowed={shadow_flag}"

def _match(
self,
deco: ast.expr,
alias_map: Mapping[str, str],
project_modules: frozenset[str],
) -> tuple[FunctionTaint | None, str | None]:
"""Match one decorator against the loaded boundary types. Returns:

``(seed, None)`` — a boundary type matched and its levels proved;
Expand All @@ -225,15 +260,18 @@ def _match(self, deco: ast.expr, alias_map: Mapping[str, str]) -> tuple[Function
fqn = _resolve_decorator_fqn(deco, alias_map)
if fqn is None:
return None, None
# A decorator matches a boundary type when its FQN is UNDER the type's module
# prefix and its final segment is the canonical name. This accepts BOTH the
# package re-export (``wardline.decorators.trusted``) and the submodule path
# (``wardline.decorators.trust.trusted``) — preserving the pre-Track-2 matcher
# exactly (it used the same prefix + last-segment rule), and generalizing it
# consistently for custom types.
# Builtin Wardline markers are security-sensitive defaults. Match only the
# exact public re-export or implementation-module export, and reject them
# when the scanned project itself defines ``wardline`` (which would shadow
# the real marker package under normal import resolution). Custom grammar
# markers keep the documented prefix + canonical-name matching behavior.
last = fqn.rsplit(".", 1)[-1]
wardline_shadowed = _project_shadows_wardline(project_modules)
for bt in self._boundary_types:
if last != bt.canonical_name or not fqn.startswith(bt.module_prefix + "."):
if bt.builtin:
if wardline_shadowed or not _is_builtin_decorator_fqn(fqn, bt.canonical_name, bt.module_prefix):

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Require builtin decorators to be import-proven

In a scanned module that defines a local object named `wardline` (for example, a class or namespace with `decorators.trusted`) and then uses `@wardline.decorators.trusted`, there is no project module shadow to trip `wardline_shadowed`, but `_resolve_decorator_fqn()` still returns the exact builtin-looking string and this branch anchors the function as trusted. The decorator root was never proven to come from an import of Wardline's package, so local symbol spoofing still produces the false-green this hardening is trying to prevent.

Useful? React with 👍 / 👎.

continue
elif last != bt.canonical_name or not fqn.startswith(bt.module_prefix + "."):
continue
levels: dict[str, TaintState] = {}
unreadable = False
Expand Down
8 changes: 5 additions & 3 deletions src/wardline/scanner/taint/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ class SeedContext:

``alias_map`` is the file's ``{local_name: fully_qualified_name}`` import
map (from ``build_import_alias_map``); a provider uses it to resolve aliased
decorator names against the trust vocabulary. Defaults to empty so callers
that do not seed from decorators (e.g. the trivial default provider's tests)
need not supply it.
decorator names against the trust vocabulary. ``project_modules`` contains the
modules discovered in the scanned project, allowing providers to fail closed
when a declaration package would be shadowed by project-local code. Defaults
keep callers that do not seed from decorators lightweight.
"""

module: str
alias_map: Mapping[str, str] = field(default_factory=dict)
project_modules: frozenset[str] = field(default_factory=frozenset)


@dataclass(frozen=True, slots=True)
Expand Down
32 changes: 30 additions & 2 deletions tests/unit/scanner/taint/test_decorator_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,17 @@
from wardline.scanner.taint.provider import FunctionTaint, SeedContext


def _seed(src: str, *, module: str = "m") -> dict[str, FunctionTaint | None]:
def _seed(
src: str,
*,
module: str = "m",
project_modules: frozenset[str] = frozenset(),
) -> dict[str, FunctionTaint | None]:
"""Run the provider over every function entity in *src*; map qualname -> result."""
tree = ast.parse(src)
alias_map = build_import_alias_map(tree, module_path=module)
entities = discover_file_entities(tree, module=module, path="m.py")
ctx = SeedContext(module=module, alias_map=alias_map)
ctx = SeedContext(module=module, alias_map=alias_map, project_modules=project_modules)
provider = DecoratorTaintSourceProvider()
# .taint: assertions here compare the declared FunctionTaint; the unprovable-
# boundary signal (Track 2 T2.4) is exercised separately in tests/grammar/.
Expand Down Expand Up @@ -233,3 +238,26 @@ def test_wardline_prefixed_but_unknown_decorator_is_no_opinion() -> None:
# (``wardline.decorators.bogus``) — canonical not in REGISTRY -> no opinion.
out = _seed("import wardline.decorators\n@wardline.decorators.bogus\ndef f():\n return 1\n")
assert out["m.f"] is None


def test_builtin_decorator_requires_exact_known_export() -> None:
    # ``evil.trusted`` resolves under the decorators package and ends in the
    # canonical name, so a prefix + final-component matcher would accept it.
    # Builtin markers must instead be an exact public/implementation export.
    source = "from wardline.decorators import evil\n@evil.trusted\ndef f():\n return 1\n"
    seeded = _seed(source)
    assert seeded["m.f"] is None


def test_builtin_decorator_fails_closed_when_project_shadows_wardline() -> None:
    # When the scanned project itself defines wardline.decorators, the project
    # controls what this import means at runtime, so the default provider must
    # not anchor the decorator as Wardline's real marker package.
    source = "from wardline.decorators import trusted\n@trusted\ndef f():\n return 1\n"
    shadowing_modules = frozenset({"app", "wardline", "wardline.decorators"})
    seeded = _seed(source, project_modules=shadowing_modules)
    assert seeded["m.f"] is None


def test_builtin_decorator_accepts_implementation_module_export() -> None:
    # Importing from the ``trust`` submodule path must still seed the function.
    source = "from wardline.decorators.trust import trusted\n@trusted\ndef f():\n return 1\n"
    seeded = _seed(source)
    expected = FunctionTaint(T.INTEGRAL, T.INTEGRAL)
    assert seeded["m.f"] == expected
35 changes: 35 additions & 0 deletions tests/unit/scanner/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,38 @@ def test_parse_project_stage_returns_typed_modules_and_dirty_scope(tmp_path) ->
assert result.files[0].relpath == "m.py"
assert result.files[0].module == "m"
assert result.files[0].entities[0].qualname == "m.read_raw"


def test_parse_project_stage_fails_closed_for_shadowed_wardline_decorators(tmp_path) -> None:
    # app.py decorates a function with ``trusted`` imported from wardline.decorators.
    app = tmp_path / "app.py"
    app.write_text(
        "from wardline.decorators import trusted\n"
        "@trusted\n"
        "def unsafe(p):\n"
        " return p\n",
        encoding="utf-8",
    )

    # The project also ships its own wardline/decorators package, which shadows
    # the name ``wardline`` inside the scanned tree.
    shadow_pkg = tmp_path / "wardline" / "decorators"
    shadow_pkg.mkdir(parents=True)
    wardline_init = tmp_path / "wardline" / "__init__.py"
    wardline_init.write_text("", encoding="utf-8")
    decorators_init = shadow_pkg / "__init__.py"
    decorators_init.write_text(
        "def trusted(fn):\n"
        " return fn\n",
        encoding="utf-8",
    )

    stage_input = ParseProjectInput(
        files=(app, wardline_init, decorators_init),
        root=tmp_path,
        provider=DecoratorTaintSourceProvider(),
        config=WardlineConfig(),
        star_exports=vocabulary_star_exports(),
    )
    result = run_parse_project_stage(stage_input)

    # The decorated function must get the default seed, and the fingerprint
    # must record that the project shadows wardline.
    app_module = next(candidate for candidate in result.modules if candidate.module_path == "app")
    seed = app_module.seeds["app.unsafe"]
    assert seed.source == "default"
    assert seed.body_taint == T.UNKNOWN_RAW
    assert "wardline-shadowed=1" in result.provider_fingerprint