From 7d9957e3c6dba46c98faf848c4b504ddd3343b48 Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Thu, 25 Jun 2026 07:39:24 -0400 Subject: [PATCH 1/3] feat(analyzer): add phase-1 structured skill summaries Signed-off-by: Rod Boev --- src/skillspector/multi_skill.py | 8 +- src/skillspector/nodes/analyzers/__init__.py | 5 + .../nodes/analyzers/structured_skill_roles.py | 75 ++++++ src/skillspector/nodes/build_context.py | 9 +- src/skillspector/state.py | 2 + src/skillspector/structured_skill.py | 250 ++++++++++++++++++ tests/nodes/analyzers/test_registry.py | 1 + .../analyzers/test_structured_skill_roles.py | 95 +++++++ tests/nodes/test_build_context.py | 67 +++++ tests/test_multi_skill.py | 85 ++++++ tests/unit/test_cli.py | 32 +++ 11 files changed, 627 insertions(+), 2 deletions(-) create mode 100644 src/skillspector/nodes/analyzers/structured_skill_roles.py create mode 100644 src/skillspector/structured_skill.py create mode 100644 tests/nodes/analyzers/test_structured_skill_roles.py diff --git a/src/skillspector/multi_skill.py b/src/skillspector/multi_skill.py index be4c7eb..327f0b9 100644 --- a/src/skillspector/multi_skill.py +++ b/src/skillspector/multi_skill.py @@ -26,6 +26,7 @@ from pathlib import Path from skillspector.logging_config import get_logger +from skillspector.structured_skill import extract_structured_skill_context logger = get_logger(__name__) @@ -73,7 +74,7 @@ def detect_skills(directory: Path) -> MultiSkillDetectionResult: continue if child.name.startswith("."): continue - if _has_skill_md(child): + if _has_skill_md(child) or _is_structured_skill_bundle(child): name = _extract_skill_name(child) skills.append( SkillDirectory( @@ -91,6 +92,11 @@ def detect_skills(directory: Path) -> MultiSkillDetectionResult: ) +def _is_structured_skill_bundle(child_dir: Path) -> bool: + """Return true when a child directory contains a valid AISOP/AISP bundle.""" + return extract_structured_skill_context(child_dir) is not None + + def _has_skill_md(directory: Path) -> bool: """Check if directory contains a SKILL.md or skill.md at root level.""" return (directory / "SKILL.md").is_file() or (directory / "skill.md").is_file() diff --git a/src/skillspector/nodes/analyzers/__init__.py b/src/skillspector/nodes/analyzers/__init__.py index b2ef9bc..db89da2 100644 --- a/src/skillspector/nodes/analyzers/__init__.py +++ b/src/skillspector/nodes/analyzers/__init__.py @@ -76,6 +76,9 @@ node as static_patterns_tool_misuse_node, ) from skillspector.nodes.analyzers.static_yara import node as static_yara_node +from skillspector.nodes.analyzers.structured_skill_roles import ( + node as structured_skill_roles_node, +) ANALYZER_NODE_IDS: list[str] = [ "static_patterns_prompt_injection", @@ -98,6 +101,7 @@ "mcp_least_privilege", "mcp_tool_poisoning", "mcp_rug_pull", + "structured_skill_roles", "semantic_security_discovery", "semantic_developer_intent", "semantic_quality_policy", @@ -124,6 +128,7 @@ "mcp_least_privilege": mcp_least_privilege_node, "mcp_tool_poisoning": mcp_tool_poisoning_node, "mcp_rug_pull": mcp_rug_pull_node, + "structured_skill_roles": structured_skill_roles_node, "semantic_security_discovery": semantic_security_discovery_node, "semantic_developer_intent": semantic_developer_intent_node, "semantic_quality_policy": semantic_quality_policy_node, diff --git a/src/skillspector/nodes/analyzers/structured_skill_roles.py b/src/skillspector/nodes/analyzers/structured_skill_roles.py new file mode 100644 index 0000000..33f80c3 --- /dev/null +++ b/src/skillspector/nodes/analyzers/structured_skill_roles.py @@ -0,0 +1,75 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Structured skill role summary analyzer (SSR-*).""" + +from __future__ import annotations + +from skillspector.models import Finding +from skillspector.state import AnalyzerNodeResponse, SkillspectorState + +ANALYZER_ID = "structured_skill_roles" + + +def _build_finding(context: dict[str, object]) -> Finding: + """Build a single SSR-1 finding from validated structured-skill context.""" + protocol = str(context.get("protocol", "AISOP/AISP")) + layout_kind = str(context.get("layout_kind", "structured")) + bundle_path = str(context.get("bundle_path", "")) + tools = context.get("declared_tools") or [] + tools_text = ", ".join(sorted(str(t) for t in tools)) if isinstance(tools, list) else "" + if not tools_text: + tools_text = "(no declared tools)" + + workflow_nodes = context.get("workflow_nodes") or [] + workflow_text = ", ".join(str(n) for n in workflow_nodes) if workflow_nodes else "(none)" + + constraints = context.get("constraint_anchors") or [] + constraints_text = ", ".join(str(c) for c in constraints) if constraints else "(none)" + + resources = context.get("resource_anchors") or [] + resources_text = ", ".join(str(r) for r in resources) if resources else "(none)" + + return Finding( + rule_id="SSR-1", + message=f"Structured {layout_kind} bundle detected ({protocol})", + severity="LOW", + confidence=1.0, + file=bundle_path, + tags=["AISOP", "AISP", "structured-skill"], + context=( + "Detected structured AISOP/AISP workflow for scan context. " + f"declared_tools=[{tools_text}], workflow_nodes=[{workflow_text}], " + f"constraints=[{constraints_text}], resources=[{resources_text}]" + ), + matched_text=( + f"layout_kind={layout_kind}, protocol={protocol}, " + f"bundle={bundle_path}, declared_tools={tools_text}" + ), + explanation=( + "This scan target appears to define a structured AISOP/AISP workflow. " + "The detector found a valid two-message AISOP/AISP contract and summarized " + "workflow roles, declared tool set, constraints, and resource anchors." + ), + ) + + +def node(state: SkillspectorState) -> AnalyzerNodeResponse: + """Emit one LOW SSR-1 summary finding when structured context is present.""" + context = state.get("structured_skill_context") + if not isinstance(context, dict): + return {"findings": []} + + return {"findings": [_build_finding(context)]} diff --git a/src/skillspector/nodes/build_context.py b/src/skillspector/nodes/build_context.py index a367092..d441e78 100644 --- a/src/skillspector/nodes/build_context.py +++ b/src/skillspector/nodes/build_context.py @@ -29,6 +29,7 @@ from skillspector.constants import MODEL_CONFIG from skillspector.logging_config import get_logger from skillspector.state import SkillspectorState +from skillspector.structured_skill import extract_structured_skill_context logger = get_logger(__name__) @@ -231,8 +232,9 @@ def build_context(state: SkillspectorState) -> dict[str, object]: file_cache = _read_file_cache(skill_dir, components) manifest = _parse_manifest(skill_dir) component_metadata, has_executable_scripts = _build_component_metadata(skill_dir, components) + structured_skill_context = extract_structured_skill_context(skill_dir) - return { + result = { "components": components, "file_cache": file_cache, "ast_cache": {}, @@ -242,3 +244,8 @@ def build_context(state: SkillspectorState) -> dict[str, object]: "component_metadata": component_metadata, "has_executable_scripts": has_executable_scripts, } + + if structured_skill_context is not None: + result["structured_skill_context"] = structured_skill_context + + return result diff --git a/src/skillspector/state.py b/src/skillspector/state.py index 20c3063..779a347 100644 --- a/src/skillspector/state.py +++ b/src/skillspector/state.py @@ -61,6 +61,8 @@ class SkillspectorState(TypedDict, total=False): # Component metadata for reporting and risk scoring (from build_context) component_metadata: list[dict[str, object]] has_executable_scripts: bool + # Structured workflow context for phase-1 AISOP/AISP summaries + structured_skill_context: dict[str, object] # Output: report node writes formatted string here output_format: str diff --git a/src/skillspector/structured_skill.py b/src/skillspector/structured_skill.py new file mode 100644 index 0000000..f94ac22 --- /dev/null +++ b/src/skillspector/structured_skill.py @@ -0,0 +1,250 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Structured AISOP/AISP bundle detection helpers.""" + +from __future__ import annotations + +import json +from pathlib import Path + +_SKIP_DIRS = frozenset( + {".git", "__pycache__", "node_modules", ".venv", "venv", ".tox", ".pytest_cache"} +) + +_AISOP_PROTOCOL_PREFIXES = ("AISOP V", "AISP V") + + +def extract_structured_skill_context(skill_dir: Path) -> dict[str, object] | None: + """Return structured-skill context for the first valid bundle under *skill_dir*.""" + if not skill_dir.is_dir(): + return None + + for path in _iter_aisop_files(skill_dir): + context = _parse_bundle_path(path) + if context is not None: + return context + + return None + + +def _iter_aisop_files(skill_dir: Path) -> list[Path]: + """Yield candidate *.aisop.json files under a directory, skipping noisy paths.""" + files: list[Path] = [] + for path in sorted(skill_dir.rglob("*.aisop.json")): + if any(part in _SKIP_DIRS for part in path.parts): + continue + if any(part.startswith(".") and part != ".aisop" for part in path.parts[:-1]): + # Keep hidden metadata directories out of structured-skill detection. + continue + if path.is_file(): + files.append(path) + return files + + +def _parse_bundle_path(bundle_path: Path) -> dict[str, object] | None: + """Parse and validate one AISOP/AISP bundle path.""" + try: + data = json.loads(bundle_path.read_text(encoding="utf-8", errors="replace")) + except (OSError, json.JSONDecodeError): + return None + return _parse_bundle_payload(bundle_path, data) + + +def _parse_bundle_payload(bundle_path: Path, payload: object) -> dict[str, object] | None: + """Parse the minimal phase-1 AISOP/AISP payload contract.""" + if not isinstance(payload, list) or len(payload) != 2: + return None + + system_msg = _normalize_mapping(payload[0]) + user_msg = _normalize_mapping(payload[1]) + if system_msg is None or user_msg is None: + return None + + system_content = _normalize_mapping(system_msg.get("content")) + if system_content is None: + return None + + protocol = system_content.get("protocol") + if not isinstance(protocol, str) or not protocol.startswith(_AISOP_PROTOCOL_PREFIXES): + return None + if system_msg.get("role") != "system": + return None + + user_content = user_msg.get("content") + contract = _find_contract_payload(user_content) + if contract is None: + return None + + if user_msg.get("role") != "user": + return None + + layout_kind = protocol.split()[0] + declared_tools = _first_non_empty( + ( + system_content.get("declared_tools"), + system_content.get("tools"), + contract.get("declared_tools"), + contract.get("tools"), + ) + ) + functions = contract.get("functions") + function_names = _extract_function_names(functions) + constraint_anchors = _extract_constraint_anchors(functions) + resource_anchors = _extract_resource_anchors(contract.get("resources")) + + return { + "layout_kind": layout_kind, + "format": system_content.get("format", layout_kind), + "protocol": protocol, + "bundle_path": str(bundle_path.resolve()), + "declared_tools": declared_tools, + "workflow_nodes": function_names, + "constraint_anchors": constraint_anchors, + "resource_anchors": resource_anchors, + } + + +def _normalize_mapping(value: object) -> dict[str, object] | None: + """Return a dict if *value* is a mapping object.""" + return value if isinstance(value, dict) else None + + +def _find_contract_payload(content: object) -> dict[str, object] | None: + """Locate the AISOP/AISP contract payload in a user message.""" + container = _normalize_mapping(content) + if container is None: + return None + + for key in ("aisop", "aisp_contract"): + value = container.get(key) + if isinstance(value, dict): + return value + return None + + +def _first_non_empty(values: tuple[object, ...]) -> list[str]: + """Return a stable deduplicated string list from candidate values.""" + result: list[str] = [] + seen = set[str]() + for value in values: + if not isinstance(value, list): + continue + for item in value: + if not isinstance(item, str): + continue + normalized = item.strip() + if not normalized or normalized in seen: + continue + seen.add(normalized) + result.append(normalized) + return result + + +def _extract_function_names(functions: object) -> list[str]: + """Extract function names from a dictionary/list of workflow nodes.""" + names: list[str] = [] + seen: set[str] = set() + + if isinstance(functions, dict): + items = functions.items() + for name, node in items: + if isinstance(name, str): + n = name.strip() + if n and n not in seen: + seen.add(n) + names.append(n) + if isinstance(node, dict): + names.extend(_extract_function_names(node.get("functions"))) + elif isinstance(functions, list): + for item in functions: + if not isinstance(item, dict): + continue + node_name = item.get("name") + if isinstance(node_name, str): + n = node_name.strip() + if n and n not in seen: + seen.add(n) + names.append(n) + names.extend(_extract_function_names(item.get("functions"))) + + return names + + +def _extract_constraint_anchors(functions: object) -> list[str]: + """Extract anchors from content.functions.*.constraints.""" + anchors: list[str] = [] + seen: set[str] = set() + + def _walk(nodes: object) -> None: + if isinstance(nodes, dict): + for maybe_node in nodes.values(): + if isinstance(maybe_node, dict): + constraints = maybe_node.get("constraints") + if isinstance(constraints, list): + for constraint in constraints: + if not isinstance(constraint, dict): + continue + anchor = constraint.get("anchor") + if isinstance(anchor, str): + a = anchor.strip() + if a and a not in seen: + seen.add(a) + anchors.append(a) + _walk(maybe_node.get("functions")) + elif isinstance(maybe_node, list): + _walk(maybe_node) + elif isinstance(nodes, list): + for item in nodes: + if isinstance(item, dict): + _walk(item) + + _walk(functions) + return anchors + + +def _extract_resource_anchors(resources: object) -> list[str]: + """Extract resource path anchors from content.aisp_contract.resources.""" + paths: list[str] = [] + seen: set[str] = set() + + def _collect(path: str) -> None: + p = path.strip() + if p and p not in seen: + seen.add(p) + paths.append(p) + + def _walk(value: object) -> None: + if isinstance(value, dict): + for val in value.values(): + if isinstance(val, dict): + resource_path = val.get("path") + if isinstance(resource_path, str): + _collect(resource_path) + _walk(val.get("resources")) + elif isinstance(val, str): + _collect(val) + elif isinstance(value, list): + for item in value: + if isinstance(item, str): + _collect(item) + elif isinstance(item, dict): + resource_path = item.get("path") + if isinstance(resource_path, str): + _collect(resource_path) + _walk(item.get("resources")) + + _walk(resources) + return paths diff --git a/tests/nodes/analyzers/test_registry.py b/tests/nodes/analyzers/test_registry.py index d3c79bf..3c2b3fe 100644 --- a/tests/nodes/analyzers/test_registry.py +++ b/tests/nodes/analyzers/test_registry.py @@ -42,6 +42,7 @@ "mcp_least_privilege", "mcp_tool_poisoning", "mcp_rug_pull", + "structured_skill_roles", "semantic_security_discovery", "semantic_developer_intent", "semantic_quality_policy", diff --git a/tests/nodes/analyzers/test_structured_skill_roles.py b/tests/nodes/analyzers/test_structured_skill_roles.py new file mode 100644 index 0000000..da4d84a --- /dev/null +++ b/tests/nodes/analyzers/test_structured_skill_roles.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the structured_skill_roles analyzer.""" + +from __future__ import annotations + +from pathlib import Path + +from skillspector.nodes.analyzers import structured_skill_roles as module +from skillspector.structured_skill import extract_structured_skill_context + + +def _write_aisop_bundle(path: Path) -> None: + path.write_text( + """ +[ + { + "role": "system", + "content": { + "protocol": "AISOP V1", + "format": "workflow" + } + }, + { + "role": "user", + "content": { + "aisop": { + "declared_tools": ["search", "calendar"], + "functions": { + "lookup": {"constraints": [{"anchor": "query"}]} + } + } + } + } +] +""", + encoding="utf-8", + ) + + +def test_no_structured_context_returns_no_findings() -> None: + """A skill without structured-skill context yields no SSR-1 findings.""" + assert module.node({})["findings"] == [] + + +def test_structured_bundle_emits_single_low_ssr1(tmp_path: Path) -> None: + """Valid structured bundle context produces one LOW SSR-1 finding.""" + path = tmp_path / "bundle.aisop.json" + _write_aisop_bundle(path) + context = extract_structured_skill_context(tmp_path) + assert context is not None + + result = module.node({"structured_skill_context": context}) + assert len(result["findings"]) == 1 + + finding = result["findings"][0] + assert finding.rule_id == "SSR-1" + assert finding.severity == "LOW" + assert finding.file == str(path.resolve()) + assert finding.matched_text is not None + assert finding.context is not None + assert "declared_tools" in finding.context + + +def test_malformed_context_does_not_raise_no_findings(tmp_path: Path) -> None: + """Malformed bundle parsing failure surfaces as no structured context and no finding.""" + (tmp_path / "bundle.aisop.json").write_text("{bad", encoding="utf-8") + context = extract_structured_skill_context(tmp_path) + assert context is None + + result = module.node({"structured_skill_context": context}) + assert result["findings"] == [] + + +def test_analyzer_does_not_require_llm_credentials(tmp_path: Path) -> None: + """Structured-skill analyzer is static and works without any LLM credentials.""" + path = tmp_path / "bundle.aisop.json" + _write_aisop_bundle(path) + context = extract_structured_skill_context(tmp_path) + assert context is not None + result = module.node({"structured_skill_context": context}) + assert result["findings"][0].rule_id == "SSR-1" diff --git a/tests/nodes/test_build_context.py b/tests/nodes/test_build_context.py index 26edee1..61cbffa 100644 --- a/tests/nodes/test_build_context.py +++ b/tests/nodes/test_build_context.py @@ -20,6 +20,7 @@ from __future__ import annotations +import json from pathlib import Path import pytest @@ -208,3 +209,69 @@ def test_build_context_parses_parameters_from_frontmatter(tmp_path: Path) -> Non assert result["manifest"]["parameters"] == [ {"name": "path", "description": "file path to read"} ] + + +def _write_aisop_bundle(path: Path) -> None: + """Write a valid minimal AISOP/AISP bundle file.""" + bundle = [ + { + "role": "system", + "content": { + "protocol": "AISP V1", + "format": "contract", + }, + }, + { + "role": "user", + "content": { + "aisp_contract": { + "functions": {"inbox": {"constraints": [{"anchor": "inbox.message"}]}}, + "resources": { + "state": {"path": "resources/state.json"}, + }, + "declared_tools": ["mail", "search"], + } + }, + }, + ] + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(bundle), encoding="utf-8") + + +def test_build_context_populates_structured_skill_context(tmp_path: Path) -> None: + """Valid AISOP/AISP bundle yields structured_skill_context metadata in scan context.""" + _write_aisop_bundle(tmp_path / "workflow.aisop.json") + state: SkillspectorState = {"skill_path": str(tmp_path)} + result = build_context(state) + + assert "structured_skill_context" in result + context = result["structured_skill_context"] + assert isinstance(context, dict) + assert context["protocol"] == "AISP V1" + assert context["layout_kind"] == "AISP" + assert context["format"] == "contract" + assert context["bundle_path"] == str((tmp_path / "workflow.aisop.json").resolve()) + assert context["workflow_nodes"] == ["inbox"] + assert context["constraint_anchors"] == ["inbox.message"] + assert context["resource_anchors"] == ["resources/state.json"] + assert context["declared_tools"] == ["mail", "search"] + + +def test_build_context_manifest_may_be_empty_when_only_structured(tmp_path: Path) -> None: + """A structured bundle can populate context while manifest stays empty.""" + _write_aisop_bundle(tmp_path / "workflow.aisop.json") + state: SkillspectorState = {"skill_path": str(tmp_path)} + result = build_context(state) + assert result["manifest"] == {} + assert "structured_skill_context" in result + + +def test_build_context_structured_context_absent_for_malformed_bundle(tmp_path: Path) -> None: + """Malformed AISOP/AISP JSON leaves structured_skill_context unset.""" + (tmp_path / "bad.aisop.json").write_text( + json.dumps([{"role": "system", "content": {"protocol": "AISOP V1"}}, {}]), + encoding="utf-8", + ) + state: SkillspectorState = {"skill_path": str(tmp_path)} + result = build_context(state) + assert "structured_skill_context" not in result diff --git a/tests/test_multi_skill.py b/tests/test_multi_skill.py index 3c1b634..5a9ac91 100644 --- a/tests/test_multi_skill.py +++ b/tests/test_multi_skill.py @@ -17,6 +17,7 @@ from __future__ import annotations +import json from pathlib import Path import pytest @@ -65,6 +66,38 @@ def nested_with_root(tmp_path: Path) -> Path: return tmp_path +def _write_aisop_bundle(path: Path) -> None: + """Write a valid minimal AISOP/AISP bundle file.""" + bundle = [ + { + "role": "system", + "content": { + "protocol": "AISOP V1", + "format": "AIModal", + }, + }, + { + "role": "user", + "content": { + "aisop": { + "declared_tools": ["search", "calendar"], + "functions": { + "lookup": {"constraints": [{"anchor": "query"}]}, + "schedule": {"constraints": [{"anchor": "time"}]}, + }, + }, + "aisp_contract": { + "resources": { + "calendar": {"path": "resources/calendar.json"}, + "memory": {"path": "resources/memory.md"}, + } + }, + }, + }, + ] + path.write_text(json.dumps(bundle), encoding="utf-8") + + class TestDetectSkills: """Tests for detect_skills().""" @@ -81,6 +114,58 @@ def test_skill_names_extracted_from_frontmatter(self, multi_skill_dir: Path) -> names = {s.name for s in result.skills} assert names == {"weather-lookup", "email-sender", "file-manager"} + def test_structured_skill_subdir_detected(self, tmp_path: Path) -> None: + """An immediate subdirectory with a valid AISOP/AISP bundle is detected.""" + sub = tmp_path / "workflow-bundle" + sub.mkdir() + _write_aisop_bundle(sub / "workflow.aisop.json") + result = detect_skills(tmp_path) + assert result.is_multi_skill is False + assert result.has_root_skill is False + assert len(result.skills) == 1 + assert result.skills[0].name == "workflow-bundle" + assert result.skills[0].path == sub + + def test_single_structured_child_not_multi(self, tmp_path: Path) -> None: + """One structured subdirectory should not force multi-skill mode.""" + sub = tmp_path / "only-structured" + sub.mkdir() + _write_aisop_bundle(sub / "workflow.aisop.json") + result = detect_skills(tmp_path) + assert result.is_multi_skill is False + assert len(result.skills) == 1 + + def test_structured_bundle_ignored_when_partial(self, tmp_path: Path) -> None: + """Malformed AISOP/AISP JSON does not count as a structured child skill.""" + malformed = tmp_path / "bad-bundle" + malformed.mkdir() + (malformed / "workflow.aisop.json").write_text( + json.dumps( + [ + { + "role": "system", + "content": {"protocol": "AISOP V1"}, + }, + {"content": {"functions": []}}, + ] + ), + encoding="utf-8", + ) + result = detect_skills(tmp_path) + assert result.is_multi_skill is False + assert len(result.skills) == 0 + + def test_root_skill_still_overrides_structured_nested(self, tmp_path: Path) -> None: + """A root SKILL.md still forces single-skill mode with nested structured bundles.""" + (tmp_path / "SKILL.md").write_text("---\nname: root-skill\n---\n# Root\n", encoding="utf-8") + nested = tmp_path / "nested-structured" + nested.mkdir() + _write_aisop_bundle(nested / "workflow.aisop.json") + result = detect_skills(tmp_path) + assert result.is_multi_skill is False + assert result.has_root_skill is True + assert len(result.skills) == 0 + def test_single_skill_not_multi(self, single_skill_dir: Path) -> None: """Directory with root SKILL.md is not multi-skill.""" result = detect_skills(single_skill_dir) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index b8c8823..78b738d 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -113,3 +113,35 @@ def test_cli_baseline_generate_then_scan_round_trip(tmp_path: Path) -> None: data = json.loads(scan.output) assert data["issues"] == [] assert data["risk_assessment"]["score"] == 0 + + +def test_cli_scan_structured_skill_aisop_no_llm_produces_ssr1_issue(tmp_path: Path) -> None: + """--no-llm JSON scan reports SSR-1 when a valid AISOP/AISP bundle is present.""" + (tmp_path / "workflow.aisop.json").write_text( + """ +[ + { + "role": "system", + "content": { + "protocol": "AISOP V1", + "format": "workflow" + } + }, + { + "role": "user", + "content": { + "aisop": { + "functions": { + "lookup": {"constraints": [{"anchor": "query"}]} + } + } + } + } +] +""", + encoding="utf-8", + ) + result = runner.invoke(app, ["scan", str(tmp_path), "--format", "json", "--no-llm"]) + assert result.exit_code == 0 + data = json.loads(result.output) + assert any(issue["id"] == "SSR-1" for issue in data["issues"]) From 96f7b501cff019a55ef224bb20aadc8bb10188c3 Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Thu, 25 Jun 2026 07:44:26 -0400 Subject: [PATCH 2/3] test(analyzer): keep structured workflow summaries stable Signed-off-by: Rod Boev --- src/skillspector/structured_skill.py | 9 +++++---- tests/nodes/test_build_context.py | 29 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/skillspector/structured_skill.py b/src/skillspector/structured_skill.py index f94ac22..825e0a1 100644 --- a/src/skillspector/structured_skill.py +++ b/src/skillspector/structured_skill.py @@ -153,10 +153,11 @@ def _first_non_empty(values: tuple[object, ...]) -> list[str]: return result -def _extract_function_names(functions: object) -> list[str]: +def _extract_function_names(functions: object, seen: set[str] | None = None) -> list[str]: """Extract function names from a dictionary/list of workflow nodes.""" names: list[str] = [] - seen: set[str] = set() + if seen is None: + seen = set() if isinstance(functions, dict): items = functions.items() @@ -167,7 +168,7 @@ def _extract_function_names(functions: object) -> list[str]: seen.add(n) names.append(n) if isinstance(node, dict): - names.extend(_extract_function_names(node.get("functions"))) + names.extend(_extract_function_names(node.get("functions"), seen)) elif isinstance(functions, list): for item in functions: if not isinstance(item, dict): @@ -178,7 +179,7 @@ def _extract_function_names(functions: object) -> list[str]: if n and n not in seen: seen.add(n) names.append(n) - names.extend(_extract_function_names(item.get("functions"))) + names.extend(_extract_function_names(item.get("functions"), seen)) return names diff --git a/tests/nodes/test_build_context.py b/tests/nodes/test_build_context.py index 61cbffa..9074e40 100644 --- a/tests/nodes/test_build_context.py +++ b/tests/nodes/test_build_context.py @@ -275,3 +275,32 @@ def test_build_context_structured_context_absent_for_malformed_bundle(tmp_path: state: SkillspectorState = {"skill_path": str(tmp_path)} result = build_context(state) assert "structured_skill_context" not in result + + +def test_build_context_deduplicates_nested_workflow_names(tmp_path: Path) -> None: + """Nested function names stay unique in structured_skill_context.""" + bundle = [ + { + "role": "system", + "content": { + "protocol": "AISOP V1", + "format": "workflow", + }, + }, + { + "role": "user", + "content": { + "aisop": { + "functions": { + "lookup": { + "functions": {"lookup": {"constraints": [{"anchor": "nested.query"}]}} + } + } + } + }, + }, + ] + (tmp_path / "nested.aisop.json").write_text(json.dumps(bundle), encoding="utf-8") + result = build_context({"skill_path": str(tmp_path)}) + context = result["structured_skill_context"] + assert context["workflow_nodes"] == ["lookup"] From 6efdb559f3f605fa608bf5f1ee7c7524a92ed0a5 Mon Sep 17 00:00:00 2001 From: Rod Boev Date: Thu, 25 Jun 2026 07:55:53 -0400 Subject: [PATCH 3/3] fix(analyzer): parse issue-shaped AISOP metadata --- src/skillspector/structured_skill.py | 72 +++++++++++-------- .../analyzers/test_structured_skill_roles.py | 11 +-- tests/nodes/test_build_context.py | 21 +++--- tests/test_multi_skill.py | 11 ++- tests/unit/test_cli.py | 7 +- 5 files changed, 71 insertions(+), 51 deletions(-) diff --git a/src/skillspector/structured_skill.py b/src/skillspector/structured_skill.py index 825e0a1..d157050 100644 --- a/src/skillspector/structured_skill.py +++ b/src/skillspector/structured_skill.py @@ -83,27 +83,44 @@ def _parse_bundle_payload(bundle_path: Path, payload: object) -> dict[str, objec if system_msg.get("role") != "system": return None - user_content = user_msg.get("content") - contract = _find_contract_payload(user_content) - if contract is None: + user_content = _normalize_mapping(user_msg.get("content")) + if user_content is None: return None if user_msg.get("role") != "user": return None + aisop_payload = _normalize_mapping(user_content.get("aisop")) + aisp_contract = _normalize_mapping(user_content.get("aisp_contract")) + if aisop_payload is None and aisp_contract is None: + return None + layout_kind = protocol.split()[0] declared_tools = _first_non_empty( ( system_content.get("declared_tools"), system_content.get("tools"), - contract.get("declared_tools"), - contract.get("tools"), + user_content.get("declared_tools"), + user_content.get("tools"), + aisop_payload.get("declared_tools") if aisop_payload else None, + aisop_payload.get("tools") if aisop_payload else None, + aisp_contract.get("declared_tools") if aisp_contract else None, + aisp_contract.get("tools") if aisp_contract else None, ) ) - functions = contract.get("functions") + functions = user_content.get("functions") + if functions is None and aisop_payload is not None: + functions = aisop_payload.get("functions") + if functions is None and aisp_contract is not None: + functions = aisp_contract.get("functions") function_names = _extract_function_names(functions) constraint_anchors = _extract_constraint_anchors(functions) - resource_anchors = _extract_resource_anchors(contract.get("resources")) + resource_anchors = _extract_resource_anchors( + aisp_contract.get("resources") if aisp_contract is not None else None + ) + + if not function_names and not resource_anchors: + return None return { "layout_kind": layout_kind, @@ -122,19 +139,6 @@ def _normalize_mapping(value: object) -> dict[str, object] | None: return value if isinstance(value, dict) else None -def _find_contract_payload(content: object) -> dict[str, object] | None: - """Locate the AISOP/AISP contract payload in a user message.""" - container = _normalize_mapping(content) - if container is None: - return None - - for key in ("aisop", "aisp_contract"): - value = container.get(key) - if isinstance(value, dict): - return value - return None - - def _first_non_empty(values: tuple[object, ...]) -> list[str]: """Return a stable deduplicated string list from candidate values.""" result: list[str] = [] @@ -189,6 +193,19 @@ def _extract_constraint_anchors(functions: object) -> list[str]: anchors: list[str] = [] seen: set[str] = set() + def _collect(constraint: object) -> None: + if isinstance(constraint, str): + anchor = constraint.strip() + elif isinstance(constraint, dict): + raw_anchor = constraint.get("anchor") + anchor = raw_anchor.strip() if isinstance(raw_anchor, str) else "" + else: + anchor = "" + + if anchor and anchor not in seen: + seen.add(anchor) + anchors.append(anchor) + def _walk(nodes: object) -> None: if isinstance(nodes, dict): for maybe_node in nodes.values(): @@ -196,21 +213,18 @@ def _walk(nodes: object) -> None: constraints = maybe_node.get("constraints") if isinstance(constraints, list): for constraint in constraints: - if not isinstance(constraint, dict): - continue - anchor = constraint.get("anchor") - if isinstance(anchor, str): - a = anchor.strip() - if a and a not in seen: - seen.add(a) - anchors.append(a) + _collect(constraint) _walk(maybe_node.get("functions")) elif isinstance(maybe_node, list): _walk(maybe_node) elif isinstance(nodes, list): for item in nodes: if isinstance(item, dict): - _walk(item) + constraints = item.get("constraints") + if isinstance(constraints, list): + for constraint in constraints: + _collect(constraint) + _walk(item.get("functions")) _walk(functions) return anchors diff --git a/tests/nodes/analyzers/test_structured_skill_roles.py b/tests/nodes/analyzers/test_structured_skill_roles.py index da4d84a..e4a4e63 100644 --- a/tests/nodes/analyzers/test_structured_skill_roles.py +++ b/tests/nodes/analyzers/test_structured_skill_roles.py @@ -38,11 +38,12 @@ def _write_aisop_bundle(path: Path) -> None: "role": "user", "content": { "aisop": { - "declared_tools": ["search", "calendar"], - "functions": { - "lookup": {"constraints": [{"anchor": "query"}]} - } - } + "main": "graph TD" + }, + "functions": { + "lookup": {"constraints": ["query"]} + }, + "declared_tools": ["search", "calendar"] } } ] diff --git a/tests/nodes/test_build_context.py b/tests/nodes/test_build_context.py index 9074e40..2feab54 100644 --- a/tests/nodes/test_build_context.py +++ b/tests/nodes/test_build_context.py @@ -224,13 +224,15 @@ def _write_aisop_bundle(path: Path) -> None: { "role": "user", "content": { + "functions": { + "inbox": {"constraints": ["Read-only inspection must not modify files."]} + }, "aisp_contract": { - "functions": {"inbox": {"constraints": [{"anchor": "inbox.message"}]}}, "resources": { "state": {"path": "resources/state.json"}, }, "declared_tools": ["mail", "search"], - } + }, }, }, ] @@ -252,7 +254,7 @@ def test_build_context_populates_structured_skill_context(tmp_path: Path) -> Non assert context["format"] == "contract" assert context["bundle_path"] == str((tmp_path / "workflow.aisop.json").resolve()) assert context["workflow_nodes"] == ["inbox"] - assert context["constraint_anchors"] == ["inbox.message"] + assert context["constraint_anchors"] == ["Read-only inspection must not modify files."] assert context["resource_anchors"] == ["resources/state.json"] assert context["declared_tools"] == ["mail", "search"] @@ -290,13 +292,16 @@ def test_build_context_deduplicates_nested_workflow_names(tmp_path: Path) -> Non { "role": "user", "content": { - "aisop": { - "functions": { - "lookup": { - "functions": {"lookup": {"constraints": [{"anchor": "nested.query"}]}} + "aisop": {"main": "graph TD"}, + "functions": { + "lookup": { + "functions": { + "lookup": { + "constraints": ["nested.query"], + } } } - } + }, }, }, ] diff --git a/tests/test_multi_skill.py b/tests/test_multi_skill.py index 5a9ac91..6ed524f 100644 --- a/tests/test_multi_skill.py +++ b/tests/test_multi_skill.py @@ -79,13 +79,12 @@ def _write_aisop_bundle(path: Path) -> None: { "role": "user", "content": { - "aisop": { - "declared_tools": ["search", "calendar"], - "functions": { - "lookup": {"constraints": [{"anchor": "query"}]}, - "schedule": {"constraints": [{"anchor": "time"}]}, - }, + "aisop": {"main": "graph TD"}, + "functions": { + "lookup": {"constraints": ["query"]}, + "schedule": {"constraints": ["time"]}, }, + "declared_tools": ["search", "calendar"], "aisp_contract": { "resources": { "calendar": {"path": "resources/calendar.json"}, diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 78b738d..4e96cb9 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -131,9 +131,10 @@ def test_cli_scan_structured_skill_aisop_no_llm_produces_ssr1_issue(tmp_path: Pa "role": "user", "content": { "aisop": { - "functions": { - "lookup": {"constraints": [{"anchor": "query"}]} - } + "main": "graph TD" + }, + "functions": { + "lookup": {"constraints": ["query"]} } } }