diff --git a/CLAUDE.md b/CLAUDE.md index 6823678a..6c9882a2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -28,6 +28,17 @@ The **Direct** pipeline (`parse_to_tree` → `transform` → `to_lark` → `reco | `cli/helpers.py` | File/directory/stdin conversion helpers | | `cli/hcl_to_json.py` | `hcl2tojson` entry point | | `cli/json_to_hcl.py` | `jsontohcl2` entry point | +| `cli/hq.py` | `hq` CLI entry point — query dispatch, formatting, optional operator | +| `hcl2/query/__init__.py` | Public query API exports | +| `hcl2/query/_base.py` | `NodeView` base class, view registry, `view_for()` factory | +| `hcl2/query/path.py` | Structural path parser (`PathSegment`, `parse_path`, `[select()]`, `type:name`) | +| `hcl2/query/resolver.py` | Path resolver — segment-by-segment with label depth, type filter, FunctionCallView | +| `hcl2/query/pipeline.py` | Pipe operator — `split_pipeline`, `classify_stage`, `execute_pipeline` | +| `hcl2/query/builtins.py` | Built-in transforms: `keys`, `values`, `length` | +| `hcl2/query/diff.py` | Structural diff between two HCL documents | +| `hcl2/query/predicate.py` | `select()` predicate tokenizer, recursive descent parser, evaluator | +| `hcl2/query/safe_eval.py` | AST-validated Python expression eval for hybrid/eval modes | +| `hcl2/query/introspect.py` | `--describe` and `--schema` output generation | `hcl2/__main__.py` is a thin wrapper that imports `cli.hcl_to_json:main`. @@ -141,4 +152,4 @@ Hooks are defined in `.pre-commit-config.yaml` (includes black, mypy, pylint, an ## Keeping Docs Current -Update this file when architecture, modules, API surface, or testing conventions change. Also update `README.md` and `docs/usage.md` when changes affect the public API, CLI flags, or option fields. +Update this file when architecture, modules, API surface, or testing conventions change. 
Also update `README.md` and the docs in `docs/` (`01_getting_started.md`, `02_querying.md`, `03_advanced_api.md`, `04_hq.md`) when changes affect the public API, CLI flags, or option fields. diff --git a/README.md b/README.md index ad379abc..5ce83c67 100644 --- a/README.md +++ b/README.md @@ -67,12 +67,18 @@ res.block("tags", Name="HelloWorld") hcl_string = hcl2.dumps(doc.build()) ``` -For the full API reference, option dataclasses, intermediate pipeline stages, and more examples -see [docs/usage.md](https://github.com/amplify-education/python-hcl2/blob/main/docs/usage.md). +### Documentation + +| Guide | Contents | +|---|---| +| [Getting Started](docs/01_getting_started.md) | Installation, load/dump, options, CLI converters | +| [Querying HCL (Python)](docs/02_querying.md) | DocumentView, BlockView, tree walking, view hierarchy | +| [Advanced API](docs/03_advanced_api.md) | Pipeline stages, Builder | +| [hq Reference](docs/04_hq.md) | `hq` CLI — structural queries, hybrid/eval, introspection | ### CLI Tools -python-hcl2 ships two command-line converters: +python-hcl2 ships three command-line tools: ```sh # HCL2 → JSON @@ -84,9 +90,13 @@ hcl2tojson terraform/ output/ # converts a directory jsontohcl2 output.json # prints HCL2 to stdout jsontohcl2 output.json main.tf # writes to file jsontohcl2 output/ terraform/ # converts a directory + +# Query HCL2 files +hq 'resource.aws_instance.main.ami' main.tf +hq 'variable[*]' variables.tf --json ``` -Both commands accept `-` as PATH to read from stdin. Run `hcl2tojson --help` or `jsontohcl2 --help` for the full list of flags. +All commands accept `-` as PATH to read from stdin. Run `--help` on any command for the full list of flags. 
## Building From Source diff --git a/cli/hq.py b/cli/hq.py new file mode 100755 index 00000000..ff0ece7c --- /dev/null +++ b/cli/hq.py @@ -0,0 +1,559 @@ +"""``hq`` CLI entry point — query HCL2 files.""" + +import argparse +import json +import os +import sys +from typing import Any, List + +from hcl2.query._base import NodeView +from hcl2.query.body import DocumentView +from hcl2.query.introspect import build_schema, describe_results +from hcl2.query.path import QuerySyntaxError +from hcl2.query.pipeline import classify_stage, execute_pipeline, split_pipeline +from hcl2.query.resolver import resolve_path +from hcl2.query.safe_eval import UnsafeExpressionError, safe_eval +from hcl2.version import __version__ + +EXAMPLES_TEXT = """\ +examples: + # Structural queries + hq 'resource.aws_instance.main.ami' main.tf + hq 'variable[*]' variables.tf --json + echo 'x = 1' | hq 'x' --value + + # Pipes + hq 'resource[*] | .aws_instance | .tags' main.tf + hq 'variable[*] | select(.default) | .default' vars.tf --json + + # Builtins + hq 'x | keys' file.tf --json + hq 'x | length' file.tf --value + + # Select (bracket syntax) + hq '*[select(.name == "x")]' file.tf --value + + # String functions (jq-compatible) + hq 'module~[select(.source | contains("docker"))]' dir/ + hq 'resource~[select(.ami | test("^ami-"))]' dir/ + hq 'resource~[select(has("tags"))]' main.tf + hq 'resource~[select(.tags | not)]' main.tf + + # Object construction (jq-style) + hq 'resource[*] | {type: .block_type, name: .name_labels}' main.tf --json + + # Optional (exit 0 on empty results) + hq 'nonexistent?' 
file.tf --value + + # Raw output (strip quotes, ideal for shell piping) + hq 'resource.aws_instance.main.ami' main.tf --raw + + # Structural diff + hq file1.tf --diff file2.tf + hq file1.tf --diff file2.tf --json + + # Hybrid (structural::eval) + hq 'resource.aws_instance[*]::name_labels' main.tf + hq 'variable[*]::block_type' variables.tf --value + + # Pure eval (-e) + hq -e 'doc.blocks("variable")[0].attribute("default").value' variables.tf --json + + # Introspection + hq --describe 'variable[*]' variables.tf + hq --schema + +docs: https://github.com/amplify-education/python-hcl2/tree/main/docs +""" + + +def _normalize_eval_expr(expr_part: str) -> str: + """Normalize the eval expression after '::' for ergonomics.""" + stripped = expr_part.strip() + if not stripped: + return "_" + if stripped.startswith("_"): + return stripped + if stripped.startswith("."): + return "_" + stripped + # Check if it starts with a known function/variable name + for prefix in ( + "len(", + "str(", + "int(", + "float(", + "bool(", + "list(", + "tuple(", + "sorted(", + "reversed(", + "enumerate(", + "zip(", + "range(", + "min(", + "max(", + "print(", + "any(", + "all(", + "filter(", + "map(", + "isinstance(", + "type(", + "hasattr(", + "getattr(", + "doc", + ): + if stripped.startswith(prefix): + return stripped + return "_." 
+ stripped + + +def _dispatch_query( + query_str: str, + is_eval: bool, + doc_view: DocumentView, + file_path: str = "", +) -> List[Any]: + """Dispatch a query and return results.""" + if is_eval: + result = safe_eval(query_str, {"doc": doc_view}) + if isinstance(result, list): + return result + return [result] + + # Hybrid mode: checked before pipeline since "::" is unambiguous + if "::" in query_str: + from hcl2.query.path import parse_path + + path_part, expr_part = query_str.split("::", 1) + segments = parse_path(path_part) + nodes = resolve_path(doc_view, segments) + expr = _normalize_eval_expr(expr_part) + return [safe_eval(expr, {"_": node, "doc": doc_view}) for node in nodes] + + # Structural mode: route through pipeline (handles pipes, builtins, select) + stages = [classify_stage(s) for s in split_pipeline(query_str)] + return execute_pipeline(doc_view, stages, file_path=file_path) + + +def _strip_dollar_wrap(text: str) -> str: + """Strip ``${...}`` wrapping from a serialized expression string.""" + if text.startswith("${") and text.endswith("}"): + return text[2:-1] + return text + + +def _strip_quotes(text: str) -> str: + """Strip surrounding quotes from a string value.""" + if len(text) >= 2 and text[0] == '"' and text[-1] == '"': + return text[1:-1] + return text + + +def _rawify(value: Any) -> Any: + """Recursively strip quotes and ${} wrapping from all string values.""" + if isinstance(value, str): + return _strip_dollar_wrap(_strip_quotes(value)) + if isinstance(value, dict): + return {k: _rawify(v) for k, v in value.items()} + if isinstance(value, list): + return [_rawify(v) for v in value] + return value + + +def _format_result( + result: Any, + output_json: bool, + output_value: bool, + json_indent: int, + output_raw: bool = False, +) -> str: + """Format a single result for output.""" + if output_json: + return json.dumps(_convert_for_json(result), indent=json_indent, default=str) + + if output_raw: + if isinstance(result, NodeView): + val = 
result.to_dict() + if isinstance(val, str): + return _strip_dollar_wrap(_strip_quotes(val)) + # For dicts with a single key (e.g. attribute), extract the value + if isinstance(val, dict) and len(val) == 1: + inner = next(iter(val.values())) + if isinstance(inner, str): + return _strip_dollar_wrap(_strip_quotes(inner)) + return str(inner) + return json.dumps(_rawify(val), default=str) + if isinstance(result, dict): + return json.dumps(_rawify(result), default=str) + if isinstance(result, str): + return _strip_dollar_wrap(_strip_quotes(result)) + return str(result) + + if output_value: + if isinstance(result, NodeView): + return _strip_dollar_wrap(str(result.to_dict())) + if isinstance(result, str): + return _strip_dollar_wrap(result) + return str(result) + + # Default: HCL output + if isinstance(result, NodeView): + return result.to_hcl() + if isinstance(result, list): + return _format_list(result, output_json, output_value, json_indent, output_raw) + if isinstance(result, str): + return _strip_dollar_wrap(result) + return str(result) + + +def _format_list( + items: list, + output_json: bool, + output_value: bool, + json_indent: int, + output_raw: bool = False, +) -> str: + """Format a list result (e.g. 
from hybrid mode returning a list property).""" + if output_json: + converted = [ + item.to_dict() if isinstance(item, NodeView) else item for item in items + ] + return json.dumps(converted, indent=json_indent, default=str) + parts = [] + for item in items: + if isinstance(item, NodeView): + parts.append(item.to_hcl() if not output_value else str(item.to_dict())) + else: + parts.append(str(item)) + return "[" + ", ".join(parts) + "]" if not output_value else "\n".join(parts) + + +def _convert_for_json(value: Any) -> Any: + """Recursively convert NodeViews to dicts for JSON serialization.""" + if isinstance(value, NodeView): + return value.to_dict() + if isinstance(value, list): + return [_convert_for_json(item) for item in value] + return value + + +def _format_output( + results: List[Any], + output_json: bool, + output_value: bool, + json_indent: int, + output_raw: bool = False, +) -> str: + """Format results for final output.""" + if output_json and len(results) > 1: + items = [_convert_for_json(item) for item in results] + return json.dumps(items, indent=json_indent, default=str) + + parts = [] + for result in results: + parts.append( + _format_result(result, output_json, output_value, json_indent, output_raw) + ) + return "\n".join(parts) + + +def _error(msg: str, use_json: bool, **extra) -> str: + """Format an error message.""" + if use_json: + data = {"error": extra.get("error_type", "error"), "message": msg} + data.update(extra) + return json.dumps(data) + return f"Error: {msg}" + + +def _run_diff(file1: str, file2: str, use_json: bool, json_indent: int) -> None: + """Run structural diff between two HCL files.""" + import hcl2 + from hcl2.query.diff import diff_dicts, format_diff_json, format_diff_text + from hcl2.utils import SerializationOptions + + opts = SerializationOptions( + with_comments=False, with_meta=False, explicit_blocks=True + ) + for path in (file1, file2): + if path == "-": + continue + if not os.path.isfile(path): + print( + _error(f"File 
not found: {path}", use_json, error_type="io_error"), + file=sys.stderr, + ) + sys.exit(1) + + try: + if file1 == "-": + text1 = sys.stdin.read() + else: + with open(file1, encoding="utf-8") as f: + text1 = f.read() + if file2 == "-": + text2 = sys.stdin.read() + else: + with open(file2, encoding="utf-8") as f: + text2 = f.read() + except (OSError, IOError) as exc: + print(_error(str(exc), use_json, error_type="io_error"), file=sys.stderr) + sys.exit(1) + + try: + dict1 = hcl2.loads(text1, serialization_options=opts) + dict2 = hcl2.loads(text2, serialization_options=opts) + except Exception as exc: # pylint: disable=broad-except + print(_error(str(exc), use_json, error_type="parse_error"), file=sys.stderr) + sys.exit(1) + + entries = diff_dicts(dict1, dict2) + if not entries: + sys.exit(0) + + if use_json: + print(format_diff_json(entries)) + else: + print(format_diff_text(entries)) + + +def _find_file_keys(query: str) -> List[str]: + """Find construct output keys that reference ``__file__``. + + Handles both shorthand ``{__file__}`` (key="__file__") and + renamed ``{file: .__file__}`` (key="file"). + """ + import re + + keys: List[str] = [] + # Match renamed: "key: .__file__" or "key: __file__" + for m in re.finditer(r"(\w+)\s*:\s*\.?__file__", query): + keys.append(m.group(1)) + # Match shorthand: bare "__file__" as a construct field (not after ":") + if re.search(r"(? List[str]: + """Return a list of HCL file paths from a file path, directory, or stdin marker.""" + if path == "-": + return ["-"] + if os.path.isdir(path): + files = [] + for dirpath, _, filenames in os.walk(path): + for fname in sorted(filenames): + if os.path.splitext(fname)[1] in _HCL_EXTENSIONS: + files.append(os.path.join(dirpath, fname)) + files.sort() + return files + return [path] + + +def _run_query_on_file( + file_path: str, + query: str, + is_eval: bool, + use_json: bool, + raw_query: str, +) -> "List[Any] | None": + """Parse a file and run a query. 
Returns results or None on error.""" + try: + if file_path == "-": + text = sys.stdin.read() + else: + with open(file_path, encoding="utf-8") as f: + text = f.read() + except (OSError, IOError) as exc: + print(_error(str(exc), use_json, error_type="io_error"), file=sys.stderr) + return None + + try: + doc = DocumentView.parse(text) + except Exception as exc: # pylint: disable=broad-except + print( + _error(str(exc), use_json, error_type="parse_error", file=file_path), + file=sys.stderr, + ) + return None + + try: + return _dispatch_query(query, is_eval, doc, file_path=file_path) + except QuerySyntaxError as exc: + print( + _error(str(exc), use_json, error_type="query_syntax", query=raw_query), + file=sys.stderr, + ) + return None + except UnsafeExpressionError as exc: + print( + _error( + str(exc), + use_json, + error_type="unsafe_expression", + expression=raw_query, + ), + file=sys.stderr, + ) + return None + except Exception as exc: # pylint: disable=broad-except + print( + _error(str(exc), use_json, error_type="eval_error", query=raw_query), + file=sys.stderr, + ) + return None + + +def main(): + """The ``hq`` console_scripts entry point.""" + parser = argparse.ArgumentParser( + prog="hq", + description=( + "Query HCL2 files using jq-like structural paths. " + "Supports pipes, select(), string functions, object construction. " + "Prefer structural queries over -e (eval) mode." 
+ ), + epilog=EXAMPLES_TEXT, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "QUERY", + nargs="?", + default=None, + help="Structural path, hybrid path::expr, or -e for eval", + ) + parser.add_argument( + "FILE", + nargs="?", + default="-", + help="HCL2 file (default: stdin)", + ) + parser.add_argument( + "-e", + "--eval", + action="store_true", + help="Treat QUERY as a Python expression (doc bound to DocumentView)", + ) + + output_group = parser.add_mutually_exclusive_group() + output_group.add_argument("--json", action="store_true", help="Output as JSON") + output_group.add_argument( + "--value", action="store_true", help="Output raw value only" + ) + output_group.add_argument( + "--raw", + action="store_true", + help="Output raw string (strip surrounding quotes)", + ) + + parser.add_argument( + "--json-indent", + type=int, + default=2, + metavar="N", + help="JSON indentation width (default: 2)", + ) + parser.add_argument( + "--version", + action="version", + version=__version__, + ) + parser.add_argument( + "--describe", + action="store_true", + help="Show type and available properties/methods for query results", + ) + parser.add_argument( + "--schema", + action="store_true", + help="Dump full view API schema as JSON (ignores QUERY/FILE)", + ) + parser.add_argument( + "--no-filename", + action="store_true", + help="Suppress filename prefix when querying directories", + ) + parser.add_argument( + "--diff", + metavar="FILE2", + help="Structural diff against FILE2", + ) + + args = parser.parse_args() + use_json = args.json or args.describe or args.schema + output_raw = getattr(args, "raw", False) + + # --schema: dump schema and exit + if args.schema: + print(json.dumps(build_schema(), indent=2)) + sys.exit(0) + + # --diff: structural diff mode + # Usage: hq FILE1 --diff FILE2 (FILE1 is the first positional arg) + if args.diff: + file1 = args.QUERY + if file1 is None: + parser.error("--diff requires two files: hq FILE1 --diff 
FILE2") + _run_diff(file1, args.diff, use_json, args.json_indent) + sys.exit(0) + + # QUERY is required unless --schema or --diff + if args.QUERY is None: + parser.error("the following arguments are required: QUERY") + + # Detect common mistake: user passed a file path but no query. + # When only one positional arg is given, argparse puts it in QUERY + # and FILE defaults to stdin. If stdin is a TTY (not piped) and + # QUERY looks like a file/directory path, give a helpful error + # instead of hanging on stdin. + if ( + args.FILE == "-" + and sys.stdin.isatty() + and args.QUERY + and (os.path.exists(args.QUERY) or os.sep in args.QUERY) + ): + parser.error(f"missing QUERY argument (did you mean: hq QUERY {args.QUERY}?)") + + # Handle trailing '?' (optional operator — exit 0 on empty results) + query = args.QUERY + optional = query.rstrip().endswith("?") and not args.eval + if optional: + query = query.rstrip()[:-1].rstrip() + + # Collect input files + file_paths = _collect_files(args.FILE) + + any_results = False + for file_path in file_paths: + multi = len(file_paths) > 1 + results = _run_query_on_file(file_path, query, args.eval, use_json, args.QUERY) + if results is None: + continue # parse/query error already printed + if not results: + continue + any_results = True + + if args.describe: + print(json.dumps(describe_results(results), indent=2)) + continue + + output = _format_output( + results, args.json, args.value, args.json_indent, output_raw + ) + if multi and not args.no_filename: + prefix = f"{file_path}:" + print("\n".join(prefix + line for line in output.splitlines())) + else: + print(output) + + if not any_results: + sys.exit(0 if optional else 1) + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/docs/usage.md b/docs/01_getting_started.md similarity index 56% rename from docs/usage.md rename to docs/01_getting_started.md index f6a5f6d6..518be420 100644 --- a/docs/usage.md +++ b/docs/01_getting_started.md @@ -1,4 +1,14 @@ -# python-hcl2 
Usage Guide +# Getting Started + +python-hcl2 parses [HCL2](https://github.com/hashicorp/hcl/blob/hcl2/hclsyntax/spec.md) into Python dicts and converts them back. This guide covers installation, everyday usage, and the CLI tools. + +## Installation + +python-hcl2 requires Python 3.8 or higher. + +```sh +pip install python-hcl2 +``` ## Quick Reference @@ -18,6 +28,9 @@ | `hcl2.from_json(text)` | Convert a JSON string into a LarkElement tree | | `hcl2.reconstruct(tree)` | Convert a LarkElement tree (or Lark tree) to HCL2 text | | `hcl2.Builder()` | Build HCL documents programmatically | +| `hcl2.query(source)` | Query HCL documents with typed view facades | + +For intermediate pipeline stages (`parse_to_tree`, `transform`, `serialize`, `from_dict`, `from_json`, `reconstruct`) and the `Builder` class, see [Advanced API Reference](03_advanced_api.md). ## HCL to Python dict @@ -35,6 +48,8 @@ data = hcl2.loads('resource "aws_instance" "web" { ami = "abc-123" }') ### SerializationOptions +The default serialization options are tuned for **content fidelity** — the output preserves enough detail (`__is_block__` markers, heredoc delimiters, quoted strings like `'"hello"'`, scientific notation, etc.) that it can be deserialized back into a LarkElement tree and reconstructed into valid HCL2 without information loss. This makes the defaults ideal for round-trip workflows (`load` → modify → `dump`), but it does add noise to the output compared to what you might expect from a plain JSON conversion. If you only need to *read* values and don't plan to reconstruct HCL2 from the dict, you can disable options like `explicit_blocks` and `preserve_heredocs`, or enable `strip_string_quotes` for cleaner output. 
+ Pass `serialization_options` to control how the dict is produced: ```python @@ -46,16 +61,37 @@ data = loads(text, serialization_options=SerializationOptions( )) ``` -| Field | Type | Default | Description | -|---|---|---|---| -| `with_comments` | `bool` | `True` | Include comments in the output | -| `with_meta` | `bool` | `False` | Add `__start_line__` / `__end_line__` metadata | -| `wrap_objects` | `bool` | `False` | Wrap object values as inline HCL2 strings | -| `wrap_tuples` | `bool` | `False` | Wrap tuple values as inline HCL2 strings | -| `explicit_blocks` | `bool` | `True` | Add `__is_block__: True` markers to blocks | -| `preserve_heredocs` | `bool` | `True` | Keep heredocs in their original form | -| `force_operation_parentheses` | `bool` | `False` | Force parentheses around all operations | -| `preserve_scientific_notation` | `bool` | `True` | Keep scientific notation as-is | +| Field | Type | Default | Description | +|---|---|---|-------------------------------------------------------------------------------------------------------------------------------------------------| +| `with_comments` | `bool` | `True` | Include comments as `__comments__` and `__inline_comments__` keys (see [Comment Format](#comment-format)) | +| `with_meta` | `bool` | `False` | Add `__start_line__` / `__end_line__` metadata | +| `wrap_objects` | `bool` | `False` | Wrap object values as inline HCL2 strings | +| `wrap_tuples` | `bool` | `False` | Wrap tuple values as inline HCL2 strings | +| `explicit_blocks` | `bool` | `True` | Add `__is_block__: True` markers to blocks. 
**Mandatory for JSON->HCL2 deserialization and reconstruction.** | +| `preserve_heredocs` | `bool` | `True` | Keep heredocs in their original form | +| `force_operation_parentheses` | `bool` | `False` | Force parentheses around all operations | +| `preserve_scientific_notation` | `bool` | `True` | Keep scientific notation as-is | +| `strip_string_quotes` | `bool` | `False` | Remove surrounding quotes from string values (e.g. `"hello"` instead of `'"hello"'`). **Breaks JSON->HCL2 deserialization and reconstruction.** | + +### Comment Format + +When `with_comments` is enabled (the default), comments are included as lists of objects under the `__comments__` and `__inline_comments__` keys. Each object has a `"value"` key containing the comment text (with delimiters stripped): + +```python +from hcl2 import loads, SerializationOptions + +data = loads( + "# Configure the provider\nx = 1\n", + serialization_options=SerializationOptions(with_comments=True), +) + +data["__comments__"] +# [{"value": "Configure the provider"}] +``` + +`__comments__` contains standalone comments (on their own lines), while `__inline_comments__` contains comments found inside expressions. + +> **Note:** Comments are currently **read-only** — they are captured during parsing but not restored when converting a dict back to HCL2 with `dump`/`dumps`. 
## Python dict to HCL @@ -111,108 +147,10 @@ text = dumps(data, formatter_options=FormatterOptions( | `vertically_align_attributes` | `bool` | `True` | Vertically align `=` signs in attribute groups | | `vertically_align_object_elements` | `bool` | `True` | Vertically align `=` signs in object elements | -## Building HCL from scratch - -The `Builder` class produces dicts with the correct `__is_block__` markers so that `dumps` can distinguish blocks from plain objects: - -```python -import hcl2 - -doc = hcl2.Builder() -res = doc.block("resource", labels=["aws_instance", "web"], - ami="abc-123", instance_type="t2.micro") -res.block("tags", Name="HelloWorld") - -hcl_string = hcl2.dumps(doc.build()) -``` - -Output: - -```hcl -resource "aws_instance" "web" { - ami = "abc-123" - instance_type = "t2.micro" - - tags { - Name = "HelloWorld" - } -} -``` - -### Builder.block() - -```python -block( - block_type: str, - labels: Optional[List[str]] = None, - __nested_builder__: Optional[Builder] = None, - **attributes, -) -> Builder -``` - -Returns the child `Builder` for the new block, allowing chained calls. - -## Intermediate pipeline stages - -The full pipeline looks like this: - -``` -Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict -Reverse: Python Dict → LarkElement Tree → HCL2 Text -``` - -You can access each stage individually for advanced use cases. - -### parse / parses — HCL2 text to LarkElement tree - -```python -tree = hcl2.parses('x = 1') # StartRule -tree = hcl2.parse(open("main.tf")) # StartRule -``` - -Pass `discard_comments=True` to strip comments during transformation. 
- -### parse_to_tree / parses_to_tree — HCL2 text to raw Lark tree - -```python -lark_tree = hcl2.parses_to_tree('x = 1') # lark.Tree -``` - -### transform — raw Lark tree to LarkElement tree - -```python -lark_tree = hcl2.parses_to_tree('x = 1') -tree = hcl2.transform(lark_tree) # StartRule -``` - -### serialize — LarkElement tree to Python dict - -```python -tree = hcl2.parses('x = 1') -data = hcl2.serialize(tree) -# or with options: -from hcl2 import SerializationOptions -data = hcl2.serialize(tree, serialization_options=SerializationOptions(with_meta=True)) -``` - -### from_dict / from_json — Python dict or JSON to LarkElement tree - -```python -tree = hcl2.from_dict(data) # StartRule -tree = hcl2.from_json('{"x": 1}') # StartRule -``` - -Both accept optional `deserializer_options`, `formatter_options`, and `apply_format` (default `True`). - -### reconstruct — LarkElement tree (or Lark tree) to HCL2 text - -```python -tree = hcl2.from_dict(data) -text = hcl2.reconstruct(tree) -``` - ## CLI Tools +python-hcl2 ships three console scripts: `hcl2tojson`, `jsontohcl2`, and [`hq`](04_hq.md). + ### hcl2tojson Convert HCL2 files to JSON. @@ -231,7 +169,7 @@ cat main.tf | hcl2tojson - # read from stdin | `-s` | Skip un-parsable files | | `--json-indent N` | JSON indentation width (default: 2) | | `--with-meta` | Add `__start_line__` / `__end_line__` metadata | -| `--with-comments` | Include comments in the output | +| `--with-comments` | Include comments as `__comments__` / `__inline_comments__` object lists | | `--wrap-objects` | Wrap object values as inline HCL2 | | `--wrap-tuples` | Wrap tuple values as inline HCL2 | | `--no-explicit-blocks` | Disable `__is_block__` markers | @@ -267,40 +205,19 @@ cat output.json | jsontohcl2 - # read from stdin | `--no-align` | Disable vertical alignment of attributes and object elements | | `--version` | Show version and exit | -## Pipeline Diagram +### hq +Query HCL2 files by structure, with optional Python expressions. 
+ +```sh +hq 'resource.aws_instance.main.ami' main.tf +hq 'variable[*]' variables.tf --json ``` - Forward Pipeline - ================ - HCL2 Text - │ - ▼ - ┌──────────────────┐ parse_to_tree / parses_to_tree - │ Lark Parse Tree │ - └────────┬─────────┘ - │ transform - ▼ - ┌──────────────────┐ - │ LarkElement Tree │ parse / parses (shortcut: HCL2 text → here) - └────────┬─────────┘ - │ serialize - ▼ - ┌──────────────────┐ - │ Python Dict │ load / loads (shortcut: HCL2 text → here) - └──────────────────┘ - - - Reverse Pipeline - ================ - Python Dict / JSON - │ - ▼ - ┌──────────────────┐ from_dict / from_json - │ LarkElement Tree │ - └────────┬─────────┘ - │ reconstruct - ▼ - ┌──────────────────┐ - │ HCL2 Text │ dump / dumps (shortcut: Python Dict / JSON → here) - └──────────────────┘ -``` + +For the full guide, see [hq Reference](04_hq.md). + +## Next Steps + +- [Querying HCL (Python)](02_querying.md) — navigate documents with typed view facades +- [Advanced API Reference](03_advanced_api.md) — intermediate pipeline stages, Builder, pipeline diagram +- [hq Reference](04_hq.md) — query HCL files from the command line diff --git a/docs/02_querying.md b/docs/02_querying.md new file mode 100644 index 00000000..eacbd3d7 --- /dev/null +++ b/docs/02_querying.md @@ -0,0 +1,221 @@ +# Querying HCL (Python API) + +The query system lets you navigate HCL documents by structure rather than serializing to dicts. This page covers the Python API; for the `hq` CLI tool, see [hq Reference](04_hq.md). + +## Quick Start + +```python +import hcl2 + +doc = hcl2.query('resource "aws_instance" "main" { ami = "abc-123" }') + +for block in doc.blocks("resource"): + print(block.block_type, block.name_labels) + ami = block.attribute("ami") + if ami: + print(f" ami = {ami.value}") +``` + +You can also parse from a file: + +```python +from hcl2.query import DocumentView + +doc = DocumentView.parse_file("main.tf") +``` + +## DocumentView + +The entry point for queries. 
Wraps a `StartRule`. + +```python +doc = DocumentView.parse(text) # from string +doc = DocumentView.parse_file("main.tf") # from file +doc = hcl2.query(text) # convenience alias +doc = hcl2.query(open("main.tf")) # also accepts file objects +``` + +| Method / Property | Returns | Description | +|---|---|---| +| `body` | `BodyView` | The document body | +| `blocks(block_type?, *labels)` | `List[BlockView]` | Blocks matching type and optional labels | +| `attributes(name?)` | `List[AttributeView]` | Attributes, optionally filtered by name | +| `attribute(name)` | `AttributeView \| None` | Single attribute by name | + +## BodyView + +Wraps a `BodyRule`. Same filtering methods as `DocumentView`. + +## BlockView + +Wraps a `BlockRule`. + +```python +block = doc.blocks("resource", "aws_instance")[0] +block.block_type # "resource" +block.labels # ["resource", "aws_instance", "main"] +block.name_labels # ["aws_instance", "main"] +block.body # BodyView +``` + +| Property / Method | Returns | Description | +|---|---|---| +| `block_type` | `str` | First label (the block type name) | +| `labels` | `List[str]` | All labels as plain strings | +| `name_labels` | `List[str]` | Labels after the block type (`labels[1:]`) | +| `body` | `BodyView` | The block body | +| `blocks(...)` | `List[BlockView]` | Nested blocks (delegates to body) | +| `attributes(...)` | `List[AttributeView]` | Nested attributes (delegates to body) | +| `attribute(name)` | `AttributeView \| None` | Single nested attribute | + +## AttributeView + +Wraps an `AttributeRule`. + +```python +attr = doc.attribute("ami") +attr.name # "ami" +attr.value # '"abc-123"' (serialized Python value) +attr.value_node # NodeView over the expression +``` + +## Container Views + +### TupleView + +Wraps a `TupleRule`. Access via `find_all` or by navigating to a tuple-valued attribute. 
+ +```python +from hcl2.query.containers import TupleView +from hcl2.walk import find_first +from hcl2.rules.containers import TupleRule + +doc = DocumentView.parse('x = [1, 2, 3]\n') +node = find_first(doc.attribute("x").raw, TupleRule) +tv = TupleView(node) +len(tv) # 3 +tv[0] # NodeView for the first element +tv.elements # List[NodeView] +``` + +### ObjectView + +Wraps an `ObjectRule`. + +```python +from hcl2.query.containers import ObjectView +from hcl2.rules.containers import ObjectRule + +node = find_first(doc.attribute("tags").raw, ObjectRule) +ov = ObjectView(node) +ov.keys # ["Name", "Env"] +ov.get("Name") # NodeView for the value +ov.entries # List[Tuple[str, NodeView]] +``` + +## Expression Views + +### ForTupleView / ForObjectView + +Wraps `ForTupleExprRule` / `ForObjectExprRule`. + +```python +from hcl2.query.for_exprs import ForTupleView +from hcl2.rules.for_expressions import ForTupleExprRule + +doc = DocumentView.parse('x = [for item in var.list : item]\n') +node = find_first(doc.raw, ForTupleExprRule) +fv = ForTupleView(node) +fv.iterator_name # "item" +fv.second_iterator_name # None (or "v" for "k, v in ...") +fv.iterable # NodeView +fv.value_expr # NodeView +fv.has_condition # bool +fv.condition # NodeView | None +``` + +`ForObjectView` adds `key_expr` and `has_ellipsis`. + +### ConditionalView + +Wraps a `ConditionalRule` (ternary `condition ? true : false`). + +```python +from hcl2.query.expressions import ConditionalView +from hcl2.rules.expressions import ConditionalRule + +doc = DocumentView.parse('x = var.enabled ? "on" : "off"\n') +node = find_first(doc.raw, ConditionalRule) +cv = ConditionalView(node) +cv.condition # NodeView over the condition expression +cv.true_val # NodeView over the true branch +cv.false_val # NodeView over the false branch +``` + +### FunctionCallView + +Wraps a `FunctionCallRule`. 
+ +```python +from hcl2.query.functions import FunctionCallView +from hcl2.rules.functions import FunctionCallRule + +doc = DocumentView.parse('x = length(var.list)\n') +node = find_first(doc.raw, FunctionCallRule) +fv = FunctionCallView(node) +fv.name # "length" +fv.args # List[NodeView] +fv.has_ellipsis # bool +``` + +## Common NodeView Methods + +All view classes inherit from `NodeView`: + +| Method / Property | Returns | Description | +|---|---|---| +| `raw` | `LarkElement` | The underlying IR node | +| `parent_view` | `NodeView \| None` | View over the parent node | +| `to_hcl()` | `str` | Reconstruct this subtree as HCL text | +| `to_dict(options?)` | `Any` | Serialize to a Python value | +| `find_all(rule_type)` | `List[NodeView]` | Find descendants by rule class | +| `find_by_predicate(fn)` | `List[NodeView]` | Find descendants where `fn(view)` is truthy | +| `walk_semantic()` | `List[NodeView]` | All semantic descendant nodes | +| `walk_rules()` | `List[NodeView]` | All rule descendant nodes | + +## Tree Walking Primitives + +The `hcl2.walk` module provides free functions for traversing the IR tree directly (without view wrappers): + +```python +from hcl2.walk import walk, walk_rules, walk_semantic, find_all, find_first, ancestors +from hcl2.rules.base import AttributeRule + +tree = hcl2.parses('x = 1\ny = 2\n') + +# All nodes depth-first (including tokens) +for node in walk(tree): + print(node) + +# Only LarkRule nodes +for rule in walk_rules(tree): + print(rule) + +# Only semantic rules (skip NewLineOrCommentRule) +for rule in walk_semantic(tree): + print(rule) + +# Find specific rule types +attrs = list(find_all(tree, AttributeRule)) +first_attr = find_first(tree, AttributeRule) + +# Walk up the parent chain +for parent in ancestors(first_attr): + print(parent) +``` + +## Next Steps + +- [hq Reference](04_hq.md) — query HCL files from the command line +- [Advanced API Reference](03_advanced_api.md) — intermediate pipeline stages, Builder +- [Getting 
Started](01_getting_started.md) — core API (`load`/`dump`), options, CLI converters diff --git a/docs/03_advanced_api.md b/docs/03_advanced_api.md new file mode 100644 index 00000000..d8cc5b54 --- /dev/null +++ b/docs/03_advanced_api.md @@ -0,0 +1,147 @@ +# Advanced API Reference + +This document covers the intermediate pipeline stages, programmatic document construction with `Builder`, and the full pipeline diagram. For basic `load`/`dump` usage and options, see [Getting Started](01_getting_started.md). + +## Intermediate Pipeline Stages + +The full pipeline looks like this: + +``` +Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict +Reverse: Python Dict → LarkElement Tree → HCL2 Text +``` + +You can access each stage individually for advanced use cases. + +### parse / parses — HCL2 text to LarkElement tree + +```python +tree = hcl2.parses('x = 1') # StartRule +tree = hcl2.parse(open("main.tf")) # StartRule +``` + +Pass `discard_comments=True` to strip comments during transformation. + +### parse_to_tree / parses_to_tree — HCL2 text to raw Lark tree + +```python +lark_tree = hcl2.parses_to_tree('x = 1') # lark.Tree +``` + +### transform — raw Lark tree to LarkElement tree + +```python +lark_tree = hcl2.parses_to_tree('x = 1') +tree = hcl2.transform(lark_tree) # StartRule +``` + +### serialize — LarkElement tree to Python dict + +```python +tree = hcl2.parses('x = 1') +data = hcl2.serialize(tree) +# or with options: +from hcl2 import SerializationOptions +data = hcl2.serialize(tree, serialization_options=SerializationOptions(with_meta=True)) +``` + +### from_dict / from_json — Python dict or JSON to LarkElement tree + +```python +tree = hcl2.from_dict(data) # StartRule +tree = hcl2.from_json('{"x": 1}') # StartRule +``` + +Both accept optional `deserializer_options`, `formatter_options`, and `apply_format` (default `True`). 
+ +### reconstruct — LarkElement tree (or Lark tree) to HCL2 text + +```python +tree = hcl2.from_dict(data) +text = hcl2.reconstruct(tree) +``` + +## Builder + +The `Builder` class produces dicts with the correct `__is_block__` markers so that `dumps` can distinguish blocks from plain objects: + +```python +import hcl2 + +doc = hcl2.Builder() +res = doc.block("resource", labels=["aws_instance", "web"], + ami="abc-123", instance_type="t2.micro") +res.block("tags", Name="HelloWorld") + +hcl_string = hcl2.dumps(doc.build()) +``` + +Output: + +```hcl +resource "aws_instance" "web" { + ami = "abc-123" + instance_type = "t2.micro" + + tags { + Name = "HelloWorld" + } +} +``` + +### Builder.block() + +```python +block( + block_type: str, + labels: Optional[List[str]] = None, + __nested_builder__: Optional[Builder] = None, + **attributes, +) -> Builder +``` + +Returns the child `Builder` for the new block, allowing chained calls. + +## Pipeline Diagram + +``` + Forward Pipeline + ================ + HCL2 Text + │ + ▼ + ┌──────────────────┐ parse_to_tree / parses_to_tree + │ Lark Parse Tree │ + └────────┬─────────┘ + │ transform + ▼ + ┌──────────────────┐ + │ LarkElement Tree │ parse / parses (shortcut: HCL2 text → here) + └────────┬─────────┘ + │ serialize + ▼ + ┌──────────────────┐ + │ Python Dict │ load / loads (shortcut: HCL2 text → here) + └──────────────────┘ + + + Reverse Pipeline + ================ + Python Dict / JSON + │ + ▼ + ┌──────────────────┐ from_dict / from_json + │ LarkElement Tree │ + └────────┬─────────┘ + │ reconstruct + ▼ + ┌──────────────────┐ + │ HCL2 Text │ dump / dumps (shortcut: Python Dict / JSON → here) + └──────────────────┘ +``` + +## See Also + +- [Getting Started](01_getting_started.md) — basic `load`/`dump` usage, options reference +- [Querying HCL (Python)](02_querying.md) — typed view facades and tree walking +- [hq Reference](04_hq.md) — query HCL files from the command line diff --git a/docs/04_hq.md b/docs/04_hq.md new file mode 100644 
index 00000000..47913efe --- /dev/null +++ b/docs/04_hq.md @@ -0,0 +1,531 @@ +# hq — HCL Query CLI + +`hq` is a jq-like query tool for HCL2 files. It ships with python-hcl2 and supports structural queries, hybrid Python expressions, and full eval mode. + +**Mode preference** (use the first one that works): + +1. **Structural** (default) — jq-like syntax with pipes, select, string functions, object construction; recommended. +1. **Hybrid** (`::`) — structural path on the left, Python expression on the right. Only when structural can't express the transform. +1. **Eval** (`-e`) — full Python expressions. Last resort — many operations are blocked for safety. + +## Structural Queries + +`hq` queries HCL2 files using dot-separated paths. Segments match block types, then name labels, then body contents. + +```sh +# Get all variable blocks +hq 'variable[*]' variables.tf + +# Navigate into a specific resource +hq 'resource.aws_instance.main.ami' main.tf + +# Output as JSON +hq 'variable[*]' variables.tf --json + +# Output raw values only +hq 'resource.aws_instance.main.ami' main.tf --value + +# Wildcard: all top-level blocks/attributes +hq '*' main.tf + +# Index: first variable only +hq 'variable[0]' variables.tf +``` + +### Path Grammar + +``` +path := segment ("." segment)* +segment := (type_filter ":")? name "~"? ("[*]" | "[" INT "]" | "[select(PRED)]")? +name := "*" | IDENTIFIER +type_filter := IDENTIFIER +``` + +- `name` matches block types and attribute names +- `type:name` matches only nodes of the given type (e.g. `function_call:length`) +- `name~` skips all block labels, going straight to the body (see below) +- `[*]` selects all matches at that level +- `[N]` selects the Nth match (zero-based) +- `[select(PRED)]` filters matches using a predicate (see below) + +### Resolution Rules + +1. On a `DocumentView`/`BodyView`: segment matches block types and attribute names +1. On a `BlockView` with unconsumed name labels: segment matches the next label +1. 
On a `BlockView` with all labels consumed: delegates to body +1. On an `AttributeView`: unwraps to the value expression +1. On an `ObjectView`: segment matches keys +1. On a `TupleView`: `[N]` or `[*]` selects elements + +### Skip Labels (`~`) + +HCL blocks have labels (e.g. `resource "aws_instance" "main"`). Normally you consume them one segment at a time: `resource.aws_instance.main`. The `~` suffix explicitly skips all remaining labels and goes straight to the block body: + +```sh +# Without ~: must name every label or use wildcard +hq 'resource.aws_instance.main.ami' main.tf + +# With ~: skip all labels, access body directly +hq 'resource~.ami' main.tf + +# All resource blocks regardless of labels +hq 'resource~[*]' main.tf + +# Filter blocks by body content +hq 'resource~[select(.ami)]' main.tf + +# Combine with wildcards +hq '*~[*] | .block_type' main.tf --value +``` + +### Pipes + +Chain stages with `|`. Each stage feeds its results into the next. Between stages, attributes unwrap to their values and blocks unwrap to their bodies (for non-path stages like builtins and select). + +```sh +# Navigate through block body, then extract an attribute +hq 'resource.aws_instance.main | .ami' main.tf --value + +# Select with pipe +hq 'variable[*] | select(.default)' variables.tf --json + +# Builtins +hq 'x | keys' file.tf --json +hq 'x | length' file.tf --value +``` + +**Property accessors** — when a pipe stage like `.name` or `.block_type` doesn't match a structural path, it falls back to Python properties on the view: + +| View Type | Available Properties | +|---|---| +| `BlockView` | `.block_type` (e.g. `"resource"`), `.labels` (all labels including type), `.name_labels` (labels after the block type, e.g. 
`["aws_instance", "main"]`) | +| `AttributeView` | `.name` (attribute name), `.value` (serialized value) | +| `FunctionCallView` | `.name` (function name), `.args` (argument list), `.has_ellipsis` | +| `ForTupleView` | `.iterator_name`, `.second_iterator_name`, `.iterable`, `.value_expr`, `.has_condition`, `.condition` | +| `ForObjectView` | same as ForTupleView plus `.key_expr`, `.has_ellipsis` | +| `ConditionalView` | `.condition`, `.true_val`, `.false_val` | +| `TupleView` | `.elements` | +| `ObjectView` | `.keys`, `.entries` | +| All views | `.type` (short string like `"block"`, `"attribute"`, etc.) | + +```sh +# Get block types +hq 'resource[*] | .block_type' main.tf --value + +# Get name labels (labels after the block type) +hq 'resource[*] | .name_labels' main.tf --json + +# Get all labels including block type +hq 'resource[*] | .labels' main.tf --json + +# Get attribute names +hq '*[*] | .name' main.tf --value + +# Get function call names +hq '*..function_call:*[*] | .name' main.tf --value + +# Chain with builtins +hq 'resource[*] | .labels | length' main.tf --value +``` + +### Pipeline Semantics + +Between pipe stages: + +- **AttributeView** → unwraps to value node (ObjectView, TupleView, etc.) +- **BlockView** → unwraps to body (BodyView) +- **ExprTermRule** → unwraps to inner expression + +This means label traversal should be done within a single stage: + +```sh +# Good: label traversal in one stage, pipe at body boundary +hq 'resource.aws_instance.main | .ami' main.tf + +# Won't work: labels split across pipe stages +# hq 'resource | .aws_instance | .main | .ami' main.tf +``` + +### Select Predicates + +Filter results without eval. 
Two syntactic positions: + +**Bracket syntax** — inline in a path segment (works with type qualifiers too): + +```sh +hq '*[select(.name == "x")]' file.tf --value +hq 'variable[select(.default)]' variables.tf +hq '*..function_call:*[select(.args[2])]' file.tf # functions with >2 args +``` + +**Pipe stage** — as a pipeline stage: + +```sh +hq 'variable[*] | select(.default)' variables.tf --json +hq 'resource[*] | select(.block_type == "resource")' main.tf +``` + +**Predicate grammar:** + +``` +predicate := or_expr +or_expr := and_expr ("or" and_expr)* +and_expr := not_expr ("and" not_expr)* +not_expr := "not" not_expr | comparison +comparison := accessor (comp_op literal)? | any_all | has_expr +any_all := ("any" | "all") "(" accessor ";" predicate ")" +has_expr := "has" "(" STRING ")" +accessor := "." IDENT ("." IDENT)* ("[" INT "]")? ("|" (BUILTIN | FUNC))? +BUILTIN := "keys" | "values" | "length" | "not" +FUNC := ("contains" | "test" | "startswith" | "endswith") "(" STRING ")" +literal := STRING | NUMBER | "true" | "false" | "null" +comp_op := "==" | "!=" | "<" | ">" | "<=" | ">=" +``` + +Without a comparison operator, the accessor is an existence/truthy check. 
+ +**Builtin transforms in accessors** — append `| builtin` to apply a transform before comparing: + +```sh +# Functions with more than 2 arguments +hq '*..function_call:*[select(.args | length > 2)] | .name' file.tf --value + +# Blocks with more than 2 labels +hq '*[select(.labels | length > 2)]' file.tf +``` + +**String functions** (jq-compatible) — filter by substring, regex, or prefix/suffix: + +```sh +# contains — substring match +hq 'module~[select(.source | contains("docker"))]' dir/ --value + +# test — regex match +hq 'resource.aws_instance~[select(.ami | test("^ami-[0-9]+"))]' dir/ --value + +# startswith / endswith +hq '*[select(.name | startswith("prod-"))]' file.tf +hq '*[select(.path | endswith("/api"))]' file.tf +``` + +**`has("key")`** — jq-compatible key existence check: + +```sh +# Blocks with a "tags" attribute +hq 'resource~[select(has("tags"))]' main.tf + +# Equivalent to: select(.tags) +``` + +**Postfix `not`** — jq-style postfix negation (equivalent to prefix `not`): + +```sh +# Blocks without tags +hq 'resource~[select(.tags | not)]' main.tf + +# Equivalent to: select(not .tags) +``` + +**`any` / `all`** — iterate over a list-valued accessor and test a predicate on each element (jq-style `any(generator; condition)`): + +```sh +# Tuples that contain function calls +hq '*..tuple:*[*] | select(any(.elements; .type == "function_call"))' file.tf + +# Tuples where ALL elements are plain nodes +hq '*..tuple:*[*] | select(all(.elements; .type == "node"))' file.tf + +# Combine with boolean operators +hq '*..tuple:*[*] | select(any(.elements; .type == "function_call" or .type == "tuple"))' file.tf +``` + +**Virtual accessor `.type`** — returns the view type as a short string: + +| View Class | `.type` value | +|---|---| +| `DocumentView` | `"document"` | +| `BodyView` | `"body"` | +| `BlockView` | `"block"` | +| `AttributeView` | `"attribute"` | +| `ObjectView` | `"object"` | +| `TupleView` | `"tuple"` | +| `ForTupleView` | `"for_tuple"` | +| 
`ForObjectView` | `"for_object"` | +| `FunctionCallView` | `"function_call"` | +| `ConditionalView` | `"conditional"` | +| `NodeView` | `"node"` | + +```sh +# Filter to only object-valued attributes, then get keys +hq '*[select(.type == "attribute")] | select(.type == "object") | keys' file.tf --json + +# Or use the type qualifier syntax +hq 'attribute:*[*] | select(.type == "object") | keys' file.tf --json +``` + +### Type Qualifiers + +Prefix a segment with `type:` to match only nodes of that type. Most useful with recursive descent (`..`), which matches nodes anywhere below the current one: + +```sh +# Find all function calls named "length" anywhere in the document +hq '*..function_call:length' file.tf + +# Get the arguments of a specific function call +hq '*..function_call:length | .args' file.tf --json + +# All function calls (wildcard name) +hq '*..function_call:*[*]' file.tf + +# Filter top-level to only blocks +hq 'block:*[*]' file.tf +``` + +After resolving to a `FunctionCallView`, you can navigate into it: + +| Segment | Behavior | +|---|---| +| `args` | All arguments | +| `args[*]` | All arguments (explicit select-all) | +| `args[N]` | Nth argument (zero-based) | + +### Builtins + +Terminal transforms available as pipe stages: + +| Builtin | Description | +|---|---| +| `keys` | Object → key list; Body/Document → block type + attribute names; Block → labels | +| `values` | Object → values; Tuple → elements; Body → blocks + attributes | +| `length` | Tuple/Object/Body → count; others → 1 | + +Append `[*]` to unpack list results into individual pipeline items: + +```sh +hq 'tags | keys' file.tf --json # one JSON array +hq 'tags | keys[*]' file.tf --value # one key per line +hq 'items | length' file.tf --value +``` + +### Object Construction `{...}` (jq-style) + +Extract multiple fields into a JSON object per result. 
Matches jq syntax: + +```sh +# Shorthand — field name = key name +hq 'module~[*] | {source, cpu, memory}' dir/ --json +# Output: {"source": "...", "cpu": 2, "memory": 512} + +# Renamed keys +hq 'resource[*] | {type: .block_type, name: .name_labels}' main.tf --json +# Output: {"type": "resource", "name": ["aws_instance", "main"]} + +# Combine with select +hq 'resource~[select(has("tags"))] | {name: .name_labels, tags}' main.tf --json +``` + +### Optional Operator (`?`) + +Append `?` to a query to exit 0 even when no results are found: + +```sh +hq 'nonexistent?' file.tf --value; echo "exit: $?" # exit: 0 +hq 'x?' file.tf --value # prints value, exit: 0 +``` + +The `?` is a CLI-level concern only — it is not stripped in eval mode. + +## Output Modes + +| Flag | Behavior | +|---|---| +| *(default)* | HCL reconstruction via `to_hcl()`, `str()` for primitives | +| `--json` | `to_dict()` then `json.dumps()`. Array for multiple results | +| `--value` | `to_dict()` for views, `str()` for primitives. One per line | +| `--raw` | Like `--value` but strips surrounding `"quotes"` from strings — ideal for shell piping | + +## Exit Codes + +| Code | Meaning | +|---|---| +| 0 | Query matched at least one result (or `?` suffix used) | +| 1 | No results, query syntax error, or unsafe expression | +| 2 | HCL parse error or I/O error | + +## Diff + +Compare two HCL files structurally: + +```sh +hq file1.tf --diff file2.tf +hq file1.tf --diff file2.tf --json +``` + +## Hybrid Queries + +> **Note:** Most queries should use structural mode (pipes, select, object construction). Only reach for hybrid mode when you need a Python transform that structural mode can't express. + +Use `::` to split a structural path (left) from a Python eval expression (right). The expression runs once per result from the structural path, with `_` bound to each result. 
+ +```sh +# Get name_labels for all variables +hq 'variable[*]::name_labels' variables.tf + +# Get block_type +hq 'variable[*]::block_type' variables.tf --value + +# Call methods +hq 'resource.aws_instance[*].tags::entries()' main.tf + +# Use builtins +hq 'variable[*]::len(_.name_labels)' variables.tf --value +``` + +**Expression normalization** (right of `::`): + +| Input | Normalized | +|---|---| +| `name_labels` | `_.name_labels` | +| `.foo` | `_.foo` | +| `_.foo` | `_.foo` (unchanged) | +| `len(_.x)` | `len(_.x)` (unchanged) | +| `doc.blocks()` | `doc.blocks()` (unchanged) | + +## Eval Mode + +> **Note:** Eval mode is a last resort. Many operations (comprehensions, imports, f-strings) are blocked for safety. Prefer structural queries with pipes, select, and object construction. + +Use `-e` to treat the entire query as a Python expression. `doc` is bound to the `DocumentView`. + +```sh +# Access specific block attributes +hq -e 'doc.blocks("variable")[0].attribute("default").value' variables.tf --json + +# Sort blocks +hq -e 'sorted(doc.blocks("variable"), key=lambda b: b.name_labels[0])' variables.tf + +# Filter blocks +hq -e 'list(filter(lambda b: b.attribute("default"), doc.blocks("variable")))' variables.tf + +# Find by predicate +hq -e 'doc.find_by_predicate(lambda n: hasattr(n, "name") and n.name == "ami")' main.tf +``` + +### Safe Eval Namespace + +- **Variables:** `doc` (DocumentView), `_` (per-result in hybrid mode) +- **Builtins:** `len`, `str`, `int`, `float`, `bool`, `list`, `tuple`, `type`, `isinstance`, `sorted`, `reversed`, `enumerate`, `zip`, `range`, `min`, `max`, `print`, `any`, `all`, `filter`, `map`, `hasattr`, `getattr` +- **Allowed:** attribute access, method calls, subscripts, lambdas, comparisons, boolean/arithmetic ops, keyword arguments +- **Blocked:** imports, comprehensions, assignments, f-strings, walrus operator, `exec`/`eval`/`__import__` + +## Introspection + +**`--describe`** — Show type info and available API for query results: 
+ +```sh +hq --describe 'variable[*]' variables.tf +``` + +```json +{ + "results": [ + { + "type": "BlockView", + "properties": ["block_type", "labels", "name_labels", "body", "raw", "parent_view"], + "methods": ["blocks(...)", "attributes(...)", "attribute(...)"], + "summary": "block_type='variable', labels=['variable', 'name']" + } + ] +} +``` + +**`--schema`** — Dump the full view API hierarchy as JSON (no QUERY or FILE needed): + +```sh +hq --schema +``` + +## All Flags + +| Flag | Description | +|---|---| +| `-e`, `--eval` | Treat QUERY as a Python expression | +| `--json` | Output as JSON | +| `--value` | Output raw values only | +| `--raw` | Output raw strings (strip surrounding quotes) | +| `--json-indent N` | JSON indentation width (default: 2) | +| `--describe` | Show type and available properties/methods | +| `--schema` | Dump full view API schema as JSON | +| `--diff FILE2` | Structural diff against FILE2 | +| `--no-filename` | Suppress filename prefix when querying directories | +| `--version` | Show version and exit | + +## Error Output + +Errors are printed to stderr. 
When `--json`, `--describe`, or `--schema` is active, errors are JSON: + +```json +{"error": "query_syntax", "message": "Invalid path segment: '123' in '123invalid'", "query": "123invalid"} +{"error": "unsafe_expression", "message": "comprehensions are not allowed", "expression": "[x for x in _]"} +{"error": "parse_error", "message": "Unexpected token ..."} +``` + +## Real-World Examples + +```sh +# All unique tag keys across every resource (skip labels, unpack keys) +hq 'resource~[*] | .tags | keys[*]' main.tf --value + +# Resources that have a tags attribute +hq 'resource~[select(.tags)] | .block_type' main.tf --value +# Or using jq-style has(): +hq 'resource~[select(has("tags"))] | .block_type' main.tf --value + +# Variables with defaults — get their name labels as JSON +hq 'variable~[select(.default)] | .name_labels' variables.tf --json + +# Function calls with more than 1 argument — get the function name +hq '*..function_call:*[select(.args | length > 1)] | .name' main.tf --value + +# Extract the condition from every ternary expression +hq '*..conditional:*.condition' main.tf + +# All tag values across resources, one per line +hq 'resource~[*] | .tags | values[*]' main.tf --value + +# For expressions that have an if-condition +hq 'locals~[*] | *..for_tuple:*[select(.has_condition == true)]' main.tf + +# Name labels for all aws_instance resources +hq 'resource.aws_instance[*] | .name_labels' main.tf --json + +# Conditionals where the true branch is the literal `true` +hq '*..conditional:* | select(.true_val == true)' main.tf + +# First variable that lacks a default +hq 'variable~[select(not .default)][0]' variables.tf + +# Variable names alongside their defaults (hybrid mode) +hq 'variable[select(.default)]::name_labels[0] + " = " + str(_.attribute("default").value)' variables.tf --value + +# String matching — find modules sourcing docker +hq 'module~[select(.source | contains("docker"))]' dir/ --value + +# Regex matching — find AMI references +hq 
def query(source):
    """Build a ``DocumentView`` over HCL2 input so it can be queried.

    :param source: HCL2 text string or file-like object. Anything that
        exposes a ``read`` attribute is handed to ``parse``; every other
        value is handed to ``parses``.
    :return: A ``DocumentView`` wrapping the parsed tree.
    """
    # Imported lazily: a module-level import would be circular with the
    # hcl2.query package.
    from hcl2.query.body import DocumentView

    is_file_like = hasattr(source, "read")
    tree = parse(source) if is_file_like else parses(source)
    return DocumentView(tree)
def register_view(rule_type: Type[LarkElement]):
    """Return a class decorator that records the decorated class as the view for ``rule_type``.

    The decorated class is stored in ``_VIEW_REGISTRY`` under ``rule_type``
    and returned unchanged.
    """

    def _bind(view_cls):
        _VIEW_REGISTRY[rule_type] = view_cls
        return view_cls

    return _bind


def view_for(node: LarkElement) -> "NodeView":
    """Wrap ``node`` in the most specific registered view.

    Lookup order: the node's exact type, then each class in its MRO; when
    nothing is registered the plain ``NodeView`` is used.
    """
    node_type = type(node)
    view_cls = _VIEW_REGISTRY.get(node_type)
    if view_cls is None:
        registered_bases = (
            _VIEW_REGISTRY[base]
            for base in node_type.__mro__
            if base in _VIEW_REGISTRY
        )
        view_cls = next(registered_bases, NodeView)
    return view_cls(node)


class NodeView:
    """Generic read-only facade around a single LarkElement node."""

    def __init__(self, node: LarkElement):
        self._node = node

    @property
    def raw(self) -> LarkElement:
        """The wrapped IR node itself."""
        return self._node

    @property
    def parent_view(self) -> Optional["NodeView"]:
        """A view over the node's ``_parent``, or None when there is none."""
        parent = getattr(self._node, "_parent", None)
        return None if parent is None else view_for(parent)

    def find_all(self, rule_type: Type[T]) -> List["NodeView"]:
        """Return views for every descendant that ``walk.find_all`` yields for ``rule_type``."""
        matches = _walk_mod.find_all(self._node, rule_type)
        return [view_for(match) for match in matches]

    def find_by_predicate(self, predicate: Callable[..., bool]) -> List["NodeView"]:
        """Return views of semantic descendants for which ``predicate(view)`` is truthy.

        The predicate receives the wrapped view, not the raw node.
        """
        candidates = (
            view_for(element) for element in _walk_mod.walk_semantic(self._node)
        )
        return [view for view in candidates if predicate(view)]

    def walk_semantic(self) -> List["NodeView"]:
        """Return views for everything ``walk.walk_semantic`` yields from this node."""
        return [view_for(item) for item in _walk_mod.walk_semantic(self._node)]

    def walk_rules(self) -> List["NodeView"]:
        """Return views for everything ``walk.walk_rules`` yields from this node."""
        return [view_for(item) for item in _walk_mod.walk_rules(self._node)]

    def to_hcl(self) -> str:
        """Render this subtree back to HCL text via ``HCLReconstructor.reconstruct_fragment``."""
        # Local import, as in the rest of this package's lazy imports.
        from hcl2.reconstructor import HCLReconstructor

        return HCLReconstructor().reconstruct_fragment(self._node)

    def to_dict(self, options: Optional[SerializationOptions] = None) -> Any:
        """Serialize the wrapped node to a Python value.

        ``options`` is forwarded to ``serialize`` only when given; otherwise
        the node's own defaults apply.
        """
        if options is None:
            return self._node.serialize()
        return self._node.serialize(options=options)

    def __repr__(self) -> str:
        return "<{} wrapping {!r}>".format(self.__class__.__name__, self._node)


# Short type names keyed by view class name; consumed by view_type_name().
VIEW_TYPE_NAMES = {
    "DocumentView": "document",
    "BodyView": "body",
    "BlockView": "block",
    "AttributeView": "attribute",
    "TupleView": "tuple",
    "ObjectView": "object",
    "ForTupleView": "for_tuple",
    "ForObjectView": "for_object",
    "FunctionCallView": "function_call",
    "ConditionalView": "conditional",
    "NodeView": "node",
}


def view_type_name(node: "NodeView") -> str:
    """Return the short type name for a view.

    Classes missing from ``VIEW_TYPE_NAMES`` fall back to their lowercased
    class name.
    """
    cls_name = type(node).__name__
    try:
        return VIEW_TYPE_NAMES[cls_name]
    except KeyError:
        return cls_name.lower()
def _label_to_str(label) -> str:
    """Turn a block label (IdentifierRule or StringRule) into a plain string.

    Identifier labels are returned as serialized. String labels lose one pair
    of surrounding double quotes when present. Anything else is passed
    through ``str()``.
    """
    serialized = label.serialize()
    if isinstance(label, IdentifierRule):
        return serialized
    if isinstance(label, StringRule):
        is_quoted = (
            isinstance(serialized, str)
            and len(serialized) >= 2
            and serialized.startswith('"')
            and serialized.endswith('"')
        )
        if is_quoted:
            # Drop the surrounding quotes only; inner content is untouched.
            return serialized[1:-1]
    return str(serialized)


@register_view(BlockRule)
class BlockView(NodeView):
    """Facade over an HCL2 block (BlockRule)."""

    def _wrap_body(self):
        """Build a fresh BodyView over this block's body."""
        # Imported here, not at module level, as elsewhere in this package.
        from hcl2.query.body import BodyView

        node: BlockRule = self._node  # type: ignore[assignment]
        return BodyView(node.body)

    @property
    def block_type(self) -> str:
        """The first label, i.e. the block type, as a plain string."""
        node: BlockRule = self._node  # type: ignore[assignment]
        first_label = node.labels[0]
        return _label_to_str(first_label)

    @property
    def labels(self) -> List[str]:
        """Every label of the block (block type included) as plain strings."""
        node: BlockRule = self._node  # type: ignore[assignment]
        return list(map(_label_to_str, node.labels))

    @property
    def name_labels(self) -> List[str]:
        """The labels that follow the block type (``labels[1:]``)."""
        all_labels = self.labels
        return all_labels[1:]

    @property
    def body(self) -> "NodeView":
        """The block body wrapped in a BodyView."""
        return self._wrap_body()

    def blocks(
        self, block_type: Optional[str] = None, *labels: str
    ) -> List["NodeView"]:
        """Nested blocks; filtering is delegated to the body view."""
        return self._wrap_body().blocks(block_type, *labels)

    def attributes(self, name: Optional[str] = None) -> List["NodeView"]:
        """Nested attributes; filtering is delegated to the body view."""
        return self._wrap_body().attributes(name)

    def attribute(self, name: str) -> Optional["NodeView"]:
        """Single nested attribute by name; delegated to the body view."""
        return self._wrap_body().attribute(name)
@register_view(BodyRule)
class BodyView(NodeView):
    """View over an HCL2 body (BodyRule)."""

    def blocks(
        self, block_type: Optional[str] = None, *labels: str
    ) -> List["NodeView"]:
        """Return the body's direct child blocks that match the filters.

        ``block_type`` (when not ``None``) must equal the block's type.
        ``labels`` (when given) must equal the leading name labels of the
        block; a block with fewer name labels than requested never matches.
        """
        # Imported lazily, as in the rest of this module.
        from hcl2.query.blocks import BlockView

        wanted = list(labels)

        def _matches(candidate: "BlockView") -> bool:
            if block_type is not None and candidate.block_type != block_type:
                return False
            if not wanted:
                return True
            # Prefix comparison: a shorter label list yields a shorter
            # slice, which can never equal ``wanted``.
            return candidate.name_labels[: len(wanted)] == wanted

        body_rule: BodyRule = self._node  # type: ignore[assignment]
        matched: List[NodeView] = []
        for child in body_rule.children:
            if isinstance(child, BlockRule):
                view = BlockView(child)
                if _matches(view):
                    matched.append(view)
        return matched

    def attributes(self, name: Optional[str] = None) -> List["NodeView"]:
        """Return the body's direct child attributes, optionally only those named *name*."""
        from hcl2.query.attributes import AttributeView

        body_rule: BodyRule = self._node  # type: ignore[assignment]
        matched: List[NodeView] = []
        for child in body_rule.children:
            if isinstance(child, AttributeRule):
                view = AttributeView(child)
                if name is None or view.name == name:
                    matched.append(view)
        return matched

    def attribute(self, name: str) -> Optional["NodeView"]:
        """Return the first attribute named *name*, or ``None`` when there is none."""
        found = self.attributes(name)
        if not found:
            return None
        return found[0]
def apply_builtin(name: str, nodes: List[Any]) -> List[Any]:
    """Run the builtin called *name* over *nodes* and return its results.

    The nodes are first passed through ``_unwrap_to_values``; the
    unwrapped list is then handed to the helper for ``keys``, ``values``
    or ``length``.  ``keys`` and ``values`` skip node types they do not
    handle, so their result can be shorter than *nodes*.

    Raises:
        QuerySyntaxError: if *name* is not one of the three builtins.
    """
    unwrapped = _unwrap_to_values(nodes)
    handlers = (
        ("keys", _apply_keys),
        ("values", _apply_values),
        ("length", _apply_length),
    )
    for builtin_name, handler in handlers:
        if name == builtin_name:
            return handler(unwrapped)
    raise QuerySyntaxError(f"Unknown builtin: {name!r}")
_apply_values(nodes: List[Any]) -> List[Any]: + from hcl2.query.body import BodyView, DocumentView + from hcl2.query.containers import ObjectView, TupleView + + results: List[Any] = [] + for node in nodes: + if isinstance(node, ObjectView): + results.append([v for _, v in node.entries]) + elif isinstance(node, TupleView): + results.append(node.elements) + elif isinstance(node, (DocumentView, BodyView)): + body = node.body if isinstance(node, DocumentView) else node + items: list = [] + items.extend(body.blocks()) + items.extend(body.attributes()) + results.append(items) + elif isinstance(node, dict): + results.append(list(node.values())) + elif isinstance(node, list): + results.append(node) + return results + + +def _apply_length(nodes: List[Any]) -> List[Any]: + from hcl2.query._base import NodeView + from hcl2.query.body import BodyView, DocumentView + from hcl2.query.containers import ObjectView, TupleView + from hcl2.query.functions import FunctionCallView + + results: List[Any] = [] + for node in nodes: + if isinstance(node, TupleView): + results.append(len(node)) + elif isinstance(node, ObjectView): + results.append(len(node.entries)) + elif isinstance(node, FunctionCallView): + results.append(len(node.args)) + elif isinstance(node, (DocumentView, BodyView)): + body = node.body if isinstance(node, DocumentView) else node + results.append(len(body.blocks()) + len(body.attributes())) + elif isinstance(node, NodeView): + results.append(1) + elif isinstance(node, (list, dict, str)): + results.append(len(node)) + else: + results.append(1) + return results diff --git a/hcl2/query/containers.py b/hcl2/query/containers.py new file mode 100644 index 00000000..812f3719 --- /dev/null +++ b/hcl2/query/containers.py @@ -0,0 +1,53 @@ +"""TupleView and ObjectView facades.""" + +from typing import List, Optional, Tuple + +from hcl2.query._base import NodeView, register_view, view_for +from hcl2.rules.containers import ObjectRule, TupleRule + + +@register_view(TupleRule) 
@dataclass
class DiffEntry:
    """A single difference between two structures."""

    path: str  # location of the difference, e.g. "a.b[2]" or "(root)"
    kind: str  # "added", "removed", "changed"
    left: Any = None  # left-hand value (not set for "added")
    right: Any = None  # right-hand value (not set for "removed")


def diff_dicts(left: Any, right: Any, path: str = "") -> List[DiffEntry]:
    """Recursively compare two Python structures and return differences.

    Two dicts are compared key by key (keys visited in sorted order) and
    two lists index by index; any other pair is compared by value.

    Args:
        left: Structure on the "before" side.
        right: Structure on the "after" side.
        path: Location prefix used for the reported entries; empty at the
            root.

    Returns:
        One ``DiffEntry`` per difference, with ``kind`` set to ``"added"``
        (only in *right*), ``"removed"`` (only in *left*) or ``"changed"``.
    """
    entries: List[DiffEntry] = []

    if isinstance(left, dict) and isinstance(right, dict):
        for key in sorted(left.keys() | right.keys()):
            # str() keeps ``DiffEntry.path`` a string even for a non-string
            # key at the root.
            child_path = f"{path}.{key}" if path else str(key)
            if key not in left:
                entries.append(
                    DiffEntry(path=child_path, kind="added", right=right[key])
                )
            elif key not in right:
                entries.append(
                    DiffEntry(path=child_path, kind="removed", left=left[key])
                )
            else:
                entries.extend(diff_dicts(left[key], right[key], child_path))
    elif isinstance(left, list) and isinstance(right, list):
        for i in range(max(len(left), len(right))):
            child_path = f"{path}[{i}]"
            if i >= len(left):
                entries.append(DiffEntry(path=child_path, kind="added", right=right[i]))
            elif i >= len(right):
                entries.append(DiffEntry(path=child_path, kind="removed", left=left[i]))
            else:
                entries.extend(diff_dicts(left[i], right[i], child_path))
    elif left != right or isinstance(left, bool) != isinstance(right, bool):
        # ``True == 1`` and ``False == 0`` in Python, so a bool replaced by
        # the equal-valued number needs the explicit type check to be seen.
        entries.append(
            DiffEntry(path=path or "(root)", kind="changed", left=left, right=right)
        )

    return entries


def format_diff_text(entries: List[DiffEntry]) -> str:
    """Format diff entries as human-readable text.

    Each entry becomes one line prefixed with ``+`` (added), ``-``
    (removed) or ``~`` (changed); entries of any other kind are omitted.
    Returns an empty string when there are no entries.
    """
    lines: List[str] = []
    for entry in entries:
        if entry.kind == "added":
            lines.append(f"+ {entry.path}: {_fmt_val(entry.right)}")
        elif entry.kind == "removed":
            lines.append(f"- {entry.path}: {_fmt_val(entry.left)}")
        elif entry.kind == "changed":
            lines.append(
                f"~ {entry.path}: {_fmt_val(entry.left)} -> {_fmt_val(entry.right)}"
            )
    return "\n".join(lines)


def format_diff_json(entries: List[DiffEntry]) -> str:
    """Format diff entries as a JSON array of objects.

    Every object has ``path`` and ``kind``.  ``left`` is present for
    "removed" and "changed" entries and ``right`` for "added" and
    "changed" entries even when the value is ``None``, so a null value
    is not mistaken for a missing side.  For any other kind a side is
    emitted only when it is not ``None``.
    """
    data = []
    for entry in entries:
        item: dict = {"path": entry.path, "kind": entry.kind}
        if entry.kind in ("removed", "changed") or entry.left is not None:
            item["left"] = entry.left
        if entry.kind in ("added", "changed") or entry.right is not None:
            item["right"] = entry.right
        data.append(item)
    # default=str: values json cannot encode are written as their str().
    return json.dumps(data, indent=2, default=str)


def _fmt_val(val: Any) -> str:
    """Format a value for text diff display (strings are shown via repr)."""
    if isinstance(val, str):
        return repr(val)
    return str(val)
second = node.for_intro.second_iterator + if second is None: + return None + return second.serialize() + + @property + def iterable(self) -> NodeView: + """Return a view over the iterable expression.""" + node: ForTupleExprRule = self._node # type: ignore[assignment] + return view_for(node.for_intro.iterable) + + @property + def value_expr(self) -> NodeView: + """Return a view over the value expression.""" + node: ForTupleExprRule = self._node # type: ignore[assignment] + return view_for(node.value_expr) + + @property + def has_condition(self) -> bool: + """Return whether the for expression has an if condition.""" + node: ForTupleExprRule = self._node # type: ignore[assignment] + return node.condition is not None + + @property + def condition(self) -> Optional[NodeView]: + """Return a view over the condition, or None.""" + node: ForTupleExprRule = self._node # type: ignore[assignment] + cond = node.condition + if cond is None: + return None + return view_for(cond) + + +@register_view(ForObjectExprRule) +class ForObjectView(NodeView): + """View over a for-object expression ({for ...}).""" + + @property + def iterator_name(self) -> str: + """Return the first iterator identifier name.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + return node.for_intro.first_iterator.serialize() + + @property + def second_iterator_name(self) -> Optional[str]: + """Return the second iterator identifier name, or None.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + second = node.for_intro.second_iterator + if second is None: + return None + return second.serialize() + + @property + def iterable(self) -> NodeView: + """Return a view over the iterable expression.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + return view_for(node.for_intro.iterable) + + @property + def key_expr(self) -> NodeView: + """Return a view over the key expression.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + return 
view_for(node.key_expr) + + @property + def value_expr(self) -> NodeView: + """Return a view over the value expression.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + return view_for(node.value_expr) + + @property + def has_ellipsis(self) -> bool: + """Return whether the for expression has an ellipsis.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + return node.ellipsis is not None + + @property + def has_condition(self) -> bool: + """Return whether the for expression has an if condition.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + return node.condition is not None + + @property + def condition(self) -> Optional[NodeView]: + """Return a view over the condition, or None.""" + node: ForObjectExprRule = self._node # type: ignore[assignment] + cond = node.condition + if cond is None: + return None + return view_for(cond) diff --git a/hcl2/query/functions.py b/hcl2/query/functions.py new file mode 100644 index 00000000..58e3c496 --- /dev/null +++ b/hcl2/query/functions.py @@ -0,0 +1,35 @@ +"""FunctionCallView facade.""" + +from typing import List + +from hcl2.query._base import NodeView, register_view, view_for +from hcl2.rules.functions import FunctionCallRule + + +@register_view(FunctionCallRule) +class FunctionCallView(NodeView): + """View over an HCL2 function call (FunctionCallRule).""" + + @property + def name(self) -> str: + """Return the function name (namespace::name joined).""" + node: FunctionCallRule = self._node # type: ignore[assignment] + return "::".join(ident.serialize() for ident in node.identifiers) + + @property + def args(self) -> List[NodeView]: + """Return the function arguments as views.""" + node: FunctionCallRule = self._node # type: ignore[assignment] + args_rule = node.arguments + if args_rule is None: + return [] + return [view_for(arg) for arg in args_rule.arguments] + + @property + def has_ellipsis(self) -> bool: + """Return whether the argument list ends with 
def _describe_view(view: NodeView) -> Dict[str, Any]:
    """Describe a single view instance.

    Returns a dict with the view's class name under ``"type"``, the names
    of its public properties under ``"properties"`` and its public
    instance methods (name followed by signature, when one can be
    determined) under ``"methods"``.  Static and class methods are left
    out.  A ``"summary"`` entry is added when ``_summarize_view`` returns
    a non-empty string.
    """
    cls = type(view)
    props: List[str] = []
    methods: List[str] = []

    for name, obj in inspect.getmembers(cls):
        if name.startswith("_"):
            continue
        if isinstance(obj, property):
            props.append(name)
            continue
        if not callable(obj):
            continue
        # getmembers() yields what getattr() returns, which for a static or
        # class method is the plain function / bound method, never the
        # descriptor.  Look at the raw class attribute to recognise them.
        raw = inspect.getattr_static(cls, name)
        if isinstance(raw, (staticmethod, classmethod)):
            continue
        try:
            sig = str(inspect.signature(obj))
        except (ValueError, TypeError):
            sig = ""
        methods.append(f"{name}{sig}")

    result: Dict[str, Any] = {
        "type": cls.__name__,
        "properties": props,
        "methods": methods,
    }
    summary = _summarize_view(view)
    if summary:
        result["summary"] = summary
    return result
f"name={view.name!r}" + return "" + + +def build_schema() -> Dict[str, Any]: + """Build the full view API schema for --schema output.""" + views = {} + for rule_type, view_cls in _VIEW_REGISTRY.items(): + views[view_cls.__name__] = _schema_for_class(view_cls, rule_type) + + # Add base NodeView + views["NodeView"] = _schema_for_class(NodeView, None) + + return { + "docs": "https://github.com/amplify-education/python-hcl2/tree/main/docs", + "query_guide": { + "mode_preference": [ + "1. Structural (default) — preferred for all queries. jq-like syntax.", + "2. Hybrid (::) — only when you need Python on structural results.", + "3. Eval (-e) — last resort. Many expressions are blocked for safety.", + ], + "structural_syntax": { + "navigate": "resource.aws_instance.main.ami", + "wildcard": "variable[*]", + "skip_labels": "resource~[*]", + "pipes": "resource[*] | .tags | keys", + "select": "resource~[select(.ami)]", + "string_functions": ( + 'select(.source | contains("x")), ' + 'select(.ami | test("^ami-")), ' + 'select(.name | startswith("prod-")), ' + 'select(.path | endswith("/api"))' + ), + "has": 'select(has("tags"))', + "postfix_not": "select(.tags | not)", + "any_all": 'any(.elements; .type == "function_call")', + "construct": "{name: .name, type: .block_type, file: .__file__}", + "recursive": "*..function_call:*", + "optional": "nonexistent?", + }, + "output_flags": { + "--json": "JSON output", + "--value": "Raw value (keeps quotes on strings)", + "--raw": "Raw value (strips quotes, ideal for shell piping)", + "--no-filename": "Suppress filename prefix in multi-file mode", + }, + "examples": [ + "hq 'resource.aws_instance~[*] | .ami' dir/ --raw", + "hq 'module~[select(.source | contains(\"docker\"))]' dir/ --json", + "hq 'resource~[select(has(\"tags\"))] | {name: .name_labels, tags}' dir/ --json", + "hq 'variable~[select(.default)] | {name: .name_labels, default}' . 
def _schema_for_class(cls, rule_type) -> Dict[str, Any]:
    """Build schema for a single view class.

    Args:
        cls: The class to describe.
        rule_type: The rule class it wraps, or ``None``; when given, its
            ``__name__`` is stored under ``"wraps"``.

    Returns:
        A dict that may contain ``"wraps"``, ``"properties"`` (name ->
        ``{"type", "description"}``), ``"methods"`` and
        ``"static_methods"`` (name -> signature string).  Empty
        categories are omitted and names starting with ``_`` are skipped.
    """

    def _signature_text(func) -> str:
        # Some callables expose no introspectable signature.
        try:
            return str(inspect.signature(func))
        except (ValueError, TypeError):
            return "(...)"

    def _annotation_text(annotation) -> str:
        # Plain classes are shown by name ("str") rather than by repr
        # ("<class 'str'>").  Parameterised generics carry ``__origin__``
        # and keep their full textual form.
        if isinstance(annotation, type) and getattr(annotation, "__origin__", None) is None:
            return annotation.__name__
        return str(annotation)

    result: Dict[str, Any] = {}
    if rule_type is not None:
        result["wraps"] = rule_type.__name__

    props: Dict[str, Dict[str, str]] = {}
    methods: Dict[str, str] = {}
    static_methods: Dict[str, str] = {}

    for name in sorted(dir(cls)):
        if name.startswith("_"):
            continue
        obj = getattr(cls, name)
        if isinstance(obj, property):
            getter = obj.fget
            ret = getattr(getter, "__annotations__", {}).get("return") if getter else None
            prop_info: Dict[str, str] = {
                "type": _annotation_text(ret) if ret else "Any"
            }
            # The property's docstring doubles as its description.
            doc = getter.__doc__ if getter else None
            if doc:
                prop_info["description"] = doc.strip()
            props[name] = prop_info
        elif isinstance(inspect.getattr_static(cls, name), staticmethod):
            # getattr() unwraps staticmethod objects, so the raw class
            # attribute is inspected to recognise them.
            static_methods[name] = _signature_text(obj)
        elif callable(obj):
            methods[name] = _signature_text(obj)

    if props:
        result["properties"] = props
    if methods:
        result["methods"] = methods
    if static_methods:
        result["static_methods"] = static_methods

    return result
class QuerySyntaxError(Exception):
    """Raised when a structural path cannot be parsed."""


@dataclass(frozen=True)
class PathSegment:
    """A single segment in a structural path."""

    name: str  # identifier or "*" for wildcard
    select_all: bool  # True if [*] suffix
    index: Optional[int]  # integer if [N] suffix, None otherwise
    recursive: bool = False  # True for ".." recursive descent
    predicate: object = None  # PredicateNode if [select(...)] suffix
    type_filter: Optional[str] = None  # e.g. "function_call" in function_call:name
    skip_labels: bool = False  # True if ~ suffix (skip remaining block labels)


# Optional type qualifier prefix: type_filter:name~?[bracket]?
_SEGMENT_RE = re.compile(
    r"^(?:([a-z_]+):)?([a-zA-Z_][a-zA-Z0-9_-]*|\*)(~)?(?:\[(\*|[0-9]+)\])?\??$"
)


def parse_path(path_str: str) -> List[PathSegment]:
    """Parse a structural path string into segments.

    Grammar:
        path    := segment (("." | "..") segment)*
        segment := [TYPE ":"] name ["~"] [suffix] ["?"]
        suffix  := "[*]" | "[" INT "]"
                 | "[select(" PREDICATE ")]" ["[*]" | "[" INT "]"]
        name    := "*" | IDENTIFIER

    A segment introduced by ".." is marked ``recursive``.  A bare
    ``[select(...)]`` suffix sets ``select_all``.  The trailing ``?`` is
    accepted and ignored here.

    Raises QuerySyntaxError on invalid input.
    """
    if not path_str or not path_str.strip():
        raise QuerySyntaxError("Empty path")

    segments: List[PathSegment] = []

    for is_recursive, part in _split_path(path_str):
        # Segments carrying a [select(...)] bracket have their own parser.
        select_match = _extract_select(part)
        if select_match is not None:
            seg_name, predicate, type_filter, skip, sel_all, sel_idx = select_match
            segments.append(
                PathSegment(
                    name=seg_name,
                    select_all=sel_all,
                    index=sel_idx,
                    recursive=is_recursive,
                    predicate=predicate,
                    type_filter=type_filter,
                    skip_labels=skip,
                )
            )
            continue

        match = _SEGMENT_RE.match(part)
        if not match:
            raise QuerySyntaxError(f"Invalid path segment: {part!r} in {path_str!r}")

        type_filter, name, tilde, bracket = match.groups()
        segments.append(
            PathSegment(
                name=name,
                select_all=bracket == "*",
                index=None if bracket in (None, "*") else int(bracket),
                recursive=is_recursive,
                type_filter=type_filter,
                skip_labels=tilde is not None,
            )
        )

    return segments


def _split_path(path_str: str) -> List[Tuple[bool, str]]:
    """Split a path string into (is_recursive, segment_text) pairs.

    Handles both single dots (normal) and double dots (recursive descent).
    Dots inside ``[...]``, ``(...)`` or a double-quoted string are not
    treated as separators; this holds for segments after ``..`` as well.
    A quoted string is copied verbatim, so brackets inside it do not
    affect nesting.

    Raises QuerySyntaxError when the path starts with ``.``, when ``..``
    is not followed by a segment, or when no segment is found.
    """
    result: List[Tuple[bool, str]] = []
    current: List[str] = []
    recursive = False  # True while collecting the segment that follows ".."
    bracket_depth = 0
    paren_depth = 0
    i = 0
    length = len(path_str)

    while i < length:
        char = path_str[i]

        if char == '"':
            # Consume the entire quoted string (to the end if unterminated).
            closing = path_str.find('"', i + 1)
            end = length if closing == -1 else closing + 1
            current.append(path_str[i:end])
            i = end
            continue

        if char == "." and bracket_depth == 0 and paren_depth == 0:
            if current:
                result.append((recursive, "".join(current)))
                current = []
                recursive = False
            elif recursive:
                raise QuerySyntaxError(f"Expected segment after '..': {path_str!r}")
            elif not result:
                raise QuerySyntaxError(f"Path cannot start with '.': {path_str!r}")

            if path_str.startswith("..", i):
                recursive = True
                i += 2  # skip both dots
            else:
                i += 1  # skip single dot
            continue

        if char == "[":
            bracket_depth += 1
        elif char == "]":
            bracket_depth -= 1
        elif char == "(":
            paren_depth += 1
        elif char == ")":
            paren_depth -= 1
        current.append(char)
        i += 1

    if current:
        result.append((recursive, "".join(current)))
    elif recursive:
        raise QuerySyntaxError(f"Expected segment after '..': {path_str!r}")

    if not result:
        raise QuerySyntaxError(f"Empty path: {path_str!r}")

    return result


def _find_select_close(part: str, start: int) -> int:
    """Return the index of the ``)`` closing a ``select(`` opened before *start*.

    Nested parentheses and double-quoted strings are skipped.  Returns -1
    when no matching ``)`` exists or when it is not immediately followed
    by ``]``.
    """
    depth = 1
    in_string = False
    for pos in range(start, len(part)):
        char = part[pos]
        if in_string:
            if char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                return pos if part.startswith("]", pos + 1) else -1
    return -1


def _extract_select(part: str) -> Optional[tuple]:
    """Extract ``name[select(...)]`` from a segment string.

    Returns ``None`` if *part* has no ``[select(`` bracket, otherwise the
    tuple ``(name, predicate, type_filter, skip_labels, select_all, index)``.
    ``select_all`` is True unless a trailing ``[N]`` is given, in which
    case ``index`` is ``N``.

    Raises QuerySyntaxError when the name before the bracket is invalid,
    when the bracket is not closed by ``)]``, or when unexpected text
    follows it.
    """
    select_marker = "[select("
    idx = part.find(select_marker)
    if idx == -1:
        return None

    seg_name = part[:idx]
    if not seg_name or not re.match(
        r"^(?:[a-z_]+:)?(?:[a-zA-Z_][a-zA-Z0-9_-]*|\*)~?$", seg_name
    ):
        raise QuerySyntaxError(f"Invalid segment name before [select(): {seg_name!r}")

    # Parse optional type_filter:name prefix
    type_filter = None
    if ":" in seg_name:
        type_filter, seg_name = seg_name.split(":", 1)

    # Parse optional ~ suffix
    skip_labels = seg_name.endswith("~")
    if skip_labels:
        seg_name = seg_name[:-1]

    # Locate the ")" that really closes select(...): a plain search for the
    # first ")]" would cut a predicate that itself contains ")]".
    inner_start = idx + len(select_marker)
    close_idx = _find_select_close(part, inner_start)
    if close_idx == -1:
        raise QuerySyntaxError(f"Expected )] at end of select bracket in: {part!r}")
    inner = part[inner_start:close_idx]
    tail = part[close_idx + 2 :]  # text after ")]"

    from hcl2.query.predicate import parse_predicate

    predicate = parse_predicate(inner)

    # Parse optional trailing [*] or [N] after [select(...)], with optional ?
    select_all = True  # default: select returns all matches
    index = None
    # Strip trailing ? (optional operator is a no-op at segment level)
    clean_tail = tail.rstrip("?")
    if clean_tail:
        tail_match = re.match(r"^\[(\*|[0-9]+)\]$", clean_tail)
        if not tail_match:
            raise QuerySyntaxError(
                f"Unexpected suffix after [select(...)]: {tail!r} in {part!r}"
            )
        bracket = tail_match.group(1)
        if bracket != "*":
            select_all = False
            index = int(bracket)

    return (seg_name, predicate, type_filter, skip_labels, select_all, index)
+ """ + stages: List[str] = [] + current: List[str] = [] + bracket_depth = 0 + paren_depth = 0 + brace_depth = 0 + in_string = False + + for char in query_str: + if in_string: + current.append(char) + if char == '"': + in_string = False + continue + + if char == '"': + in_string = True + current.append(char) + elif char == "[": + bracket_depth += 1 + current.append(char) + elif char == "]": + bracket_depth -= 1 + current.append(char) + elif char == "(": + paren_depth += 1 + current.append(char) + elif char == ")": + paren_depth -= 1 + current.append(char) + elif char == "{": + brace_depth += 1 + current.append(char) + elif char == "}": + brace_depth -= 1 + current.append(char) + elif ( + char == "|" and bracket_depth == 0 and paren_depth == 0 and brace_depth == 0 + ): + stage = "".join(current).strip() + if not stage: + raise QuerySyntaxError("Empty stage in pipeline") + stages.append(stage) + current = [] + else: + current.append(char) + + # Final stage + tail = "".join(current).strip() + if not tail and stages: + raise QuerySyntaxError("Empty stage in pipeline") + if tail: + stages.append(tail) + + if not stages: + raise QuerySyntaxError("Empty pipeline") + + return stages + + +def classify_stage(stage_str: str) -> Any: + """Classify a stage string into a PipeStage type. + + - ``select(...)`` → :class:`SelectStage` + - ``keys`` / ``values`` / ``length`` → :class:`BuiltinStage` + - Otherwise → :class:`PathStage` + """ + from hcl2.query.builtins import BUILTIN_NAMES + + stripped = stage_str.strip() + + # Strip trailing ? 
def _split_construct_fields(inner: str) -> List[str]:
    """Split the inner part of ``{...}`` on top-level commas.

    Commas inside ``[...]``, ``(...)`` or a double-quoted string do not
    split, and bracket characters inside a quoted string do not affect
    nesting.  Each field is stripped of surrounding whitespace; empty
    fields (for example from a trailing comma) are dropped.
    """
    fields: List[str] = []
    current: List[str] = []
    bracket_depth = 0
    paren_depth = 0
    in_string = False

    def _flush() -> None:
        field = "".join(current).strip()
        if field:
            fields.append(field)
        current.clear()

    for char in inner:
        if in_string:
            # Everything up to the closing quote is literal text.
            current.append(char)
            if char == '"':
                in_string = False
            continue

        if char == "," and bracket_depth == 0 and paren_depth == 0:
            _flush()
            continue

        if char == '"':
            in_string = True
        elif char == "[":
            bracket_depth += 1
        elif char == "]":
            bracket_depth -= 1
        elif char == "(":
            paren_depth += 1
        elif char == ")":
            paren_depth -= 1
        current.append(char)

    _flush()
    return fields
def _to_json_value(value: Any) -> Any:
    """Return *value* in a form suitable for JSON output.

    ``NodeView`` instances are replaced by the result of their ``to_dict()``;
    lists are converted element by element (recursively); every other value
    is passed through unchanged.
    """
    from hcl2.query._base import NodeView

    if isinstance(value, NodeView):
        converted = value.to_dict()
    elif isinstance(value, list):
        converted = [_to_json_value(element) for element in value]
    else:
        converted = value
    return converted
def execute_pipeline(root: Any, stages: List[Any], file_path: str = "") -> List[Any]:
    """Run *stages* in order, feeding each stage the previous stage's output.

    The working set starts as ``[root]``.  Execution stops early with an
    empty list as soon as a stage produces no results.

    Args:
        root: The view the first stage is applied to.
        stages: Stage objects (path, builtin, select or construct stages).
        file_path: Value substituted for the virtual ``__file__`` field in
            object construction.

    Returns:
        The results of the final stage.

    Raises:
        QuerySyntaxError: If a stage is of an unrecognised type.
    """
    from hcl2.query.builtins import apply_builtin
    from hcl2.query.predicate import evaluate_predicate

    current: List[Any] = [root]
    last_position = len(stages) - 1

    for position, stage in enumerate(stages):
        produced: List[Any]

        if isinstance(stage, PathStage):
            produced = [
                hit
                for item in current
                for hit in _resolve_path_item(item, stage.segments)
            ]
            # Builtins and select want the underlying values rather than the
            # wrapper views, so unwrap ahead of them.  Path and construct
            # stages keep the original views (construct needs properties such
            # as .block_type and .name_labels).
            feeds_value_stage = position < last_position and not isinstance(
                stages[position + 1], (PathStage, ConstructStage)
            )
            if feeds_value_stage:
                produced = _unwrap_for_next_stage(produced)

        elif isinstance(stage, BuiltinStage):
            produced = apply_builtin(stage.name, current)
            if stage.unpack:
                # ``builtin[*]``: splice list results into the output stream.
                flattened: List[Any] = []
                for entry in produced:
                    if isinstance(entry, list):
                        flattened.extend(entry)
                    else:
                        flattened.append(entry)
                produced = flattened

        elif isinstance(stage, SelectStage):
            produced = [
                item for item in current if evaluate_predicate(stage.predicate, item)
            ]

        elif isinstance(stage, ConstructStage):
            produced = []
            for item in current:
                record: dict = {}
                for key, segments in stage.fields:
                    # __file__ is a virtual field resolved to the source path
                    if len(segments) == 1 and segments[0].name == "__file__":
                        record[key] = file_path
                        continue
                    hits = _resolve_path_item(item, segments)
                    if not hits:
                        record[key] = None
                        continue
                    # A single hit is stored bare; several hits stay a list.
                    chosen = hits[0] if len(hits) == 1 else hits
                    record[key] = _to_json_value(_unwrap_construct_value(chosen))
                produced.append(record)

        else:
            raise QuerySyntaxError(f"Unknown stage type: {stage!r}")

        current = produced
        if not current:
            return []

    return current
+ + Falls back to this when structural resolution returns nothing. + Only handles single-segment paths (no dots) with no type filter. + Returns the property value, or ``None`` if no matching property exists. + """ + from hcl2.query._base import NodeView + + if len(segments) != 1: + return None + + seg = segments[0] + if seg.type_filter is not None or not isinstance(node, NodeView): + return None + + # Check for a Python property on the view class + # In query context, .value resolves to .value_node so it formats + # consistently across output modes (HCL expression, not ${...} wrapped). + prop_name = seg.name + if prop_name == "value" and hasattr(type(node), "value_node"): + prop_name = "value_node" + + prop_descriptor = getattr(type(node), prop_name, None) + if not isinstance(prop_descriptor, property): + return None + + value = getattr(node, prop_name) + + # Apply index/select_all to list-valued properties + if seg.select_all and isinstance(value, list): + return value + if seg.index is not None and isinstance(value, list): + if 0 <= seg.index < len(value): + return value[seg.index] + return None + + return value + + +def _unwrap_single(item: Any) -> Any: + """Unwrap a single view for structural resolution. + + Returns the unwrapped view, or the original item if no unwrapping applies. + """ + from hcl2.query._base import NodeView, view_for + from hcl2.query.attributes import AttributeView + from hcl2.query.blocks import BlockView + from hcl2.rules.expressions import ExprTermRule + + if isinstance(item, AttributeView): + item = item.value_node + elif isinstance(item, BlockView): + item = item.body + if isinstance(item, NodeView) and isinstance(item._node, ExprTermRule): + inner = item._node.expression + if inner is not None: + item = view_for(inner) + return item + + +def _unwrap_for_next_stage(results: List[Any]) -> List[Any]: + """Unwrap views for pipeline chaining between stages. 
+ + - AttributeView → value node (unwrapped from ExprTermRule) + - BlockView → body (so subsequent stages see attributes/blocks, not labels) + - ExprTermRule wrapper → concrete inner view + """ + from hcl2.query._base import NodeView, view_for + from hcl2.query.attributes import AttributeView + from hcl2.query.blocks import BlockView + from hcl2.rules.expressions import ExprTermRule + + unwrapped: List[Any] = [] + for item in results: + if isinstance(item, AttributeView): + item = item.value_node + elif isinstance(item, BlockView): + item = item.body + # Unwrap ExprTermRule wrappers to concrete view types + if isinstance(item, NodeView) and isinstance(item._node, ExprTermRule): + inner = item._node.expression + if inner is not None: + item = view_for(inner) + unwrapped.append(item) + return unwrapped diff --git a/hcl2/query/predicate.py b/hcl2/query/predicate.py new file mode 100644 index 00000000..39e5a4c1 --- /dev/null +++ b/hcl2/query/predicate.py @@ -0,0 +1,569 @@ +"""Self-contained recursive descent parser and evaluator for select() predicates. + +Predicate grammar:: + + predicate := or_expr + or_expr := and_expr ("or" and_expr)* + and_expr := not_expr ("and" not_expr)* + not_expr := "not" not_expr | comparison + comparison := accessor (comp_op literal)? | any_all | has_expr + any_all := ("any" | "all") "(" accessor ";" predicate ")" + has_expr := "has" "(" STRING ")" + accessor := "." IDENT ("." IDENT)* ("[" INT "]")? ("|" BUILTIN_OR_FUNC)? + BUILTIN := "keys" | "values" | "length" | "not" + FUNC := ("contains" | "test" | "startswith" | "endswith") "(" STRING ")" + literal := STRING | NUMBER | "true" | "false" | "null" + comp_op := "==" | "!=" | "<" | ">" | "<=" | ">=" + +No Python eval() is used. 
# Names accepted after ``|`` that take a single string argument.
_STRING_FUNCTIONS = frozenset({"contains", "test", "startswith", "endswith"})


@dataclass(frozen=True)
class Accessor:
    """A dotted accessor, e.g. ``.foo.bar[0]`` or ``.foo | length``.

    Produced by the parser; the evaluator walks ``parts`` left to right,
    then applies ``index`` and finally ``builtin``.
    """

    parts: List[str]  # names between the dots, e.g. ["foo", "bar"]
    index: Optional[int] = None  # value of the optional [N] suffix
    builtin: Optional[str] = None  # name after "|": builtin, string function or "not"
    builtin_arg: Optional[str] = None  # string argument, set only for string functions


@dataclass(frozen=True)
class Comparison:
    """``accessor comp_op literal`` or bare ``accessor`` (existence check)."""

    accessor: Accessor
    operator: Optional[str] = None  # "==", "!=", "<", ">", "<=", ">="; None for a bare accessor
    value: Any = None  # right-hand literal, already converted to a Python value


@dataclass(frozen=True)
class NotExpr:
    """``not expr``."""

    child: Any  # PredicateNode being negated


@dataclass(frozen=True)
class AndExpr:
    """``expr and expr ...``."""

    children: List[Any]  # operand PredicateNodes, in source order


@dataclass(frozen=True)
class OrExpr:
    """``expr or expr ...``."""

    children: List[Any]  # operand PredicateNodes, in source order


@dataclass(frozen=True)
class AnyExpr:
    """``any(accessor; predicate)`` — true if any element matches."""

    accessor: "Accessor"  # selects the elements to test
    predicate: Any  # PredicateNode evaluated against each element


@dataclass(frozen=True)
class AllExpr:
    """``all(accessor; predicate)`` — true if all elements match."""

    accessor: "Accessor"  # selects the elements to test
    predicate: Any  # PredicateNode evaluated against each element


@dataclass(frozen=True)
class HasExpr:
    """``has("key")`` — true if the key exists on the target."""

    key: str  # unquoted, unescaped key name


# Every node type the parser can produce and the evaluator dispatches on.
PredicateNode = Union[Comparison, NotExpr, AndExpr, OrExpr, AnyExpr, AllExpr, HasExpr]
# One named alternative per token kind.  The group name doubles as
# ``Token.kind`` (read back through ``match.lastgroup``), so the names must
# stay in sync with the kinds the parser expects.  Order matters: two-character
# operators are listed before their one-character prefixes.
_TOKEN_RE = re.compile(
    r"""
    (?P<DOT>\.)
    | (?P<PIPE>\|)
    | (?P<SEMI>;)
    | (?P<LPAREN>\()
    | (?P<RPAREN>\))
    | (?P<LBRACKET>\[)
    | (?P<RBRACKET>\])
    | (?P<OP>==|!=|<=|>=|<|>)
    | (?P<STRING>"(?:[^"\\]|\\.)*")
    | (?P<NUMBER>-?[0-9]+(?:\.[0-9]+)?)
    | (?P<WORD>[a-zA-Z_][a-zA-Z0-9_-]*)
    | (?P<WS>\s+)
    """,
    re.VERBOSE,
)


@dataclass
class Token:
    """A single token from the predicate tokeniser."""

    kind: str  # name of the _TOKEN_RE group that matched, e.g. "WORD"
    value: str  # exact source text of the token (strings keep their quotes)


def tokenize(text: str) -> List[Token]:
    """Split a predicate string into tokens, discarding whitespace.

    Args:
        text: The predicate source, e.g. ``.name == "web"``.

    Returns:
        Tokens in source order; an empty list for blank input.

    Raises:
        QuerySyntaxError: If a character cannot start any token.
    """
    tokens: List[Token] = []
    pos = 0
    while pos < len(text):
        match = _TOKEN_RE.match(text, pos)
        if match is None:
            raise QuerySyntaxError(
                f"Unexpected character at position {pos} in predicate: {text!r}"
            )
        pos = match.end()
        kind = match.lastgroup
        # Every alternative is a named group, so a match always has a kind;
        # the assert only narrows the Optional for the type checker.
        assert kind is not None
        if kind == "WS":
            continue
        tokens.append(Token(kind=kind, value=match.group()))
    return tokens
self._or_expr() + if self.pos < len(self.tokens): + raise QuerySyntaxError(f"Unexpected token: {self.tokens[self.pos].value!r}") + return node + + def _or_expr(self) -> PredicateNode: + """Parse ``and_expr ('or' and_expr)*``.""" + children = [self._and_expr()] + tok = self._peek() + while tok and tok.kind == "WORD" and tok.value == "or": + self._advance() + children.append(self._and_expr()) + tok = self._peek() + return children[0] if len(children) == 1 else OrExpr(children=children) + + def _and_expr(self) -> PredicateNode: + """Parse ``not_expr ('and' not_expr)*``.""" + children = [self._not_expr()] + tok = self._peek() + while tok and tok.kind == "WORD" and tok.value == "and": + self._advance() + children.append(self._not_expr()) + tok = self._peek() + return children[0] if len(children) == 1 else AndExpr(children=children) + + def _not_expr(self) -> PredicateNode: + """Parse ``'not' not_expr | comparison``.""" + tok = self._peek() + if tok and tok.kind == "WORD" and tok.value == "not": + self._advance() + return NotExpr(child=self._not_expr()) + return self._comparison() + + def _comparison(self) -> PredicateNode: + """Parse ``accessor (comp_op literal)?``, ``any/all(...)``, or ``has(...)``.""" + tok = self._peek() + if tok and tok.kind == "WORD" and tok.value in ("any", "all"): + return self._any_all() + + if tok and tok.kind == "WORD" and tok.value == "has": + return self._has_expr() + + accessor = self._accessor() + tok = self._peek() + if tok and tok.kind == "OP": + comp_op = self._advance().value + value = self._literal() + return Comparison(accessor=accessor, operator=comp_op, value=value) + return Comparison(accessor=accessor) + + def _has_expr(self) -> PredicateNode: + """Parse ``has("key")``.""" + self._advance() # consume "has" + self._expect("LPAREN") + key_tok = self._expect("STRING") + key = key_tok.value[1:-1].replace('\\"', '"').replace("\\\\", "\\") + self._expect("RPAREN") + return HasExpr(key=key) + + def _any_all(self) -> PredicateNode: + 
"""Parse ``any(accessor; predicate)`` or ``all(accessor; predicate)``.""" + func_name = self._advance().value # "any" or "all" + self._expect("LPAREN") + accessor = self._accessor() + self._expect("SEMI") + predicate = self._or_expr() + self._expect("RPAREN") + if func_name == "any": + return AnyExpr(accessor=accessor, predicate=predicate) + return AllExpr(accessor=accessor, predicate=predicate) + + def _accessor(self) -> Accessor: + """Parse ``'.' IDENT ('.' IDENT)* ('[' INT ']')? ('|' BUILTIN)?``.""" + from hcl2.query.builtins import BUILTIN_NAMES + + parts: List[str] = [] + self._expect("DOT") + parts.append(self._expect("WORD").value) + + tok = self._peek() + while tok and tok.kind == "DOT": + self._advance() + parts.append(self._expect("WORD").value) + tok = self._peek() + + # Optional [N] index + index = None + tok = self._peek() + if tok and tok.kind == "LBRACKET": + self._advance() + num_tok = self._expect("NUMBER") + index = int(num_tok.value) + self._expect("RBRACKET") + + # Optional | builtin/function (e.g. 
``| length``, ``| contains("x")``, + # ``| not``) + builtin = None + builtin_arg = None + tok = self._peek() + if tok and tok.kind == "PIPE": + self._advance() + # Allow optional leading dot (jq-style ``| .length``) + dot_tok = self._peek() + if dot_tok and dot_tok.kind == "DOT": + self._advance() + word_tok = self._expect("WORD") + if word_tok.value in _STRING_FUNCTIONS: + builtin = word_tok.value + self._expect("LPAREN") + arg_tok = self._expect("STRING") + builtin_arg = ( + arg_tok.value[1:-1].replace('\\"', '"').replace("\\\\", "\\") + ) + self._expect("RPAREN") + elif word_tok.value == "not": + builtin = "not" + elif word_tok.value in BUILTIN_NAMES: + builtin = word_tok.value + else: + raise QuerySyntaxError( + f"Expected builtin or string function after |, " + f"got {word_tok.value!r}" + ) + + return Accessor( + parts=parts, index=index, builtin=builtin, builtin_arg=builtin_arg + ) + + def _literal(self) -> Any: # pylint: disable=too-many-return-statements + """Parse a literal value (string, number, boolean, or null).""" + tok = self._peek() + if tok is None: + raise QuerySyntaxError("Expected literal, got end-of-input") + + if tok.kind == "STRING": + self._advance() + return tok.value[1:-1].replace('\\"', '"').replace("\\\\", "\\") + + if tok.kind == "NUMBER": + self._advance() + if "." 
def _resolve_accessor(  # pylint: disable=too-many-branches
    accessor: Accessor, target: Any
) -> Any:
    """Resolve an accessor path against a target (typically a NodeView).

    Each part is resolved in turn: the virtual ``type`` name first, then a
    Python property on the view, then structural path resolution (with a
    fallback to the block body for block views), then plain dict lookup.
    The optional index and the optional postfix builtin are applied last.

    A part or index that cannot be resolved yields ``None`` for the rest of
    the chain, but the postfix stage still runs, so ``.missing | not`` and
    ``.missing[0] | not`` both evaluate to ``True``.

    Args:
        accessor: The parsed accessor.
        target: The value the accessor is applied to.

    Returns:
        The resolved value, a ``bool`` for ``| not`` and string functions,
        or ``None`` when nothing matched.
    """
    from hcl2.query._base import NodeView
    from hcl2.query.blocks import BlockView
    from hcl2.query.path import parse_path
    from hcl2.query.resolver import resolve_path

    current = target

    for part in accessor.parts:
        if current is None:
            # Nothing left to descend into; fall through so that the postfix
            # stage below still sees the missing value.
            break

        # Virtual ".type" accessor — returns short type name string
        # Unwraps ExprTermRule so concrete inner type is reported.
        if part == "type" and isinstance(current, NodeView):
            from hcl2.query._base import view_for, view_type_name
            from hcl2.rules.expressions import ExprTermRule

            unwrapped = current
            if (
                type(current).__name__ == "NodeView"
                and isinstance(current._node, ExprTermRule)
                and current._node.expression is not None
            ):
                unwrapped = view_for(current._node.expression)
            current = view_type_name(unwrapped)
            continue

        # Try Python property first
        if isinstance(current, NodeView) and hasattr(type(current), part):
            prop = getattr(type(current), part, None)
            if isinstance(prop, property):
                current = getattr(current, part)
                continue

        # Try structural resolution
        if isinstance(current, NodeView):
            segments = parse_path(part)
            resolved = resolve_path(current, segments)
            # For BlockViews, if label matching fails, try the body directly
            if not resolved and isinstance(current, BlockView):
                resolved = resolve_path(current.body, segments)
            if not resolved:
                current = None
                break
            current = resolved[0] if len(resolved) == 1 else resolved
        elif isinstance(current, dict):
            current = current.get(part)
        else:
            current = None
            break

    # Apply index.  A failed lookup becomes None rather than an early return
    # so that ``| not`` and string functions are still applied afterwards.
    if accessor.index is not None:
        if isinstance(current, (list, tuple)):
            if 0 <= accessor.index < len(current):
                current = current[accessor.index]
            else:
                current = None
        elif hasattr(current, "__getitem__"):
            try:
                current = current[accessor.index]
            except (IndexError, KeyError, TypeError):
                # TypeError: the container does not accept integer keys.
                current = None
        else:
            current = None

    # Apply builtin transform (e.g. ``| length``, ``| contains("x")``, ``| not``)
    # Note: postfix not and string functions must run even when current is None
    if accessor.builtin is not None:
        if accessor.builtin == "not":
            return not (current is not None and current is not False and current != 0)
        if accessor.builtin_arg is not None:
            return _apply_string_function(
                accessor.builtin, accessor.builtin_arg, current
            )
        if current is not None:
            current = _apply_accessor_builtin(accessor.builtin, current)

    return current
def evaluate_predicate(pred: PredicateNode, target: Any) -> bool:
    """Evaluate a predicate against a target (typically a NodeView).

    Args:
        pred: A node of the predicate AST.
        target: The value the predicate is tested against.

    Returns:
        ``True`` if *target* satisfies *pred*.  A comparison whose operands
        cannot be ordered (for example a missing attribute compared with
        ``>`` to a number) is treated as not matching.

    Raises:
        QuerySyntaxError: On an unknown operator or node type.
    """
    if isinstance(pred, HasExpr):
        return _evaluate_has(pred.key, target)

    if isinstance(pred, Comparison):
        resolved = _resolve_accessor(pred.accessor, target)
        if pred.operator is None:
            # String functions and postfix not return bool directly
            if isinstance(resolved, bool):
                return resolved
            # Existence / truthy check
            return resolved is not None and resolved is not False and resolved != 0
        left = _to_comparable(resolved)
        comp_fn = _COMPARISON_OPS.get(pred.operator)
        if comp_fn is None:
            raise QuerySyntaxError(f"Unknown operator: {pred.operator!r}")
        try:
            return comp_fn(left, pred.value)
        except TypeError:
            # Ordering None or mismatched types (e.g. ``None > 5`` or
            # ``"abc" < 3``) is undefined: the item simply does not match,
            # instead of aborting the whole query.
            return False

    if isinstance(pred, NotExpr):
        return not evaluate_predicate(pred.child, target)

    if isinstance(pred, AndExpr):
        return all(evaluate_predicate(c, target) for c in pred.children)

    if isinstance(pred, OrExpr):
        return any(evaluate_predicate(c, target) for c in pred.children)

    if isinstance(pred, (AnyExpr, AllExpr)):
        return _evaluate_any_all(pred, target)

    raise QuerySyntaxError(f"Unknown predicate node type: {type(pred).__name__}")
def resolve_path(root: NodeView, segments: List[PathSegment]) -> List[NodeView]:
    """Resolve a structural path against a document view.

    The path is consumed one segment at a time.  Every state that survived
    the previous segment is expanded by the current one (recursive segments
    search descendants, the others resolve directly), and resolution stops
    as soon as no state is left.

    Args:
        root: The view resolution starts from.
        segments: Parsed path segments; an empty list selects *root* itself.

    Returns:
        The views reached by the full path, or an empty list if any segment
        matched nothing.
    """
    if not segments:
        return [root]

    frontier = [_ResolverState(node=root)]

    for segment in segments:
        expand = _resolve_recursive if segment.recursive else _resolve_segment
        frontier = [hit for state in frontier for hit in expand(state, segment)]
        if not frontier:
            return []

    return [state.node for state in frontier]
single segment against a state.""" + from hcl2.query.attributes import AttributeView + from hcl2.query.blocks import BlockView + from hcl2.query.body import BodyView, DocumentView + from hcl2.query.containers import ObjectView, TupleView + from hcl2.query.expressions import ConditionalView + from hcl2.query.functions import FunctionCallView + + node = state.node + + # DocumentView/BodyView: look up blocks and attributes by name + if isinstance(node, (DocumentView, BodyView)): + return _resolve_on_body(node, segment) + + # BlockView with unconsumed labels + if isinstance(node, BlockView) and state.label_depth < len(node.name_labels): + return _resolve_on_block_labels(node, segment, state.label_depth) + + # BlockView with labels consumed: delegate to body + if isinstance(node, BlockView): + return _resolve_on_body(node.body, segment) + + # AttributeView: unwrap to value_node + if isinstance(node, AttributeView): + value_view = node.value_node + return _resolve_segment(_ResolverState(node=value_view), segment) + + # ExprTermRule wrapper: unwrap to inner rule + if _is_expr_term(node): + inner = _unwrap_expr_term(node) + if inner is not None: + return _resolve_segment(_ResolverState(node=inner), segment) + return [] + + # ObjectView + if isinstance(node, ObjectView): + return _resolve_on_object(node, segment) + + # TupleView + if isinstance(node, TupleView): + return _resolve_on_tuple(node, segment) + + # FunctionCallView: resolve .args and .name + if isinstance(node, FunctionCallView): + return _resolve_on_function_call(node, segment) + + # ConditionalView: resolve .condition, .true_val, .false_val + if isinstance(node, ConditionalView): + return _resolve_on_conditional(node, segment) + + return [] + + +def _resolve_recursive( + state: _ResolverState, segment: PathSegment +) -> List[_ResolverState]: + """Recursive descent: try matching segment on the node and all descendants.""" + from hcl2.query._base import view_for + + results: List[_ResolverState] = [] + seen_ids: 
set = set() + + # Collect all descendant views to try matching against + candidates = [state] + for element in _walk_mod.walk_semantic(state.node._node): + wrapped = view_for(element) + candidates.append(_ResolverState(node=wrapped)) + + if segment.type_filter is not None: + # Type-qualified matching: match by type and name directly + results = _match_by_type_and_name(candidates, segment, seen_ids) + else: + non_recursive = PathSegment( + name=segment.name, + select_all=segment.select_all, + index=segment.index, + recursive=False, + predicate=segment.predicate, + type_filter=None, + ) + for candidate in candidates: + for match in _resolve_segment(candidate, non_recursive): + node_id = id(match.node._node) + if node_id not in seen_ids: + seen_ids.add(node_id) + results.append(match) + + return _apply_index_filter(results, segment) + + +def _match_by_type_and_name( + candidates: List[_ResolverState], segment: PathSegment, seen_ids: set +) -> List[_ResolverState]: + """Match candidates by type filter and name property.""" + from hcl2.query._base import view_type_name + + results: List[_ResolverState] = [] + for candidate in candidates: + node = candidate.node + type_name = view_type_name(node) + if type_name != segment.type_filter: + continue + + # Check name match + if segment.name == "*" or _node_matches_name(node, segment.name): + node_id = id(node._node) + if node_id not in seen_ids: + seen_ids.add(node_id) + results.append(candidate) + + return results + + +def _node_matches_name(node: NodeView, name: str) -> bool: + """Check if a node's name property matches the given name.""" + node_name = getattr(node, "name", None) + if node_name is not None: + return node_name == name + # BlockView: check block_type + block_type = getattr(node, "block_type", None) + if block_type is not None: + return block_type == name + return False + + +def _resolve_on_body(node: NodeView, segment: PathSegment) -> List[_ResolverState]: + """Resolve a segment on a DocumentView or 
def _resolve_on_block_labels(
    node: "NodeView", segment: PathSegment, label_depth: int
) -> List[_ResolverState]:
    """Match *segment* against the next unconsumed label of a block.

    Args:
        node: The block view whose labels are being consumed.
        segment: The path segment to match.
        label_depth: Number of labels already consumed by earlier segments.

    Returns:
        A single state for the same block with ``label_depth`` advanced by
        one when the segment is ``*`` or equals the label at *label_depth*;
        an empty list otherwise.
    """
    from hcl2.query.blocks import BlockView

    # Type-qualified segments (e.g. tuple:*) never match labels
    if segment.type_filter is not None:
        return []

    block: BlockView = node  # type: ignore[assignment]
    labels = block.name_labels

    # A wildcard consumes whatever label comes next; a plain name must equal it.
    consumes_label = segment.name == "*" or (
        label_depth < len(labels) and labels[label_depth] == segment.name
    )
    if not consumes_label:
        return []

    return [_ResolverState(node=block, label_depth=label_depth + 1)]
def _apply_index_filter(
    candidates: List[_ResolverState], segment: PathSegment
) -> List[_ResolverState]:
    """Narrow *candidates* by the segment's type filter, predicate and index.

    The filters run in that order.  ``select_all`` (``[*]``) or a missing
    index returns every surviving candidate; an index ``N`` returns the
    N-th survivor, or an empty list when ``N`` is negative or out of range.
    """
    survivors = candidates

    wanted_type = segment.type_filter
    if wanted_type is not None:
        from hcl2.query._base import view_type_name

        survivors = [
            state for state in survivors if view_type_name(state.node) == wanted_type
        ]

    condition = segment.predicate
    if condition is not None:
        from hcl2.query.predicate import evaluate_predicate

        survivors = [
            state
            for state in survivors
            if evaluate_predicate(condition, state.node)  # type: ignore[arg-type]
        ]

    if segment.select_all:
        return survivors

    position = segment.index
    if position is None:
        return survivors
    # A one-element slice is empty past the end; negatives are rejected outright.
    return survivors[position : position + 1] if position >= 0 else []
import ast
import builtins
from typing import Any, Dict


class UnsafeExpressionError(Exception):
    """Raised when an expression contains disallowed constructs."""


# Exact AST node classes an expression may contain; any other node type is
# rejected by validate_expression().
_ALLOWED_NODES = {
    # Expression wrapper
    ast.Expression,
    # Core access patterns
    ast.Attribute,
    ast.Subscript,
    ast.Call,
    ast.Name,
    ast.Constant,
    ast.Starred,
    # Index/slice
    ast.Slice,
    # Literal collections (as arguments)
    ast.List,
    ast.Tuple,
    # Lambdas (for find_by_predicate, sorted key=, etc.)
    ast.Lambda,
    ast.arguments,
    ast.arg,
    # Keyword args
    ast.keyword,
    # Comparisons and boolean ops
    ast.Compare,
    ast.BoolOp,
    ast.UnaryOp,
    ast.BinOp,
    ast.Eq,
    ast.NotEq,
    ast.Lt,
    ast.Gt,
    ast.LtE,
    ast.GtE,
    ast.Is,
    ast.IsNot,
    ast.In,
    ast.NotIn,
    ast.And,
    ast.Or,
    ast.Not,
    ast.Add,
    ast.Sub,
    ast.Mult,
    ast.Div,
    ast.Mod,
    ast.FloorDiv,
    ast.USub,
    ast.UAdd,
    # Context
    ast.Load,
}

# Bare names that may appear as the callee of a call expression.
_SAFE_CALLABLE_NAMES = frozenset(
    {
        "len",
        "str",
        "int",
        "float",
        "bool",
        "list",
        "tuple",
        "type",
        "isinstance",
        "sorted",
        "reversed",
        "enumerate",
        "zip",
        "range",
        "min",
        "max",
        "print",
        "any",
        "all",
        "filter",
        "map",
        "hasattr",
        "getattr",
    }
)


def _is_dunder(name: Any) -> bool:
    """Return True for attribute names starting with a double underscore."""
    return isinstance(name, str) and name.startswith("__")


def _guarded_getattr(obj: Any, name: Any, *default: Any) -> Any:
    """``getattr`` that refuses double-underscore names.

    The AST check cannot see attribute names passed as strings, so the same
    rule is enforced here at call time.
    """
    if _is_dunder(name):
        raise UnsafeExpressionError(f"Access to attribute {name!r} is not allowed")
    return getattr(obj, name, *default)


def _guarded_hasattr(obj: Any, name: Any) -> bool:
    """``hasattr`` that refuses double-underscore names."""
    if _is_dunder(name):
        raise UnsafeExpressionError(f"Access to attribute {name!r} is not allowed")
    return hasattr(obj, name)


# Names visible to evaluated expressions.  Resolved through the ``builtins``
# module rather than ``__builtins__``, which is a dict or a module depending
# on how this module was loaded.
_SAFE_BUILTINS: Dict[str, Any] = {
    name: getattr(builtins, name) for name in _SAFE_CALLABLE_NAMES
}
_SAFE_BUILTINS.update(
    {
        "getattr": _guarded_getattr,
        "hasattr": _guarded_hasattr,
        "True": True,
        "False": False,
        "None": None,
    }
)

_MAX_AST_DEPTH = 20
_MAX_NODE_COUNT = 200


def validate_expression(expr_str: str) -> ast.Expression:
    """Parse *expr_str* as a single expression and check it against the allow-list.

    Returns the parsed ``ast.Expression``.

    Raises:
        UnsafeExpressionError: if the text cannot be parsed, is nested deeper
            than ``_MAX_AST_DEPTH``, has more than ``_MAX_NODE_COUNT`` nodes,
            uses a node type outside ``_ALLOWED_NODES``, calls anything other
            than a method or a name in ``_SAFE_CALLABLE_NAMES``, or reads a
            double-underscore attribute.
    """
    try:
        tree = ast.parse(expr_str, mode="eval")
    except (SyntaxError, ValueError, RecursionError) as exc:
        # ValueError: null bytes on older interpreters; RecursionError:
        # pathologically nested input.  Both are reported like a syntax error.
        raise UnsafeExpressionError(f"Syntax error: {exc}") from exc

    node_count = 0

    def _validate(node, depth=0):
        nonlocal node_count
        node_count += 1

        if depth > _MAX_AST_DEPTH:
            raise UnsafeExpressionError("Expression exceeds maximum depth")
        if node_count > _MAX_NODE_COUNT:
            raise UnsafeExpressionError("Expression exceeds maximum node count")

        # Exact type match on purpose: subclasses are not implicitly trusted.
        if type(node) not in _ALLOWED_NODES:
            raise UnsafeExpressionError(f"{type(node).__name__} is not allowed")

        # Dunder attributes (__class__, __globals__, __subclasses__, ...) are
        # the usual route out of a restricted eval.
        if isinstance(node, ast.Attribute) and _is_dunder(node.attr):
            raise UnsafeExpressionError(
                f"Access to attribute {node.attr!r} is not allowed"
            )

        if isinstance(node, ast.Call):
            func = node.func
            if isinstance(func, ast.Attribute):
                # Method calls are allowed; the attribute itself is checked
                # when the walk reaches it.
                pass
            elif isinstance(func, ast.Name):
                if func.id not in _SAFE_CALLABLE_NAMES:
                    raise UnsafeExpressionError(f"Calling {func.id!r} is not allowed")
            else:
                raise UnsafeExpressionError(
                    "Only method calls and safe built-in calls are allowed"
                )

        for child in ast.iter_child_nodes(node):
            _validate(child, depth + 1)

    _validate(tree)
    return tree


def safe_eval(expr_str: str, variables: Dict[str, Any]) -> Any:
    """Validate, compile, and evaluate *expr_str* in a restricted namespace.

    *variables* are layered over the safe built-ins, so they may shadow them.

    Raises:
        UnsafeExpressionError: if validation fails or a guarded built-in is
            asked for a double-underscore attribute.
    """
    tree = validate_expression(expr_str)
    code = compile(tree, "", "eval")
    namespace = dict(_SAFE_BUILTINS)
    namespace.update(variables)
    # Everything lives in *globals*: a lambda body resolves free names through
    # globals, never through eval()'s locals mapping, so with a separate
    # locals dict `lambda v: len(v)` failed with NameError.
    # Set last so *variables* cannot re-enable the real builtins.
    namespace["__builtins__"] = {}
    # NOTE(review): eval is intentional; input is constrained by
    # validate_expression() above.
    return eval(code, namespace)  # pylint: disable=eval-used
+ """ + from hcl2.rules.abstract import LarkRule + + self._reset_state() + if isinstance(tree, LarkRule): + tree = tree.to_lark() + fragments = self._reconstruct_node(tree) + return "".join(fragments) + def reconstruct(self, tree: Tree, postproc=None) -> str: """Convert a Lark.Tree AST back into a string representation of HCL.""" # Reset state diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index 540d6284..96f5952e 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -99,7 +99,7 @@ def serialize( if inline_comments: result[INLINE_COMMENTS_KEY] = inline_comments - return result + return {key: value for key, value in result.items()} class StartRule(LarkRule): diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 6b28837e..9488a77b 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -30,27 +30,33 @@ def serialize( def to_list( self, options: SerializationOptions = SerializationOptions() - ) -> Optional[List[str]]: - """Extract comment text strings, or None if only a newline.""" - comment = self.serialize(options) - if comment == "\n": + ) -> Optional[List[dict]]: + """Extract comment objects, or None if only a newline.""" + raw = self.serialize(options) + if raw == "\n": return None - comments = comment.split("\n") + stripped = raw.strip() + # Block comments: keep as a single value + if stripped.startswith("/*") and stripped.endswith("*/"): + text = stripped[2:-2].strip() + if text: + return [{"value": text}] + return None + + # Line comments: one value per line result = [] - for comment in comments: - comment = comment.strip() - - for delimiter in ("//", "/*", "#"): - if comment.startswith(delimiter): - comment = comment[len(delimiter) :] - if delimiter == "/*" and comment.endswith("*/"): - comment = comment[:-2] + for line in raw.split("\n"): + line = line.strip() + + for delimiter in ("//", "#"): + if line.startswith(delimiter): + line = line[len(delimiter) :] break - if comment != "": - 
result.append(comment.strip()) + if line != "": + result.append({"value": line.strip()}) return result diff --git a/hcl2/walk.py b/hcl2/walk.py new file mode 100644 index 00000000..be2e4bc0 --- /dev/null +++ b/hcl2/walk.py @@ -0,0 +1,62 @@ +"""Generic tree-walking primitives for the LarkElement IR tree.""" + +from typing import Callable, Iterator, Optional, Type, TypeVar + +from hcl2.rules.abstract import LarkElement, LarkRule +from hcl2.rules.whitespace import NewLineOrCommentRule + +T = TypeVar("T", bound=LarkElement) + + +def walk(node: LarkElement) -> Iterator[LarkElement]: + """Depth-first pre-order traversal yielding all nodes including tokens.""" + yield node + if isinstance(node, LarkRule): + for child in node.children: + if child is not None: + yield from walk(child) + + +def walk_rules(node: LarkElement) -> Iterator[LarkRule]: + """Walk yielding only LarkRule nodes (skip LarkTokens).""" + for element in walk(node): + if isinstance(element, LarkRule): + yield element + + +def walk_semantic(node: LarkElement) -> Iterator[LarkRule]: + """Walk yielding only semantic LarkRule nodes (skip tokens and whitespace/comments).""" + for element in walk_rules(node): + if not isinstance(element, NewLineOrCommentRule): + yield element + + +def find_all(node: LarkElement, rule_type: Type[T]) -> Iterator[T]: + """Find all descendants matching a rule class (semantic walk).""" + for element in walk_semantic(node): + if isinstance(element, rule_type): + yield element + + +def find_first(node: LarkElement, rule_type: Type[T]) -> Optional[T]: + """Find first descendant matching a rule class, or None.""" + for element in find_all(node, rule_type): + return element + return None + + +def find_by_predicate( + node: LarkElement, predicate: Callable[[LarkElement], bool] +) -> Iterator[LarkElement]: + """Find all descendants matching an arbitrary predicate.""" + for element in walk(node): + if predicate(element): + yield element + + +def ancestors(node: LarkElement) -> 
Iterator[LarkElement]: + """Walk up the parent chain (excludes node itself).""" + current = getattr(node, "_parent", None) + while current is not None: + yield current + current = getattr(current, "_parent", None) diff --git a/pylintrc b/pylintrc index 34599008..047fbdcd 100644 --- a/pylintrc +++ b/pylintrc @@ -46,7 +46,10 @@ load-plugins= # E1103: %s %r has no %r member (but some types could not be inferred) - fails to infer real members of types, e.g. in Celery # W0231: method from base class is not called - complains about not invoking empty __init__s in parents, which is annoying # R0921: abstract class not referenced, when in fact referenced from another egg -disable=F0401,E0611,E1101,W0212,W0703,R0801,R0901,W0511,E1103,W0231 +# C0415: import-outside-toplevel - needed for circular dep avoidance in query package +# W1113: keyword-arg-before-vararg - intentional API design (blocks(block_type=None, *labels)) +# R0912: too-many-branches - introspect schema builder needs the branches +disable=F0401,E0611,E1101,W0212,W0703,R0801,R0901,W0511,E1103,W0231,C0415,W1113,R0912 [REPORTS] diff --git a/pyproject.toml b/pyproject.toml index 248b6811..e5591815 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,9 +41,10 @@ Homepage = "https://github.com/amplify-education/python-hcl2" [project.scripts] hcl2tojson = "cli.hcl_to_json:main" jsontohcl2 = "cli.json_to_hcl:main" +hq = "cli.hq:main" [tool.setuptools] -packages = ["hcl2", "hcl2.rules", "cli"] +packages = ["hcl2", "hcl2.rules", "hcl2.query", "cli"] zip-safe = false include-package-data = true diff --git a/test/unit/cli/test_hq.py b/test/unit/cli/test_hq.py new file mode 100644 index 00000000..56cf7144 --- /dev/null +++ b/test/unit/cli/test_hq.py @@ -0,0 +1,294 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +import json +import os +import tempfile +from io import StringIO +from unittest import TestCase +from unittest.mock import patch + +from cli.hq import _dispatch_query, _normalize_eval_expr, main +from 
class TestNormalizeEvalExpr(TestCase):
    """Input/output pairs for ``_normalize_eval_expr``."""

    def _assert_normalizes(self, raw, expected):
        """Check that normalizing *raw* produces exactly *expected*."""
        self.assertEqual(_normalize_eval_expr(raw), expected)

    def test_explicit_underscore(self):
        self._assert_normalizes("_.foo", "_.foo")

    def test_dot_prefix(self):
        self._assert_normalizes(".foo", "_.foo")

    def test_bare_property(self):
        self._assert_normalizes("name_labels", "_.name_labels")

    def test_builtin_call(self):
        self._assert_normalizes("len(_.x)", "len(_.x)")

    def test_doc_ref(self):
        self._assert_normalizes("doc.blocks()", "doc.blocks()")

    def test_empty(self):
        self._assert_normalizes("", "_")
class TestHqMainCli(TestCase):
    """End-to-end tests that drive ``main()`` through a patched ``sys.argv``."""

    def _write_tf(self, content):
        """Write *content* to a temporary ``.tf`` file and return its path.

        The handle is closed before the path is returned, so the file can be
        reopened by name on every platform, and removal is registered with
        ``addCleanup`` so it happens even when the test fails.
        """
        with tempfile.NamedTemporaryFile(mode="w", suffix=".tf", delete=False) as tmp:
            self.addCleanup(os.unlink, tmp.name)
            tmp.write(content)
        return tmp.name

    def _run_main(self, argv):
        """Run ``main()`` with *argv*; return ``(exit_code, captured_stdout)``."""
        with patch("sys.argv", argv), patch(
            "sys.stdout", new_callable=StringIO
        ) as mock_out:
            with self.assertRaises(SystemExit) as cm:
                main()
        return cm.exception.code, mock_out.getvalue()

    def test_main_structural(self):
        path = self._write_tf("x = 1\n")
        code, output = self._run_main(["hq", "x", path, "--value"])
        self.assertEqual(code, 0)
        self.assertIn("1", output)

    def test_main_schema(self):
        code, output = self._run_main(["hq", "--schema"])
        self.assertEqual(code, 0)
        data = json.loads(output)
        self.assertIn("views", data)

    def test_main_no_results_exits_1(self):
        path = self._write_tf("x = 1\n")
        code, _ = self._run_main(["hq", "nonexistent", path])
        self.assertEqual(code, 1)

    def test_optional_no_results_exits_0(self):
        path = self._write_tf("x = 1\n")
        code, _ = self._run_main(["hq", "nonexistent?", path, "--value"])
        self.assertEqual(code, 0)

    def test_optional_with_results(self):
        path = self._write_tf("x = 1\n")
        code, output = self._run_main(["hq", "x?", path, "--value"])
        self.assertEqual(code, 0)
        self.assertIn("1", output)

    def test_optional_not_stripped_in_eval(self):
        """A trailing ``?`` must not be stripped in eval mode.

        ``x?`` is not valid Python, so if the ``?`` is left in place the
        command has to fail instead of silently succeeding.
        """
        path = self._write_tf("x = 1\n")
        with patch("sys.stderr", new_callable=StringIO):
            code, _ = self._run_main(["hq", "-e", "x?", path])
        self.assertNotEqual(code, 0)

    def test_raw_strips_string_quotes(self):
        path = self._write_tf('ami = "test-123"\n')
        code, output = self._run_main(["hq", "ami", path, "--raw"])
        self.assertEqual(code, 0)
        self.assertEqual(output.strip(), "test-123")

    def test_raw_integer_unchanged(self):
        path = self._write_tf("x = 42\n")
        code, output = self._run_main(["hq", "x", path, "--raw"])
        self.assertEqual(code, 0)
        self.assertIn("42", output.strip())

    def test_diff_identical_files(self):
        first = self._write_tf("x = 1\n")
        second = self._write_tf("x = 1\n")
        # Invocation shape: hq FILE1 --diff FILE2
        code, output = self._run_main(["hq", first, "--diff", second])
        self.assertEqual(code, 0)
        # No output for identical files
        self.assertEqual(output.strip(), "")

    def test_diff_changed_files(self):
        first = self._write_tf("x = 1\n")
        second = self._write_tf("x = 2\n")
        code, output = self._run_main(["hq", first, "--diff", second])
        self.assertEqual(code, 0)
        self.assertIn("~", output.strip())

    def test_diff_json_output(self):
        first = self._write_tf("x = 1\ny = 2\n")
        second = self._write_tf("x = 1\nz = 3\n")
        code, output = self._run_main(["hq", first, "--diff", second, "--json"])
        self.assertEqual(code, 0)
        data = json.loads(output)
        self.assertIsInstance(data, list)
        self.assertGreater(len(data), 0)

    def test_missing_query_with_file_arg_errors(self):
        """When user passes only a file path (no query), error instead of hanging on stdin."""
        path = self._write_tf("x = 1\n")
        # stdout is deliberately left unpatched here, as in the original test.
        with patch("sys.argv", ["hq", path, "--json"]):
            with patch("sys.stdin") as mock_stdin:
                mock_stdin.isatty.return_value = True
                with patch("sys.stderr", new_callable=StringIO):
                    with self.assertRaises(SystemExit) as cm:
                        main()
        self.assertEqual(cm.exception.code, 2)

    def test_main_pipe_query(self):
        path = self._write_tf("x = {\n a = 1\n b = 2\n}\n")
        code, output = self._run_main(["hq", "x | keys", path, "--json"])
        self.assertEqual(code, 0)
        data = json.loads(output)
        self.assertIsInstance(data, list)
self.assertEqual(attr.value, 42) + + def test_value_string(self): + doc = DocumentView.parse('x = "hello"\n') + attr = doc.attribute("x") + self.assertEqual(attr.value, '"hello"') + + def test_value_node(self): + doc = DocumentView.parse("x = 42\n") + attr = doc.attribute("x") + vn = attr.value_node + self.assertIsNotNone(vn) + + def test_to_hcl(self): + doc = DocumentView.parse("x = 42\n") + attr = doc.attribute("x") + hcl = attr.to_hcl() + self.assertIn("x", hcl) + self.assertIn("42", hcl) + + def test_to_dict(self): + doc = DocumentView.parse("x = 42\n") + attr = doc.attribute("x") + result = attr.to_dict() + self.assertEqual(result, {"x": 42}) diff --git a/test/unit/query/test_base.py b/test/unit/query/test_base.py new file mode 100644 index 00000000..2c0a7de0 --- /dev/null +++ b/test/unit/query/test_base.py @@ -0,0 +1,155 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query._base import NodeView, view_for +from hcl2.rules.base import AttributeRule, BodyRule, StartRule +from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import NAME, EQ +from hcl2.utils import SerializationContext, SerializationOptions + +# Ensure views are registered +import hcl2.query # noqa: F401,E402 pylint: disable=unused-import + + +class StubExpression(ExpressionRule): + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_expr_term(value): + return ExprTermRule([StubExpression(value)]) + + +def _make_attribute(name, value): + return AttributeRule([_make_identifier(name), EQ(), _make_expr_term(value)]) + + +class TestViewFor(TestCase): + def test_attribute_dispatches(self): + attr = _make_attribute("x", 1) + view = view_for(attr) + 
class TestNodeView(TestCase):
    """Behaviour exercised through ``NodeView`` directly and through ``view_for``."""

    @staticmethod
    def _parse(hcl):
        """Parse *hcl* into a DocumentView (imported locally, as in the original tests)."""
        from hcl2.query.body import DocumentView

        return DocumentView.parse(hcl)

    def test_raw(self):
        attr = _make_attribute("x", 1)
        view = NodeView(attr)
        self.assertIs(view.raw, attr)

    def test_parent_view(self):
        attr = _make_attribute("x", 1)
        BodyRule([attr])  # sets parent on attr
        view = view_for(attr)
        parent = view.parent_view
        self.assertIsNotNone(parent)
        self.assertEqual(type(parent).__name__, "BodyView")

    def test_parent_view_none_for_root(self):
        body = BodyRule([])
        start = StartRule([body])
        view = view_for(start)
        self.assertIsNone(view.parent_view)

    def test_to_dict(self):
        attr = _make_attribute("x", 1)
        view = view_for(attr)
        self.assertEqual(view.to_dict(), {"x": 1})

    def test_to_hcl(self):
        # Use a real parsed tree to avoid lark_name issues with stubs
        doc = self._parse("x = 1\n")
        hcl = doc.attribute("x").to_hcl()
        self.assertIn("x", hcl)
        self.assertIn("1", hcl)

    def test_find_all(self):
        attr1 = _make_attribute("x", 1)
        attr2 = _make_attribute("y", 2)
        body = BodyRule([attr1, attr2])
        start = StartRule([body])
        view = view_for(start)
        found = view.find_all(AttributeRule)
        self.assertEqual(len(found), 2)

    def test_walk_semantic(self):
        attr = _make_attribute("x", 1)
        body = BodyRule([attr])
        start = StartRule([body])
        view = view_for(start)
        nodes = view.walk_semantic()
        # assertGreater reports the actual length on failure.
        self.assertGreater(len(nodes), 0)

    def test_repr(self):
        attr = _make_attribute("x", 1)
        view = view_for(attr)
        self.assertIn("AttributeView", repr(view))

    def test_find_by_predicate(self):
        doc = self._parse("x = 1\ny = 2\n")
        found = doc.find_by_predicate(lambda n: hasattr(n, "name") and n.name == "x")
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].name, "x")

    def test_find_by_predicate_no_match(self):
        doc = self._parse("x = 1\n")
        found = doc.find_by_predicate(lambda n: False)
        self.assertEqual(len(found), 0)

    def test_walk_rules(self):
        doc = self._parse("x = 1\n")
        rules = doc.walk_rules()
        self.assertGreater(len(rules), 0)

    def test_to_dict_with_options(self):
        doc = self._parse("x = 1\n")
        attr = doc.attribute("x")
        opts = SerializationOptions(with_meta=False)
        result = attr.to_dict(options=opts)
        self.assertEqual(result, {"x": 1})

    def test_view_for_mro_fallback(self):
        # StubExpression subclasses ExpressionRule.  view_for() is expected to
        # return a plain NodeView for it, not a subclass -- presumably because
        # no view is registered for either class (confirm against the registry).
        expr = StubExpression("val")
        view = view_for(expr)
        self.assertIsInstance(view, NodeView)
        self.assertIs(type(view), NodeView)
self.assertEqual(block.labels, ["resource", "aws_instance", "main"]) + + def test_name_labels(self): + doc = DocumentView.parse('resource "aws_instance" "main" {}\n') + block = doc.blocks("resource")[0] + self.assertEqual(block.name_labels, ["aws_instance", "main"]) + + def test_body(self): + doc = DocumentView.parse('resource "type" "name" {\n ami = "test"\n}\n') + block = doc.blocks("resource")[0] + body = block.body + self.assertIsNotNone(body) + + def test_nested_attribute(self): + doc = DocumentView.parse('resource "type" "name" {\n ami = "test"\n}\n') + block = doc.blocks("resource")[0] + attr = block.attribute("ami") + self.assertIsNotNone(attr) + self.assertEqual(attr.name, "ami") + + def test_nested_blocks(self): + hcl = 'resource "type" "name" {\n provisioner "local-exec" {\n command = "echo"\n }\n}\n' + doc = DocumentView.parse(hcl) + block = doc.blocks("resource")[0] + inner = block.blocks("provisioner") + self.assertEqual(len(inner), 1) + + def test_to_hcl(self): + doc = DocumentView.parse('resource "type" "name" {\n ami = "test"\n}\n') + block = doc.blocks("resource")[0] + hcl = block.to_hcl() + self.assertIn("resource", hcl) + self.assertIn("ami", hcl) + + def test_identifier_label(self): + doc = DocumentView.parse("locals {\n x = 1\n}\n") + block = doc.blocks("locals")[0] + self.assertEqual(block.block_type, "locals") + self.assertEqual(block.name_labels, []) + + def test_attributes_list(self): + doc = DocumentView.parse('resource "type" "name" {\n a = 1\n b = 2\n}\n') + block = doc.blocks("resource")[0] + attrs = block.attributes() + self.assertEqual(len(attrs), 2) + + def test_attributes_filtered(self): + doc = DocumentView.parse('resource "type" "name" {\n a = 1\n b = 2\n}\n') + block = doc.blocks("resource")[0] + attrs = block.attributes("a") + self.assertEqual(len(attrs), 1) + self.assertEqual(attrs[0].name, "a") diff --git a/test/unit/query/test_body.py b/test/unit/query/test_body.py new file mode 100644 index 00000000..00467e82 --- /dev/null 
class TestDocumentView(TestCase):
    """Parsing entry points and block/attribute lookups on ``DocumentView``."""

    def test_parse(self):
        doc = DocumentView.parse("x = 1\n")
        self.assertIsInstance(doc, DocumentView)

    def test_body(self):
        doc = DocumentView.parse("x = 1\n")
        self.assertIsInstance(doc.body, BodyView)

    def test_blocks(self):
        doc = DocumentView.parse(
            'resource "aws_instance" "main" {\n ami = "test"\n}\n'
        )
        blocks = doc.blocks("resource")
        self.assertEqual(len(blocks), 1)
        self.assertEqual(blocks[0].block_type, "resource")

    def test_blocks_no_filter(self):
        doc = DocumentView.parse('resource "a" "b" {}\nvariable "c" {}\n')
        self.assertEqual(len(doc.blocks()), 2)

    def test_blocks_with_labels(self):
        doc = DocumentView.parse(
            'resource "aws_instance" "main" {}\nresource "aws_s3_bucket" "data" {}\n'
        )
        blocks = doc.blocks("resource", "aws_instance")
        self.assertEqual(len(blocks), 1)

    def test_attributes(self):
        doc = DocumentView.parse("x = 1\ny = 2\n")
        self.assertEqual(len(doc.attributes()), 2)

    def test_attributes_filtered(self):
        doc = DocumentView.parse("x = 1\ny = 2\n")
        self.assertEqual(len(doc.attributes("x")), 1)

    def test_attribute(self):
        doc = DocumentView.parse("x = 1\ny = 2\n")
        attr = doc.attribute("x")
        self.assertIsNotNone(attr)
        self.assertEqual(attr.name, "x")

    def test_attribute_missing(self):
        doc = DocumentView.parse("x = 1\n")
        self.assertIsNone(doc.attribute("missing"))

    def test_parse_file(self):
        import os
        import tempfile

        # Close the handle before parsing: reopening or unlinking a file that
        # is still open fails on Windows.  addCleanup removes it even when an
        # assertion below fails.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".tf", delete=False) as tmp:
            self.addCleanup(os.unlink, tmp.name)
            tmp.write("x = 1\n")

        doc = DocumentView.parse_file(tmp.name)
        self.assertIsInstance(doc, DocumentView)
        self.assertIsNotNone(doc.attribute("x"))

    def test_blocks_label_too_many(self):
        doc = DocumentView.parse('resource "type" {}\n')
        # Ask for more labels than the block has
        blocks = doc.blocks("resource", "type", "extra")
        self.assertEqual(len(blocks), 0)

    def test_blocks_label_partial_mismatch(self):
        doc = DocumentView.parse('resource "aws_instance" "main" {}\n')
        blocks = doc.blocks("resource", "aws_s3_bucket")
        self.assertEqual(len(blocks), 0)
self.assertIn("x", keys[0]) + + def test_keys_on_block(self): + doc = DocumentView.parse('resource "aws_instance" "main" {}\n') + blocks = doc.blocks("resource") + keys = apply_builtin("keys", blocks) + self.assertEqual(len(keys), 1) + self.assertEqual(keys[0], ["resource", "aws_instance", "main"]) + + def test_keys_on_dict(self): + keys = apply_builtin("keys", [{"a": 1, "b": 2}]) + self.assertEqual(len(keys), 1) + self.assertEqual(sorted(keys[0]), ["a", "b"]) + + +class TestValuesBuiltin(TestCase): + def test_values_on_object(self): + doc = DocumentView.parse("x = {\n a = 1\n b = 2\n}\n") + results = resolve_path(doc, parse_path("x")) + vals = apply_builtin("values", results) + self.assertEqual(len(vals), 1) + self.assertEqual(len(vals[0]), 2) + + def test_values_on_tuple(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + results = resolve_path(doc, parse_path("x")) + vals = apply_builtin("values", results) + self.assertEqual(len(vals), 1) + self.assertEqual(len(vals[0]), 3) + + def test_values_on_body(self): + doc = DocumentView.parse("x = 1\ny = 2\n") + vals = apply_builtin("values", [doc.body]) + self.assertEqual(len(vals), 1) + self.assertEqual(len(vals[0]), 2) + + def test_values_on_dict(self): + vals = apply_builtin("values", [{"a": 1, "b": 2}]) + self.assertEqual(len(vals), 1) + self.assertEqual(sorted(vals[0]), [1, 2]) + + +class TestLengthBuiltin(TestCase): + def test_length_on_tuple(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + results = resolve_path(doc, parse_path("x")) + lengths = apply_builtin("length", results) + self.assertEqual(lengths, [3]) + + def test_length_on_object(self): + doc = DocumentView.parse("x = {\n a = 1\n b = 2\n}\n") + results = resolve_path(doc, parse_path("x")) + lengths = apply_builtin("length", results) + self.assertEqual(lengths, [2]) + + def test_length_on_body(self): + doc = DocumentView.parse("x = 1\ny = 2\n") + lengths = apply_builtin("length", [doc.body]) + self.assertEqual(lengths, [2]) + + def 
test_length_on_node_view(self): + doc = DocumentView.parse("x = 1\n") + results = resolve_path(doc, parse_path("x")) + lengths = apply_builtin("length", results) + self.assertEqual(lengths, [1]) + + def test_length_on_list(self): + lengths = apply_builtin("length", [[1, 2, 3]]) + self.assertEqual(lengths, [3]) + + def test_length_on_string(self): + lengths = apply_builtin("length", ["hello"]) + self.assertEqual(lengths, [5]) + + +class TestUnknownBuiltin(TestCase): + def test_unknown_raises(self): + with self.assertRaises(QuerySyntaxError): + apply_builtin("nope", [1]) diff --git a/test/unit/query/test_containers.py b/test/unit/query/test_containers.py new file mode 100644 index 00000000..b5b4db69 --- /dev/null +++ b/test/unit/query/test_containers.py @@ -0,0 +1,67 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.body import DocumentView +from hcl2.query.containers import ObjectView, TupleView +from hcl2.rules.containers import ObjectRule, TupleRule +from hcl2.walk import find_first + + +class TestTupleView(TestCase): + def test_elements(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + attr = doc.attribute("x") + tuple_node = find_first(attr.raw, TupleRule) + self.assertIsNotNone(tuple_node) + tv = TupleView(tuple_node) + self.assertEqual(len(tv), 3) + + def test_getitem(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + attr = doc.attribute("x") + tuple_node = find_first(attr.raw, TupleRule) + tv = TupleView(tuple_node) + elem = tv[0] + self.assertIsNotNone(elem) + + def test_elements_property(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + attr = doc.attribute("x") + tuple_node = find_first(attr.raw, TupleRule) + tv = TupleView(tuple_node) + elems = tv.elements + self.assertEqual(len(elems), 3) + + +class TestObjectView(TestCase): + def test_entries(self): + doc = DocumentView.parse("x = {\n a = 1\n b = 2\n}\n") + attr = doc.attribute("x") + obj_node = find_first(attr.raw, ObjectRule) + 
self.assertIsNotNone(obj_node) + ov = ObjectView(obj_node) + entries = ov.entries + self.assertEqual(len(entries), 2) + + def test_keys(self): + doc = DocumentView.parse("x = {\n a = 1\n b = 2\n}\n") + attr = doc.attribute("x") + obj_node = find_first(attr.raw, ObjectRule) + ov = ObjectView(obj_node) + self.assertEqual(ov.keys, ["a", "b"]) + + def test_get(self): + doc = DocumentView.parse("x = {\n a = 1\n b = 2\n}\n") + attr = doc.attribute("x") + obj_node = find_first(attr.raw, ObjectRule) + ov = ObjectView(obj_node) + val = ov.get("a") + self.assertIsNotNone(val) + + def test_get_missing(self): + doc = DocumentView.parse("x = {\n a = 1\n}\n") + attr = doc.attribute("x") + obj_node = find_first(attr.raw, ObjectRule) + ov = ObjectView(obj_node) + val = ov.get("missing") + self.assertIsNone(val) diff --git a/test/unit/query/test_diff.py b/test/unit/query/test_diff.py new file mode 100644 index 00000000..c1832476 --- /dev/null +++ b/test/unit/query/test_diff.py @@ -0,0 +1,109 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +import json +from unittest import TestCase + +from hcl2.query.diff import DiffEntry, diff_dicts, format_diff_json, format_diff_text + + +class TestDiffDicts(TestCase): + def test_identical(self): + d = {"a": 1, "b": "hello"} + self.assertEqual(diff_dicts(d, d), []) + + def test_added_key(self): + left = {"a": 1} + right = {"a": 1, "b": 2} + entries = diff_dicts(left, right) + self.assertEqual(len(entries), 1) + self.assertEqual(entries[0].kind, "added") + self.assertEqual(entries[0].path, "b") + self.assertEqual(entries[0].right, 2) + + def test_removed_key(self): + left = {"a": 1, "b": 2} + right = {"a": 1} + entries = diff_dicts(left, right) + self.assertEqual(len(entries), 1) + self.assertEqual(entries[0].kind, "removed") + self.assertEqual(entries[0].path, "b") + self.assertEqual(entries[0].left, 2) + + def test_changed_value(self): + left = {"a": 1} + right = {"a": 2} + entries = diff_dicts(left, right) + self.assertEqual(len(entries), 1) + 
self.assertEqual(entries[0].kind, "changed") + self.assertEqual(entries[0].left, 1) + self.assertEqual(entries[0].right, 2) + + def test_nested_change(self): + left = {"a": {"b": 1}} + right = {"a": {"b": 2}} + entries = diff_dicts(left, right) + self.assertEqual(len(entries), 1) + self.assertEqual(entries[0].path, "a.b") + self.assertEqual(entries[0].kind, "changed") + + def test_list_added_element(self): + left = {"items": [1, 2]} + right = {"items": [1, 2, 3]} + entries = diff_dicts(left, right) + self.assertEqual(len(entries), 1) + self.assertEqual(entries[0].path, "items[2]") + self.assertEqual(entries[0].kind, "added") + + def test_list_removed_element(self): + left = {"items": [1, 2, 3]} + right = {"items": [1, 2]} + entries = diff_dicts(left, right) + self.assertEqual(len(entries), 1) + self.assertEqual(entries[0].path, "items[2]") + self.assertEqual(entries[0].kind, "removed") + + def test_empty_dicts(self): + self.assertEqual(diff_dicts({}, {}), []) + + def test_multiple_changes(self): + left = {"a": 1, "b": 2, "c": 3} + right = {"a": 1, "b": 99, "d": 4} + entries = diff_dicts(left, right) + kinds = {e.path: e.kind for e in entries} + self.assertEqual(kinds["b"], "changed") + self.assertEqual(kinds["c"], "removed") + self.assertEqual(kinds["d"], "added") + + +class TestFormatDiffText(TestCase): + def test_empty(self): + self.assertEqual(format_diff_text([]), "") + + def test_added(self): + entries = [DiffEntry(path="x", kind="added", right=42)] + text = format_diff_text(entries) + self.assertIn("+ x:", text) + self.assertIn("42", text) + + def test_removed(self): + entries = [DiffEntry(path="x", kind="removed", left="old")] + text = format_diff_text(entries) + self.assertIn("- x:", text) + self.assertIn("'old'", text) + + def test_changed(self): + entries = [DiffEntry(path="x", kind="changed", left=1, right=2)] + text = format_diff_text(entries) + self.assertIn("~ x:", text) + self.assertIn("->", text) + + +class TestFormatDiffJson(TestCase): + def 
test_json_output(self): + entries = [ + DiffEntry(path="a", kind="added", right=1), + DiffEntry(path="b", kind="removed", left=2), + ] + data = json.loads(format_diff_json(entries)) + self.assertEqual(len(data), 2) + self.assertEqual(data[0]["kind"], "added") + self.assertEqual(data[1]["kind"], "removed") diff --git a/test/unit/query/test_expressions.py b/test/unit/query/test_expressions.py new file mode 100644 index 00000000..5d565497 --- /dev/null +++ b/test/unit/query/test_expressions.py @@ -0,0 +1,90 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.body import DocumentView +from hcl2.query.expressions import ConditionalView +from hcl2.query.path import parse_path +from hcl2.query.resolver import resolve_path + + +class TestConditionalView(TestCase): + def _parse(self, hcl): + return DocumentView.parse(hcl) + + def test_conditional_detected(self): + doc = self._parse('x = true ? "yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*")) + self.assertEqual(len(results), 1) + self.assertIsInstance(results[0], ConditionalView) + + def test_condition_property(self): + doc = self._parse('x = true ? "yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*")) + cond = results[0] + self.assertEqual(cond.condition.to_hcl().strip(), "true") + + def test_true_val_property(self): + doc = self._parse('x = true ? "yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*")) + cond = results[0] + self.assertEqual(cond.true_val.to_hcl().strip(), '"yes"') + + def test_false_val_property(self): + doc = self._parse('x = true ? "yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*")) + cond = results[0] + self.assertEqual(cond.false_val.to_hcl().strip(), '"no"') + + def test_resolve_condition_by_path(self): + doc = self._parse('x = true ? 
"yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*.condition")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].to_hcl().strip(), "true") + + def test_resolve_true_val_by_path(self): + doc = self._parse('x = true ? "yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*.true_val")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].to_hcl().strip(), '"yes"') + + def test_resolve_false_val_by_path(self): + doc = self._parse('x = true ? "yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*.false_val")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].to_hcl().strip(), '"no"') + + def test_type_name(self): + from hcl2.query._base import view_type_name + + doc = self._parse('x = true ? "yes" : "no"\n') + results = resolve_path(doc, parse_path("*..conditional:*")) + self.assertEqual(view_type_name(results[0]), "conditional") + + def test_nested_conditional_in_block(self): + hcl = 'resource "aws" "main" {\n val = var.enabled ? "on" : "off"\n}\n' + doc = self._parse(hcl) + results = resolve_path(doc, parse_path("resource..conditional:*")) + self.assertEqual(len(results), 1) + self.assertIsInstance(results[0], ConditionalView) + + def test_pipe_to_condition(self): + from hcl2.query.pipeline import ( + classify_stage, + execute_pipeline, + split_pipeline, + ) + + doc = self._parse('x = true ? "yes" : "no"\n') + stages = [ + classify_stage(s) for s in split_pipeline("*..conditional:* | .condition") + ] + results = execute_pipeline(doc, stages) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].to_hcl().strip(), "true") + + def test_conditional_with_complex_condition(self): + doc = self._parse('x = var.count > 0 ? 
"some" : "none"\n') + results = resolve_path(doc, parse_path("*..conditional:*.condition")) + self.assertEqual(len(results), 1) + # The condition is a binary op + self.assertIn(">", results[0].to_hcl()) diff --git a/test/unit/query/test_for_exprs.py b/test/unit/query/test_for_exprs.py new file mode 100644 index 00000000..b165acc9 --- /dev/null +++ b/test/unit/query/test_for_exprs.py @@ -0,0 +1,119 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.body import DocumentView +from hcl2.query.for_exprs import ForTupleView, ForObjectView +from hcl2.rules.for_expressions import ForTupleExprRule, ForObjectExprRule +from hcl2.walk import find_first + + +class TestForTupleView(TestCase): + def test_iterator_name(self): + doc = DocumentView.parse("x = [for item in var.list : item]\n") + node = find_first(doc.raw, ForTupleExprRule) + self.assertIsNotNone(node) + fv = ForTupleView(node) + self.assertEqual(fv.iterator_name, "item") + + def test_second_iterator_name_none(self): + doc = DocumentView.parse("x = [for item in var.list : item]\n") + node = find_first(doc.raw, ForTupleExprRule) + fv = ForTupleView(node) + self.assertIsNone(fv.second_iterator_name) + + def test_second_iterator_name(self): + doc = DocumentView.parse("x = [for k, v in var.map : v]\n") + node = find_first(doc.raw, ForTupleExprRule) + fv = ForTupleView(node) + self.assertEqual(fv.second_iterator_name, "v") + + def test_iterable(self): + doc = DocumentView.parse("x = [for item in var.list : item]\n") + node = find_first(doc.raw, ForTupleExprRule) + fv = ForTupleView(node) + self.assertIsNotNone(fv.iterable) + + def test_value_expr(self): + doc = DocumentView.parse("x = [for item in var.list : item]\n") + node = find_first(doc.raw, ForTupleExprRule) + fv = ForTupleView(node) + self.assertIsNotNone(fv.value_expr) + + def test_no_condition(self): + doc = DocumentView.parse("x = [for item in var.list : item]\n") + node = find_first(doc.raw, ForTupleExprRule) + fv = 
ForTupleView(node) + self.assertFalse(fv.has_condition) + self.assertIsNone(fv.condition) + + def test_with_condition(self): + doc = DocumentView.parse('x = [for item in var.list : item if item != ""]\n') + node = find_first(doc.raw, ForTupleExprRule) + fv = ForTupleView(node) + self.assertTrue(fv.has_condition) + self.assertIsNotNone(fv.condition) + + +class TestForObjectView(TestCase): + def test_iterator_name(self): + doc = DocumentView.parse("x = {for k, v in var.map : k => v}\n") + node = find_first(doc.raw, ForObjectExprRule) + self.assertIsNotNone(node) + fv = ForObjectView(node) + self.assertEqual(fv.iterator_name, "k") + + def test_key_expr(self): + doc = DocumentView.parse("x = {for k, v in var.map : k => v}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertIsNotNone(fv.key_expr) + + def test_value_expr(self): + doc = DocumentView.parse("x = {for k, v in var.map : k => v}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertIsNotNone(fv.value_expr) + + def test_no_ellipsis(self): + doc = DocumentView.parse("x = {for k, v in var.map : k => v}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertFalse(fv.has_ellipsis) + + def test_with_ellipsis(self): + doc = DocumentView.parse("x = {for k, v in var.map : k => v...}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertTrue(fv.has_ellipsis) + + def test_second_iterator_name(self): + doc = DocumentView.parse("x = {for k, v in var.map : k => v}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertEqual(fv.second_iterator_name, "v") + + def test_second_iterator_name_none(self): + doc = DocumentView.parse("x = {for item in var.list : item => item}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertIsNone(fv.second_iterator_name) + + def test_iterable(self): + doc = 
DocumentView.parse("x = {for k, v in var.map : k => v}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertIsNotNone(fv.iterable) + + def test_no_condition(self): + doc = DocumentView.parse("x = {for k, v in var.map : k => v}\n") + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertFalse(fv.has_condition) + self.assertIsNone(fv.condition) + + def test_with_condition(self): + doc = DocumentView.parse('x = {for k, v in var.map : k => v if k != ""}\n') + node = find_first(doc.raw, ForObjectExprRule) + fv = ForObjectView(node) + self.assertTrue(fv.has_condition) + self.assertIsNotNone(fv.condition) diff --git a/test/unit/query/test_functions.py b/test/unit/query/test_functions.py new file mode 100644 index 00000000..42541842 --- /dev/null +++ b/test/unit/query/test_functions.py @@ -0,0 +1,46 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.body import DocumentView +from hcl2.query.functions import FunctionCallView +from hcl2.rules.functions import FunctionCallRule +from hcl2.walk import find_first + + +class TestFunctionCallView(TestCase): + def test_name(self): + doc = DocumentView.parse("x = length(var.list)\n") + node = find_first(doc.raw, FunctionCallRule) + self.assertIsNotNone(node) + fv = FunctionCallView(node) + self.assertEqual(fv.name, "length") + + def test_args(self): + doc = DocumentView.parse("x = length(var.list)\n") + node = find_first(doc.raw, FunctionCallRule) + fv = FunctionCallView(node) + self.assertEqual(len(fv.args), 1) + + def test_no_args(self): + doc = DocumentView.parse("x = timestamp()\n") + node = find_first(doc.raw, FunctionCallRule) + fv = FunctionCallView(node) + self.assertEqual(len(fv.args), 0) + + def test_no_ellipsis(self): + doc = DocumentView.parse("x = length(var.list)\n") + node = find_first(doc.raw, FunctionCallRule) + fv = FunctionCallView(node) + self.assertFalse(fv.has_ellipsis) + + def 
test_ellipsis(self): + doc = DocumentView.parse("x = length(var.list...)\n") + node = find_first(doc.raw, FunctionCallRule) + fv = FunctionCallView(node) + self.assertTrue(fv.has_ellipsis) + + def test_multiple_args(self): + doc = DocumentView.parse('x = coalesce(var.a, var.b, "default")\n') + node = find_first(doc.raw, FunctionCallRule) + fv = FunctionCallView(node) + self.assertEqual(len(fv.args), 3) diff --git a/test/unit/query/test_introspect.py b/test/unit/query/test_introspect.py new file mode 100644 index 00000000..075318e9 --- /dev/null +++ b/test/unit/query/test_introspect.py @@ -0,0 +1,90 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.body import DocumentView +from hcl2.query.introspect import build_schema, describe_results + + +class TestDescribeResults(TestCase): + def test_describe_block(self): + doc = DocumentView.parse('resource "aws_instance" "main" {}\n') + blocks = doc.blocks("resource") + result = describe_results(blocks) + self.assertIn("results", result) + self.assertEqual(len(result["results"]), 1) + desc = result["results"][0] + self.assertEqual(desc["type"], "BlockView") + self.assertIn("properties", desc) + self.assertIn("methods", desc) + self.assertIn("block_type", desc["summary"]) + + def test_describe_attribute(self): + doc = DocumentView.parse("x = 1\n") + attrs = doc.attributes("x") + result = describe_results(attrs) + desc = result["results"][0] + self.assertEqual(desc["type"], "AttributeView") + self.assertIn("name", desc["summary"]) + + def test_describe_primitive(self): + result = describe_results([42]) + desc = result["results"][0] + self.assertEqual(desc["type"], "int") + self.assertIn("42", desc["value"]) + + +class TestBuildSchema(TestCase): + def test_schema_has_views(self): + schema = build_schema() + self.assertIn("views", schema) + self.assertIn("DocumentView", schema["views"]) + self.assertIn("BlockView", schema["views"]) + self.assertIn("AttributeView", schema["views"]) + 
self.assertIn("NodeView", schema["views"]) + + def test_schema_has_eval_namespace(self): + schema = build_schema() + self.assertIn("eval_namespace", schema) + self.assertIn("builtins", schema["eval_namespace"]) + self.assertIn("variables", schema["eval_namespace"]) + self.assertIn("len", schema["eval_namespace"]["builtins"]) + + def test_schema_view_has_properties(self): + schema = build_schema() + doc_schema = schema["views"]["DocumentView"] + self.assertIn("properties", doc_schema) + self.assertIn("body", doc_schema["properties"]) + + def test_schema_view_has_methods(self): + schema = build_schema() + doc_schema = schema["views"]["DocumentView"] + self.assertIn("methods", doc_schema) + + def test_schema_view_wraps(self): + schema = build_schema() + block_schema = schema["views"]["BlockView"] + self.assertEqual(block_schema["wraps"], "BlockRule") + + def test_schema_nodeview_no_wraps(self): + schema = build_schema() + nv_schema = schema["views"]["NodeView"] + self.assertNotIn("wraps", nv_schema) + + def test_describe_body_view_no_summary(self): + doc = DocumentView.parse("x = 1\n") + result = describe_results([doc.body]) + desc = result["results"][0] + self.assertEqual(desc["type"], "BodyView") + self.assertNotIn("summary", desc) + + def test_describe_document_view(self): + doc = DocumentView.parse("x = 1\n") + result = describe_results([doc]) + desc = result["results"][0] + self.assertEqual(desc["type"], "DocumentView") + + def test_schema_static_methods(self): + schema = build_schema() + doc_schema = schema["views"]["DocumentView"] + # DocumentView has parse and parse_file static methods + self.assertIn("static_methods", doc_schema) diff --git a/test/unit/query/test_path.py b/test/unit/query/test_path.py new file mode 100644 index 00000000..d35990aa --- /dev/null +++ b/test/unit/query/test_path.py @@ -0,0 +1,185 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.path import PathSegment, QuerySyntaxError, parse_path + 

class TestParsePath(TestCase):
    """``parse_path``: segment names, brackets, recursion, filters, suffixes."""

    def test_simple(self):
        parsed = parse_path("resource")
        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0], PathSegment("resource", False, None))

    def test_dotted(self):
        parsed = parse_path("resource.aws_instance.main")
        self.assertEqual(len(parsed), 3)
        first, second, third = parsed
        self.assertEqual(first.name, "resource")
        self.assertEqual(second.name, "aws_instance")
        self.assertEqual(third.name, "main")

    def test_wildcard(self):
        self.assertEqual(parse_path("*")[0], PathSegment("*", False, None))

    def test_select_all(self):
        head = parse_path("variable[*]")[0]
        self.assertEqual(head, PathSegment("variable", True, None))

    def test_index(self):
        head = parse_path("variable[0]")[0]
        self.assertEqual(head, PathSegment("variable", False, 0))

    def test_complex(self):
        parsed = parse_path("resource.aws_instance[*].tags")
        self.assertEqual(len(parsed), 3)
        self.assertEqual(parsed[0].name, "resource")
        self.assertTrue(parsed[1].select_all)
        self.assertEqual(parsed[2].name, "tags")

    def test_empty_raises(self):
        with self.assertRaises(QuerySyntaxError):
            parse_path("")

    def test_recursive_descent(self):
        parsed = parse_path("a..b")
        self.assertEqual(len(parsed), 2)
        self.assertEqual(parsed[0], PathSegment("a", False, None))
        self.assertEqual(parsed[1], PathSegment("b", False, None, recursive=True))

    def test_recursive_with_index(self):
        parsed = parse_path("resource..tags[*]")
        self.assertEqual(len(parsed), 2)
        tail = parsed[1]
        self.assertEqual(tail.name, "tags")
        self.assertTrue(tail.recursive)
        self.assertTrue(tail.select_all)

    def test_recursive_in_middle(self):
        parsed = parse_path("a.b..c.d")
        self.assertEqual(len(parsed), 4)
        # Only the segment that follows ".." is marked recursive.
        self.assertFalse(parsed[0].recursive)
        self.assertFalse(parsed[1].recursive)
        self.assertTrue(parsed[2].recursive)
        self.assertFalse(parsed[3].recursive)

    def test_triple_dot_raises(self):
        with self.assertRaises(QuerySyntaxError):
            parse_path("a...b")

    def test_recursive_at_end_raises(self):
        with self.assertRaises(QuerySyntaxError):
            parse_path("a..")

    def test_leading_dot_raises(self):
        with self.assertRaises(QuerySyntaxError):
            parse_path(".a")

    def test_invalid_segment_raises(self):
        with self.assertRaises(QuerySyntaxError):
            parse_path("123invalid")

    def test_hyphen_in_name(self):
        self.assertEqual(parse_path("local-exec")[0].name, "local-exec")

    def test_index_large(self):
        self.assertEqual(parse_path("items[42]")[0].index, 42)

    def test_type_filter(self):
        parsed = parse_path("function_call:length")
        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].name, "length")
        self.assertEqual(parsed[0].type_filter, "function_call")

    def test_type_filter_with_index(self):
        head = parse_path("function_call:length[0]")[0]
        self.assertEqual(head.name, "length")
        self.assertEqual(head.type_filter, "function_call")
        self.assertEqual(head.index, 0)

    def test_type_filter_with_wildcard(self):
        head = parse_path("function_call:*[*]")[0]
        self.assertEqual(head.name, "*")
        self.assertEqual(head.type_filter, "function_call")
        self.assertTrue(head.select_all)

    def test_type_filter_in_recursive(self):
        parsed = parse_path("*..function_call:length")
        self.assertEqual(len(parsed), 2)
        tail = parsed[1]
        self.assertTrue(tail.recursive)
        self.assertEqual(tail.type_filter, "function_call")
        self.assertEqual(tail.name, "length")

    def test_no_type_filter(self):
        self.assertIsNone(parse_path("length")[0].type_filter)

    def test_skip_labels(self):
        parsed = parse_path("block~")
        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].name, "block")
        self.assertTrue(parsed[0].skip_labels)

    def test_skip_labels_with_bracket(self):
        head = parse_path("resource~[*]")[0]
        self.assertEqual(head.name, "resource")
        self.assertTrue(head.skip_labels)
        self.assertTrue(head.select_all)

    def test_skip_labels_with_select(self):
        head = parse_path("block~[select(.ami)]")[0]
        self.assertEqual(head.name, "block")
        self.assertTrue(head.skip_labels)
        self.assertIsNotNone(head.predicate)

    def test_skip_labels_in_path(self):
        parsed = parse_path("block~.ami")
        self.assertEqual(len(parsed), 2)
        self.assertTrue(parsed[0].skip_labels)
        self.assertFalse(parsed[1].skip_labels)

    def test_no_skip_labels_by_default(self):
        self.assertFalse(parse_path("block")[0].skip_labels)

    def test_select_with_trailing_star(self):
        head = parse_path("variable[select(.default)][*]")[0]
        self.assertEqual(head.name, "variable")
        self.assertIsNotNone(head.predicate)
        self.assertTrue(head.select_all)
        self.assertIsNone(head.index)

    def test_select_with_trailing_index(self):
        head = parse_path("variable[select(.default)][0]")[0]
        self.assertEqual(head.name, "variable")
        self.assertIsNotNone(head.predicate)
        self.assertFalse(head.select_all)
        self.assertEqual(head.index, 0)

    def test_select_no_trailing_bracket(self):
        head = parse_path("variable[select(.default)]")[0]
        self.assertIsNotNone(head.predicate)
        self.assertTrue(head.select_all)
        self.assertIsNone(head.index)

    def test_optional_suffix(self):
        parsed = parse_path("x?")
        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].name, "x")

    def test_optional_with_bracket(self):
        parsed = parse_path("x[*]?")
        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].name, "x")
        self.assertTrue(parsed[0].select_all)

    def test_optional_after_select(self):
        parsed = parse_path("*[select(.x)]?")
        self.assertEqual(len(parsed), 1)
        self.assertIsNotNone(parsed[0].predicate)

    def test_optional_produces_same_as_without(self):
        plain = parse_path("resource")[0]
        optional = parse_path("resource?")[0]
        self.assertEqual(plain.name, optional.name)
        self.assertEqual(plain.select_all, optional.select_all)
# diff --git a/test/unit/query/test_pipeline.py b/test/unit/query/test_pipeline.py
# new file mode 100644
# index 00000000..92b4a30e
# --- /dev/null
# +++ b/test/unit/query/test_pipeline.py
# @@ -0,0 +1,325 @@
# pylint: disable=C0103,C0114,C0115,C0116
from unittest import TestCase

from hcl2.query.body import DocumentView
from hcl2.query.path import QuerySyntaxError
from hcl2.query.pipeline import (
    BuiltinStage,
    ConstructStage,
    PathStage,
    SelectStage,
    classify_stage,
    execute_pipeline,
    split_pipeline,
)


class TestSplitPipeline(TestCase):
    """``split_pipeline``: splitting a query string on top-level pipes."""

    def test_single_stage(self):
        self.assertEqual(split_pipeline("resource"), ["resource"])

    def test_multi_stage(self):
        expected = ["resource[*]", ".aws_instance", ".tags"]
        self.assertEqual(
            split_pipeline("resource[*] | .aws_instance | .tags"), expected
        )

    def test_bracket_aware(self):
        # A stage that contains brackets must come through the split intact.
        # (This input has no pipe inside the brackets themselves.)
        self.assertEqual(split_pipeline("x[*] | y"), ["x[*]", "y"])

    def test_paren_aware(self):
        # The pipe inside the parentheses must not split: the expected stages
        # are "select(.a | .b)" and "y".  The predicate grammar does not
        # support pipes; this input only exercises depth tracking.
        pieces = split_pipeline("select(.a | .b) | y")
        self.assertEqual(len(pieces), 2)

    def test_quote_aware(self):
        pieces = split_pipeline('"a | b" | y')
        self.assertEqual(len(pieces), 2)

    def test_empty_stage_error(self):
        with self.assertRaises(QuerySyntaxError):
            split_pipeline("x | | y")

    def test_trailing_pipe_error(self):
        with self.assertRaises(QuerySyntaxError):
            split_pipeline("x |")

    def test_leading_pipe_error(self):
        with self.assertRaises(QuerySyntaxError):
            split_pipeline("| x")

    def test_empty_pipeline_error(self):
        with self.assertRaises(QuerySyntaxError):
            split_pipeline("")

    def test_whitespace_stripped(self):
        self.assertEqual(split_pipeline(" x | y "), ["x", "y"])


class TestClassifyStage(TestCase):
    """``classify_stage``: mapping one stage string to its stage type."""

    def test_path_stage(self):
        classified = classify_stage("resource.aws_instance")
        self.assertIsInstance(classified, PathStage)
        self.assertEqual(len(classified.segments), 2)

    def test_builtin_keys(self):
        classified = classify_stage("keys")
        self.assertIsInstance(classified, BuiltinStage)
        self.assertEqual(classified.name, "keys")

    def test_builtin_values(self):
        classified = classify_stage("values")
        self.assertIsInstance(classified, BuiltinStage)
        self.assertEqual(classified.name, "values")

    def test_builtin_length(self):
        classified = classify_stage("length")
        self.assertIsInstance(classified, BuiltinStage)
        self.assertEqual(classified.name, "length")

    def test_select_stage(self):
        classified = classify_stage("select(.name)")
        self.assertIsInstance(classified, SelectStage)
        self.assertIsNotNone(classified.predicate)

    def test_select_with_comparison(self):
        self.assertIsInstance(classify_stage('select(.name == "foo")'), SelectStage)

    def test_path_with_wildcard(self):
        self.assertIsInstance(classify_stage("*[*]"), PathStage)


class TestExecutePipeline(TestCase):
    """``execute_pipeline``: running classified stages against a document."""

    def _make_doc(self, hcl):
        return DocumentView.parse(hcl)

    def _pipe(self, hcl, query):
        # Split and classify *query*, then execute it against parsed *hcl*.
        document = self._make_doc(hcl)
        classified = [classify_stage(text) for text in split_pipeline(query)]
        return execute_pipeline(document, classified)

    def test_single_stage_identity(self):
        document = self._make_doc("x = 1\n")
        out = execute_pipeline(document, [classify_stage("x")])
        self.assertEqual(len(out), 1)
        self.assertEqual(out[0].name, "x")

    def test_multi_stage_chaining(self):
        # Pipe unwraps blocks to body, so chain with body attributes
        out = self._pipe(
            'resource "aws_instance" "main" {\n ami = "test"\n}\n',
            "resource.aws_instance.main | .ami",
        )
        self.assertEqual(len(out), 1)

    def test_empty_intermediate(self):
        out = self._pipe("x = 1\n", "nonexistent | .foo")
        self.assertEqual(len(out), 0)

    def test_pipe_with_wildcard(self):
        out = self._pipe("x = 1\ny = 2\nz = 3\n", "*[*] | length")
        self.assertEqual(len(out), 3)
        # Each attribute has length 1
        self.assertEqual(out, [1, 1, 1])

    def test_pipe_builtin(self):
        out = self._pipe("x = {\n a = 1\n b = 2\n}\n", "x | keys")
        self.assertEqual(len(out), 1)
        self.assertEqual(sorted(out[0]), ["a", "b"])

    def test_pipe_select(self):
        out = self._pipe(
            'variable "a" {\n default = 1\n}\nvariable "b" {}\n',
            "variable[*] | select(.default)",
        )
        self.assertEqual(len(out), 1)

    def test_backward_compat_no_pipe(self):
        """Single structural path still works through pipeline."""
        document = self._make_doc(
            'resource "aws_instance" "main" {\n ami = "test"\n}\n'
        )
        out = execute_pipeline(
            document, [classify_stage("resource.aws_instance.main.ami")]
        )
        self.assertEqual(len(out), 1)


class TestPropertyAccessPipeStages(TestCase):
    """Test property accessor pipe stages (Option B)."""

    def _run(self, hcl, query):
        doc = DocumentView.parse(hcl)
        stages = [classify_stage(s) for s in split_pipeline(query)]
        return execute_pipeline(doc, stages)

    def test_block_type_property(self):
        r = self._run(
            'resource "aws" "x" {\n ami = 1\n}\n',
            "resource[*] | .block_type",
        )
        self.assertEqual(r, ["resource"])

    def test_name_labels_property(self):
        r = self._run(
            'resource "aws" "x" {\n ami = 1\n}\n',
            "resource[*] | .name_labels",
        )
self.assertEqual(r, [["aws", "x"]]) + + def test_labels_property(self): + r = self._run( + 'resource "aws" "x" {\n ami = 1\n}\n', + "resource[*] | .labels", + ) + self.assertEqual(r, [["resource", "aws", "x"]]) + + def test_attribute_name_property(self): + r = self._run("x = 1\ny = 2\n", "*[*] | .name") + self.assertEqual(sorted(r), ["x", "y"]) + + def test_function_call_name_property(self): + r = self._run( + 'x = substr("hello", 0, 3)\n', + "*..function_call:*[*] | .name", + ) + self.assertEqual(r, ["substr"]) + + def test_property_then_builtin(self): + """Property access result feeds into a builtin.""" + r = self._run( + 'resource "aws" "x" {\n ami = 1\n}\n', + "resource[*] | .labels | length", + ) + self.assertEqual(r, [3]) + + def test_structural_still_works_after_pipe(self): + """Structural path resolution still works through pipes.""" + r = self._run( + 'resource "aws" "x" {\n ami = "test"\n}\n', + "resource.aws.x | .ami", + ) + self.assertEqual(len(r), 1) + + def test_type_qualifier_filter_in_pipe(self): + """Type qualifier in pipe stage filters by value type.""" + r = self._run( + "a = {x = 1}\nb = [1, 2]\nc = 3\n", + "*[*] | object:*", + ) + self.assertEqual(len(r), 1) + self.assertEqual(type(r[0]).__name__, "ObjectView") + + def test_type_qualifier_tuple_in_pipe(self): + r = self._run( + "a = {x = 1}\nb = [1, 2]\nc = 3\n", + "*[*] | tuple:*", + ) + self.assertEqual(len(r), 1) + self.assertEqual(type(r[0]).__name__, "TupleView") + + +class TestOptionalTolerance(TestCase): + """Test that trailing ? 
is tolerated in pipeline stages.""" + + def test_classify_stage_optional(self): + stage = classify_stage("resource?") + self.assertIsInstance(stage, PathStage) + + def test_classify_stage_optional_with_bracket(self): + stage = classify_stage("x[*]?") + self.assertIsInstance(stage, PathStage) + self.assertTrue(stage.segments[0].select_all) + + def test_classify_builtin_optional(self): + stage = classify_stage("keys?") + self.assertIsInstance(stage, BuiltinStage) + self.assertEqual(stage.name, "keys") + + def test_classify_select_optional(self): + stage = classify_stage("select(.name)?") + # select(.name)? — ? stripped first, then select() detected + self.assertIsInstance(stage, SelectStage) + + def test_brace_aware_split(self): + """Pipes inside braces should not split.""" + result = split_pipeline("x[*] | {source, cpu}") + self.assertEqual(len(result), 2) + self.assertEqual(result[1], "{source, cpu}") + + +class TestConstructStage(TestCase): + """Test object construction ``{field1, field2}`` pipeline stage.""" + + def _run(self, hcl, query): + doc = DocumentView.parse(hcl) + stages = [classify_stage(s) for s in split_pipeline(query)] + return execute_pipeline(doc, stages) + + def test_classify_construct(self): + stage = classify_stage("{source, cpu}") + self.assertIsInstance(stage, ConstructStage) + self.assertEqual(len(stage.fields), 2) + self.assertEqual(stage.fields[0][0], "source") + self.assertEqual(stage.fields[1][0], "cpu") + + def test_classify_construct_renamed(self): + stage = classify_stage("{mod: .source, vcpu: .cpu}") + self.assertIsInstance(stage, ConstructStage) + self.assertEqual(stage.fields[0][0], "mod") + self.assertEqual(stage.fields[1][0], "vcpu") + + def test_execute_construct_shorthand(self): + r = self._run( + 'resource "aws" "x" {\n ami = "test"\n count = 2\n}\n', + "resource.aws.x | {ami, count}", + ) + self.assertEqual(len(r), 1) + self.assertIsInstance(r[0], dict) + self.assertIn("ami", r[0]) + self.assertIn("count", r[0]) + # Values 
should be flat, not nested dicts like {"ami": {"ami": ...}} + self.assertNotIsInstance(r[0]["ami"], dict) + self.assertEqual(r[0]["ami"], '"test"') + self.assertEqual(r[0]["count"], 2) + + def test_execute_construct_renamed(self): + r = self._run( + 'resource "aws" "x" {\n ami = "test"\n}\n', + "resource[*] | {type: .block_type, name: .name_labels}", + ) + self.assertEqual(len(r), 1) + self.assertEqual(r[0]["type"], "resource") + self.assertEqual(r[0]["name"], ["aws", "x"]) + + def test_construct_missing_field(self): + r = self._run( + "x = 1\n", + "x | {value, nonexistent}", + ) + self.assertEqual(len(r), 1) + self.assertIsNone(r[0]["nonexistent"]) + + def test_construct_with_select(self): + r = self._run( + "a = 1\nb = 2\nc = 3\n", + "*[select(.value > 1)] | {name, value}", + ) + self.assertEqual(len(r), 2) + names = sorted(d["name"] for d in r) + self.assertEqual(names, ["b", "c"]) + + def test_construct_with_index(self): + stage = classify_stage("{first: .items[0]}") + self.assertIsInstance(stage, ConstructStage) + self.assertEqual(stage.fields[0][0], "first") diff --git a/test/unit/query/test_predicate.py b/test/unit/query/test_predicate.py new file mode 100644 index 00000000..52123436 --- /dev/null +++ b/test/unit/query/test_predicate.py @@ -0,0 +1,611 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.body import DocumentView +from hcl2.query.path import QuerySyntaxError, parse_path +from hcl2.query.predicate import ( + Accessor, + AllExpr, + AndExpr, + AnyExpr, + Comparison, + HasExpr, + NotExpr, + OrExpr, + Token, + evaluate_predicate, + parse_predicate, + tokenize, +) +from hcl2.query.resolver import resolve_path + + +class TestTokenize(TestCase): + def test_dot_and_word(self): + tokens = tokenize(".foo") + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].kind, "DOT") + self.assertEqual(tokens[1].kind, "WORD") + self.assertEqual(tokens[1].value, "foo") + + def test_comparison(self): + tokens = 
tokenize('.name == "bar"') + kinds = [t.kind for t in tokens] + self.assertEqual(kinds, ["DOT", "WORD", "OP", "STRING"]) + + def test_number(self): + tokens = tokenize(".x > 42") + self.assertEqual(tokens[2].kind, "OP") + self.assertEqual(tokens[3].kind, "NUMBER") + self.assertEqual(tokens[3].value, "42") + + def test_float_number(self): + tokens = tokenize(".x > 3.14") + self.assertEqual(tokens[3].value, "3.14") + + def test_brackets(self): + tokens = tokenize(".items[0]") + kinds = [t.kind for t in tokens] + self.assertEqual(kinds, ["DOT", "WORD", "LBRACKET", "NUMBER", "RBRACKET"]) + + def test_boolean_keywords(self): + tokens = tokenize(".a and .b or not .c") + words = [t.value for t in tokens if t.kind == "WORD"] + self.assertEqual(words, ["a", "and", "b", "or", "not", "c"]) + + def test_all_operators(self): + for op in ["==", "!=", "<", ">", "<=", ">="]: + tokens = tokenize(f".x {op} 1") + self.assertEqual(tokens[2].kind, "OP") + self.assertEqual(tokens[2].value, op) + + def test_unexpected_char_raises(self): + with self.assertRaises(QuerySyntaxError): + tokenize("@invalid") + + +class TestParsePredicate(TestCase): + def test_existence(self): + pred = parse_predicate(".name") + self.assertIsInstance(pred, Comparison) + self.assertIsNone(pred.operator) + self.assertEqual(pred.accessor.parts, ["name"]) + + def test_equality_string(self): + pred = parse_predicate('.name == "foo"') + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.operator, "==") + self.assertEqual(pred.value, "foo") + + def test_equality_int(self): + pred = parse_predicate(".count == 5") + self.assertEqual(pred.operator, "==") + self.assertEqual(pred.value, 5) + + def test_less_than(self): + pred = parse_predicate(".count < 10") + self.assertEqual(pred.operator, "<") + self.assertEqual(pred.value, 10) + + def test_boolean_true(self): + pred = parse_predicate(".enabled == true") + self.assertEqual(pred.value, True) + + def test_boolean_false(self): + pred = 
parse_predicate(".enabled == false") + self.assertEqual(pred.value, False) + + def test_null(self): + pred = parse_predicate(".x == null") + self.assertIsNone(pred.value) + + def test_dotted_accessor(self): + pred = parse_predicate(".tags.Name") + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.parts, ["tags", "Name"]) + + def test_indexed_accessor(self): + pred = parse_predicate(".items[0]") + self.assertEqual(pred.accessor.parts, ["items"]) + self.assertEqual(pred.accessor.index, 0) + + def test_and(self): + pred = parse_predicate(".a and .b") + self.assertIsInstance(pred, AndExpr) + self.assertEqual(len(pred.children), 2) + + def test_or(self): + pred = parse_predicate(".a or .b") + self.assertIsInstance(pred, OrExpr) + self.assertEqual(len(pred.children), 2) + + def test_not(self): + pred = parse_predicate("not .a") + self.assertIsInstance(pred, NotExpr) + + def test_combined_and_or(self): + pred = parse_predicate(".a and .b or .c") + # Should parse as (.a and .b) or .c due to precedence + self.assertIsInstance(pred, OrExpr) + self.assertIsInstance(pred.children[0], AndExpr) + + def test_empty_raises(self): + with self.assertRaises(QuerySyntaxError): + parse_predicate("") + + def test_no_leading_dot_raises(self): + with self.assertRaises(QuerySyntaxError): + parse_predicate("name") + + def test_extra_tokens_raises(self): + with self.assertRaises(QuerySyntaxError): + parse_predicate('.name == "foo" extra') + + +class TestEvaluatePredicate(TestCase): + def _make_doc(self, hcl): + return DocumentView.parse(hcl) + + def test_existence_true(self): + doc = self._make_doc('variable "a" {\n default = 1\n}\n') + blocks = doc.blocks("variable") + pred = parse_predicate(".default") + self.assertTrue(evaluate_predicate(pred, blocks[0])) + + def test_existence_false(self): + doc = self._make_doc('variable "a" {}\n') + blocks = doc.blocks("variable") + pred = parse_predicate(".default") + self.assertFalse(evaluate_predicate(pred, blocks[0])) + + def 
test_equality_block_type(self): + doc = self._make_doc('resource "aws_instance" "main" {}\n') + blocks = doc.blocks() + pred = parse_predicate('.block_type == "resource"') + self.assertTrue(evaluate_predicate(pred, blocks[0])) + + def test_equality_block_type_mismatch(self): + doc = self._make_doc('resource "aws_instance" "main" {}\n') + blocks = doc.blocks() + pred = parse_predicate('.block_type == "variable"') + self.assertFalse(evaluate_predicate(pred, blocks[0])) + + def test_attribute_name(self): + doc = self._make_doc("x = 1\ny = 2\n") + attrs = doc.body.attributes() + pred = parse_predicate('.name == "x"') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + self.assertFalse(evaluate_predicate(pred, attrs[1])) + + def test_attribute_value(self): + doc = self._make_doc("x = 1\ny = 2\n") + attrs = doc.body.attributes() + pred = parse_predicate(".value == 1") + self.assertTrue(evaluate_predicate(pred, attrs[0])) + self.assertFalse(evaluate_predicate(pred, attrs[1])) + + def test_not_predicate(self): + doc = self._make_doc("x = 1\ny = 2\n") + attrs = doc.body.attributes() + pred = parse_predicate('not .name == "x"') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + self.assertTrue(evaluate_predicate(pred, attrs[1])) + + def test_and_predicate(self): + doc = self._make_doc("x = 1\ny = 2\n") + attrs = doc.body.attributes() + pred = parse_predicate('.name == "x" and .value == 1') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + self.assertFalse(evaluate_predicate(pred, attrs[1])) + + def test_or_predicate(self): + doc = self._make_doc("x = 1\ny = 2\nz = 3\n") + attrs = doc.body.attributes() + pred = parse_predicate('.name == "x" or .name == "y"') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + self.assertTrue(evaluate_predicate(pred, attrs[1])) + self.assertFalse(evaluate_predicate(pred, attrs[2])) + + def test_greater_than(self): + doc = self._make_doc("x = 5\ny = 15\n") + attrs = doc.body.attributes() + pred = parse_predicate(".value > 
10") + self.assertFalse(evaluate_predicate(pred, attrs[0])) + self.assertTrue(evaluate_predicate(pred, attrs[1])) + + def test_type_accessor_block(self): + doc = self._make_doc('resource "aws_instance" "main" {}\n') + blocks = doc.blocks() + pred = parse_predicate('.type == "block"') + self.assertTrue(evaluate_predicate(pred, blocks[0])) + + def test_type_accessor_attribute(self): + doc = self._make_doc("x = 1\n") + attrs = doc.body.attributes() + pred = parse_predicate('.type == "attribute"') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_type_accessor_object(self): + doc = self._make_doc("x = {\n a = 1\n}\n") + attr = doc.attribute("x") + # value_node is ExprTerm wrapping ObjectRule + from hcl2.query._base import view_for + from hcl2.rules.expressions import ExprTermRule + + vn = attr.value_node + if isinstance(vn._node, ExprTermRule): + inner = view_for(vn._node.expression) + else: + inner = vn + pred = parse_predicate('.type == "object"') + self.assertTrue(evaluate_predicate(pred, inner)) + + def test_type_accessor_mismatch(self): + doc = self._make_doc("x = 1\n") + attrs = doc.body.attributes() + pred = parse_predicate('.type == "block"') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_type_accessor_document(self): + doc = self._make_doc("x = 1\n") + pred = parse_predicate('.type == "document"') + self.assertTrue(evaluate_predicate(pred, doc)) + + def test_type_accessor_tuple(self): + doc = self._make_doc("x = [1, 2]\n") + attr = doc.attribute("x") + from hcl2.query._base import view_for + from hcl2.rules.expressions import ExprTermRule + + vn = attr.value_node + if isinstance(vn._node, ExprTermRule): + inner = view_for(vn._node.expression) + else: + inner = vn + pred = parse_predicate('.type == "tuple"') + self.assertTrue(evaluate_predicate(pred, inner)) + + +class TestKeywordComparison(TestCase): + """Test that HCL keywords (true/false/null) compare correctly.""" + + def test_keyword_true_matches_true(self): + doc = 
DocumentView.parse("x = true\n") + attrs = doc.body.attributes() + pred = parse_predicate(".value == true") + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_keyword_true_not_matches_string(self): + doc = DocumentView.parse("x = true\n") + attrs = doc.body.attributes() + pred = parse_predicate('.value == "true"') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_keyword_false_matches_false(self): + doc = DocumentView.parse("x = false\n") + attrs = doc.body.attributes() + pred = parse_predicate(".value == false") + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_keyword_null_matches_null(self): + doc = DocumentView.parse("x = null\n") + attrs = doc.body.attributes() + pred = parse_predicate(".value == null") + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_conditional_true_val_keyword(self): + doc = DocumentView.parse("x = a == b ? true : false\n") + results = resolve_path(doc, parse_path("*..conditional:*")) + pred = parse_predicate(".true_val == true") + self.assertTrue(evaluate_predicate(pred, results[0])) + + def test_conditional_false_val_keyword(self): + doc = DocumentView.parse("x = a == b ? 
true : false\n") + results = resolve_path(doc, parse_path("*..conditional:*")) + pred = parse_predicate(".false_val == false") + self.assertTrue(evaluate_predicate(pred, results[0])) + + +class TestSelectInPath(TestCase): + """Test [select()] bracket syntax in structural paths.""" + + def test_select_bracket_in_path(self): + doc = DocumentView.parse("x = 1\ny = 2\n") + results = resolve_path(doc, parse_path('*[select(.name == "x")]')) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "x") + + def test_select_bracket_existence(self): + doc = DocumentView.parse('variable "a" {\n default = 1\n}\nvariable "b" {}\n') + results = resolve_path(doc, parse_path("variable[select(.default)]")) + self.assertEqual(len(results), 1) + + def test_select_bracket_no_match(self): + doc = DocumentView.parse("x = 1\ny = 2\n") + results = resolve_path(doc, parse_path('*[select(.name == "z")]')) + self.assertEqual(len(results), 0) + + def test_select_bracket_with_type_qualifier(self): + doc = DocumentView.parse('x = substr("hello", 0, 3)\ny = upper("a")\n') + results = resolve_path(doc, parse_path("*..function_call:*[select(.args[2])]")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "substr") + + +class TestAccessorBuiltin(TestCase): + """Test ``| builtin`` syntax in predicate accessors.""" + + def test_parse_pipe_length(self): + pred = parse_predicate(".args | length > 2") + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.builtin, "length") + self.assertEqual(pred.operator, ">") + self.assertEqual(pred.value, 2) + + def test_parse_pipe_keys(self): + pred = parse_predicate(".tags | keys") + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.builtin, "keys") + self.assertIsNone(pred.operator) + + def test_parse_invalid_builtin(self): + with self.assertRaises(QuerySyntaxError): + parse_predicate(".args | bogus") + + def test_tokenize_pipe(self): + tokens = tokenize(".args | length") + kinds = 
[t.kind for t in tokens] + self.assertIn("PIPE", kinds) + + def test_evaluate_length_gt(self): + doc = DocumentView.parse('x = substr("hello", 0, 3)\n') + funcs = resolve_path(doc, parse_path("*..function_call:*")) + func = funcs[0] + pred = parse_predicate(".args | length > 2") + self.assertTrue(evaluate_predicate(pred, func)) + pred2 = parse_predicate(".args | length > 5") + self.assertFalse(evaluate_predicate(pred2, func)) + + def test_evaluate_length_eq(self): + doc = DocumentView.parse('x = substr("hello", 0, 3)\n') + funcs = resolve_path(doc, parse_path("*..function_call:*")) + func = funcs[0] + pred = parse_predicate(".args | length == 3") + self.assertTrue(evaluate_predicate(pred, func)) + + +class TestAnyAll(TestCase): + """Test ``any(accessor; pred)`` and ``all(accessor; pred)``.""" + + def test_parse_any(self): + pred = parse_predicate('any(.elements; .type == "function_call")') + self.assertIsInstance(pred, AnyExpr) + self.assertEqual(pred.accessor.parts, ["elements"]) + self.assertIsInstance(pred.predicate, Comparison) + + def test_parse_all(self): + pred = parse_predicate('all(.items; .name == "x")') + self.assertIsInstance(pred, AllExpr) + self.assertEqual(pred.accessor.parts, ["items"]) + + def test_parse_any_with_boolean_combinators(self): + pred = parse_predicate( + 'any(.elements; .type == "function_call" or .type == "tuple")' + ) + self.assertIsInstance(pred, AnyExpr) + self.assertIsInstance(pred.predicate, OrExpr) + + def test_evaluate_any_true(self): + doc = DocumentView.parse("x = [1, f(a), 3]\n") + tuples = resolve_path(doc, parse_path("*..tuple:*")) + pred = parse_predicate('any(.elements; .type == "function_call")') + self.assertTrue(evaluate_predicate(pred, tuples[0])) + + def test_evaluate_any_false(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + tuples = resolve_path(doc, parse_path("*..tuple:*")) + pred = parse_predicate('any(.elements; .type == "function_call")') + self.assertFalse(evaluate_predicate(pred, tuples[0])) + + def 
test_evaluate_all_true(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + tuples = resolve_path(doc, parse_path("*..tuple:*")) + pred = parse_predicate('all(.elements; .type == "node")') + self.assertTrue(evaluate_predicate(pred, tuples[0])) + + def test_evaluate_all_false(self): + doc = DocumentView.parse("x = [1, f(a), 3]\n") + tuples = resolve_path(doc, parse_path("*..tuple:*")) + pred = parse_predicate('all(.elements; .type == "node")') + self.assertFalse(evaluate_predicate(pred, tuples[0])) + + def test_any_on_none_is_false(self): + doc = DocumentView.parse("x = 1\n") + attrs = resolve_path(doc, parse_path("x")) + pred = parse_predicate('any(.nonexistent; .type == "node")') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_all_on_none_is_true(self): + doc = DocumentView.parse("x = 1\n") + attrs = resolve_path(doc, parse_path("x")) + pred = parse_predicate('all(.nonexistent; .type == "node")') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_any_with_not(self): + pred = parse_predicate('not any(.elements; .type == "function_call")') + self.assertIsInstance(pred, NotExpr) + self.assertIsInstance(pred.child, AnyExpr) + + +class TestStringFunctions(TestCase): + """Test string functions in predicate accessors.""" + + def test_parse_contains(self): + pred = parse_predicate('.source | contains("docker")') + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.builtin, "contains") + self.assertEqual(pred.accessor.builtin_arg, "docker") + + def test_parse_test(self): + pred = parse_predicate('.ami | test("^ami-[0-9]+")') + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.builtin, "test") + self.assertEqual(pred.accessor.builtin_arg, "^ami-[0-9]+") + + def test_parse_startswith(self): + pred = parse_predicate('.name | startswith("prod-")') + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.builtin, "startswith") + self.assertEqual(pred.accessor.builtin_arg, 
"prod-") + + def test_parse_endswith(self): + pred = parse_predicate('.path | endswith("/api")') + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.builtin, "endswith") + self.assertEqual(pred.accessor.builtin_arg, "/api") + + def test_evaluate_contains_true(self): + doc = DocumentView.parse('source = "docker_application_v2"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | contains("docker")') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_evaluate_contains_false(self): + doc = DocumentView.parse('source = "some_module"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | contains("docker")') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_evaluate_test_true(self): + doc = DocumentView.parse('ami = "ami-12345"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | test("^ami-[0-9]+")') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_evaluate_test_false(self): + doc = DocumentView.parse('ami = "xyz-12345"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | test("^ami-[0-9]+")') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_evaluate_startswith_true(self): + doc = DocumentView.parse('name = "prod-api"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | startswith("prod-")') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_evaluate_startswith_false(self): + doc = DocumentView.parse('name = "staging-api"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | startswith("prod-")') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_evaluate_endswith_true(self): + doc = DocumentView.parse('path = "some/path/api"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | endswith("api")') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_evaluate_endswith_false(self): + doc = DocumentView.parse('path 
= "some/path/web"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | endswith("api")') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_contains_on_none_returns_false(self): + doc = DocumentView.parse("x = 1\n") + attrs = doc.body.attributes() + pred = parse_predicate('.nonexistent | contains("x")') + self.assertFalse(evaluate_predicate(pred, attrs[0])) + + def test_test_invalid_regex_raises(self): + doc = DocumentView.parse('x = "hello"\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | test("[invalid")') + with self.assertRaises(QuerySyntaxError): + evaluate_predicate(pred, attrs[0]) + + def test_combined_contains_and_comparison(self): + doc = DocumentView.parse('source = "docker_app"\ncount = 3\n') + attrs = doc.body.attributes() + pred = parse_predicate('.value | contains("docker") and .name == "source"') + # This should parse as: (.value | contains("docker")) and (.name == "source") + # Actually it parses the contains as a bare accessor result, then "and" + # Let's use a simpler combined test + pred = parse_predicate('.name == "source"') + self.assertTrue(evaluate_predicate(pred, attrs[0])) + + def test_unknown_string_function_raises(self): + with self.assertRaises(QuerySyntaxError): + parse_predicate('.value | bogus("x")') + + +class TestPostfixNot(TestCase): + """Test postfix ``| not`` in predicate accessors.""" + + def test_parse_postfix_not(self): + pred = parse_predicate(".tags | not") + self.assertIsInstance(pred, Comparison) + self.assertEqual(pred.accessor.builtin, "not") + + def test_postfix_not_false_when_exists(self): + doc = DocumentView.parse('resource "aws" "x" {\n tags = {}\n}\n') + blocks = doc.blocks() + pred = parse_predicate(".tags | not") + self.assertFalse(evaluate_predicate(pred, blocks[0])) + + def test_postfix_not_true_when_missing(self): + doc = DocumentView.parse('resource "aws" "x" {}\n') + blocks = doc.blocks() + pred = parse_predicate(".tags | not") + 
self.assertTrue(evaluate_predicate(pred, blocks[0])) + + def test_postfix_not_equivalent_to_prefix(self): + doc = DocumentView.parse('variable "a" {\n default = 1\n}\nvariable "b" {}\n') + blocks = doc.blocks("variable") + # "not .default" and ".default | not" should be equivalent + pred_prefix = parse_predicate("not .default") + pred_postfix = parse_predicate(".default | not") + for block in blocks: + self.assertEqual( + evaluate_predicate(pred_prefix, block), + evaluate_predicate(pred_postfix, block), + ) + + +class TestHasExpr(TestCase): + """Test ``has("key")`` predicate.""" + + def test_parse_has(self): + pred = parse_predicate('has("tags")') + self.assertIsInstance(pred, HasExpr) + self.assertEqual(pred.key, "tags") + + def test_has_true(self): + doc = DocumentView.parse('resource "aws" "x" {\n tags = {}\n}\n') + blocks = doc.blocks() + pred = parse_predicate('has("tags")') + self.assertTrue(evaluate_predicate(pred, blocks[0])) + + def test_has_false(self): + doc = DocumentView.parse('resource "aws" "x" {}\n') + blocks = doc.blocks() + pred = parse_predicate('has("tags")') + self.assertFalse(evaluate_predicate(pred, blocks[0])) + + def test_has_equivalent_to_bare_accessor(self): + doc = DocumentView.parse('variable "a" {\n default = 1\n}\nvariable "b" {}\n') + blocks = doc.blocks("variable") + pred_has = parse_predicate('has("default")') + pred_bare = parse_predicate(".default") + for block in blocks: + self.assertEqual( + evaluate_predicate(pred_has, block), + evaluate_predicate(pred_bare, block), + ) + + def test_has_with_not(self): + doc = DocumentView.parse('resource "aws" "x" {}\n') + blocks = doc.blocks() + pred = parse_predicate('not has("tags")') + self.assertTrue(evaluate_predicate(pred, blocks[0])) diff --git a/test/unit/query/test_resolver.py b/test/unit/query/test_resolver.py new file mode 100644 index 00000000..63b38e49 --- /dev/null +++ b/test/unit/query/test_resolver.py @@ -0,0 +1,341 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest 
import TestCase + +from hcl2.query.body import DocumentView +from hcl2.query.path import PathSegment, parse_path +from hcl2.query.resolver import resolve_path + + +class TestResolvePathStructural(TestCase): + def test_simple_attribute(self): + doc = DocumentView.parse("x = 1\n") + results = resolve_path(doc, parse_path("x")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "x") + + def test_block_type(self): + doc = DocumentView.parse('resource "type" "name" {}\n') + results = resolve_path(doc, parse_path("resource")) + self.assertEqual(len(results), 1) + + def test_block_type_with_label(self): + doc = DocumentView.parse( + 'resource "aws_instance" "main" {\n ami = "test"\n}\n' + ) + results = resolve_path(doc, parse_path("resource.aws_instance")) + self.assertEqual(len(results), 1) + + def test_block_full_path(self): + doc = DocumentView.parse( + 'resource "aws_instance" "main" {\n ami = "test"\n}\n' + ) + results = resolve_path(doc, parse_path("resource.aws_instance.main")) + self.assertEqual(len(results), 1) + + def test_block_attribute(self): + doc = DocumentView.parse( + 'resource "aws_instance" "main" {\n ami = "test"\n}\n' + ) + results = resolve_path(doc, parse_path("resource.aws_instance.main.ami")) + self.assertEqual(len(results), 1) + + def test_wildcard_blocks(self): + doc = DocumentView.parse('resource "a" "b" {}\nvariable "c" {}\n') + results = resolve_path(doc, parse_path("*")) + self.assertEqual(len(results), 2) + + def test_select_all(self): + doc = DocumentView.parse('variable "a" {}\nvariable "b" {}\n') + results = resolve_path(doc, parse_path("variable[*]")) + self.assertEqual(len(results), 2) + + def test_index(self): + doc = DocumentView.parse('variable "a" {}\nvariable "b" {}\n') + results = resolve_path(doc, parse_path("variable[0]")) + self.assertEqual(len(results), 1) + + def test_no_match(self): + doc = DocumentView.parse("x = 1\n") + results = resolve_path(doc, parse_path("nonexistent")) + 
self.assertEqual(len(results), 0) + + def test_empty_segments(self): + doc = DocumentView.parse("x = 1\n") + results = resolve_path(doc, []) + self.assertEqual(len(results), 1) # returns root + + def test_label_mismatch(self): + doc = DocumentView.parse('resource "aws_instance" "main" {}\n') + results = resolve_path(doc, parse_path("resource.aws_s3_bucket")) + self.assertEqual(len(results), 0) + + def test_no_label_block(self): + doc = DocumentView.parse("locals {\n x = 1\n}\n") + results = resolve_path(doc, parse_path("locals.x")) + self.assertEqual(len(results), 1) + + def test_wildcard_labels(self): + doc = DocumentView.parse( + 'resource "aws_instance" "main" {}\nresource "aws_s3_bucket" "data" {}\n' + ) + results = resolve_path(doc, parse_path("resource[*].*")) + self.assertEqual(len(results), 2) + + def test_attribute_unwrap_to_object(self): + doc = DocumentView.parse("x = {\n a = 1\n b = 2\n}\n") + results = resolve_path(doc, parse_path("x.a")) + self.assertEqual(len(results), 1) + + def test_attribute_unwrap_to_object_wildcard(self): + doc = DocumentView.parse("x = {\n a = 1\n b = 2\n}\n") + results = resolve_path(doc, parse_path("x.*")) + self.assertEqual(len(results), 2) + + def test_tuple_select_all(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + results = resolve_path( + doc, + [ + PathSegment(name="x", select_all=False, index=None), + PathSegment(name="*", select_all=True, index=None), + ], + ) + self.assertEqual(len(results), 3) + + def test_tuple_index(self): + doc = DocumentView.parse("x = [1, 2, 3]\n") + results = resolve_path( + doc, + [ + PathSegment(name="x", select_all=False, index=None), + PathSegment(name="*", select_all=False, index=1), + ], + ) + self.assertEqual(len(results), 1) + + def test_tuple_index_out_of_range(self): + doc = DocumentView.parse("x = [1, 2]\n") + results = resolve_path( + doc, + [ + PathSegment(name="x", select_all=False, index=None), + PathSegment(name="*", select_all=False, index=99), + ], + ) + 
self.assertEqual(len(results), 0) + + def test_tuple_no_match_without_index(self): + doc = DocumentView.parse("x = [1, 2]\n") + results = resolve_path( + doc, + [ + PathSegment(name="x", select_all=False, index=None), + PathSegment(name="foo", select_all=False, index=None), + ], + ) + self.assertEqual(len(results), 0) + + def test_object_key_no_match(self): + doc = DocumentView.parse("x = {\n a = 1\n}\n") + results = resolve_path(doc, parse_path("x.nonexistent")) + self.assertEqual(len(results), 0) + + def test_wildcard_body_includes_attributes(self): + doc = DocumentView.parse("x = 1\ny = 2\n") + results = resolve_path(doc, parse_path("*")) + self.assertEqual(len(results), 2) + + def test_index_out_of_range_on_blocks(self): + doc = DocumentView.parse('variable "a" {}\n') + results = resolve_path(doc, parse_path("variable[99]")) + self.assertEqual(len(results), 0) + + def test_resolve_on_unknown_node_type(self): + doc = DocumentView.parse("x = 1\n") + attr = doc.attribute("x") + value_view = attr.value_node + results = resolve_path( + value_view, [PathSegment(name="foo", select_all=False, index=None)] + ) + self.assertEqual(len(results), 0) + + def test_block_labels_consumed_then_body(self): + doc = DocumentView.parse( + 'resource "aws_instance" "main" {\n ami = "test"\n}\n' + ) + results = resolve_path(doc, parse_path("resource.aws_instance.main.ami")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "ami") + + +class TestResolveRecursive(TestCase): + def test_recursive_find_nested_attr(self): + hcl = 'resource "type" "name" {\n ami = "test"\n}\n' + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("resource..ami")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "ami") + + def test_recursive_deeply_nested(self): + hcl = ( + 'resource "type" "name" {\n' + ' provisioner "local-exec" {\n' + ' command = "echo"\n' + " }\n" + "}\n" + ) + doc = DocumentView.parse(hcl) + results = resolve_path(doc, 
parse_path("resource..command")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "command") + + def test_recursive_multiple_matches(self): + hcl = ( + 'resource "a" "x" {\n ami = "1"\n}\n' + 'resource "b" "y" {\n ami = "2"\n}\n' + ) + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..ami")) + self.assertEqual(len(results), 2) + + def test_recursive_no_match(self): + hcl = 'resource "type" "name" {\n ami = "test"\n}\n' + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("resource..nonexistent")) + self.assertEqual(len(results), 0) + + def test_recursive_from_root(self): + hcl = 'resource "type" "name" {\n ami = "test"\n}\n' + doc = DocumentView.parse(hcl) + # ".." from root should search everything + results = resolve_path( + doc, + [PathSegment(name="ami", select_all=False, index=None, recursive=True)], + ) + self.assertEqual(len(results), 1) + + def test_recursive_with_select_all(self): + hcl = ( + 'resource "a" "x" {\n tag = "1"\n}\n' + 'resource "b" "y" {\n tag = "2"\n}\n' + ) + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..tag[*]")) + self.assertEqual(len(results), 2) + + +class TestTypeFilter(TestCase): + def test_recursive_function_call_by_name(self): + hcl = 'x = length(var.list)\ny = upper("hello")\n' + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..function_call:length")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "length") + + def test_recursive_function_call_wildcard(self): + hcl = 'x = length(var.list)\ny = upper("hello")\n' + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..function_call:*[*]")) + self.assertEqual(len(results), 2) + + def test_type_filter_attribute(self): + hcl = 'resource "a" "b" {}\nx = 1\n' + doc = DocumentView.parse(hcl) + results = resolve_path( + doc, + [ + PathSegment( + name="*", select_all=True, index=None, type_filter="attribute" + ) + ], + ) + 
self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "x") + + def test_type_filter_block(self): + hcl = 'resource "a" "b" {}\nx = 1\n' + doc = DocumentView.parse(hcl) + results = resolve_path( + doc, + [PathSegment(name="*", select_all=True, index=None, type_filter="block")], + ) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].block_type, "resource") + + def test_type_filter_no_match(self): + hcl = "x = 1\n" + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..function_call:length")) + self.assertEqual(len(results), 0) + + +class TestFunctionCallResolver(TestCase): + def test_function_call_args(self): + hcl = "x = length(var.list)\n" + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..function_call:length")) + self.assertEqual(len(results), 1) + # Navigate to args + args = resolve_path(results[0], parse_path("args")) + self.assertEqual(len(args), 1) + + def test_function_call_args_select_all(self): + hcl = 'x = join(",", var.list)\n' + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..function_call:join")) + self.assertEqual(len(results), 1) + args = resolve_path( + results[0], + [PathSegment(name="args", select_all=True, index=None)], + ) + self.assertEqual(len(args), 2) + + def test_function_call_args_index(self): + hcl = 'x = join(",", var.list)\n' + doc = DocumentView.parse(hcl) + results = resolve_path(doc, parse_path("*..function_call:join")) + self.assertEqual(len(results), 1) + args = resolve_path( + results[0], + [PathSegment(name="args", select_all=False, index=0)], + ) + self.assertEqual(len(args), 1) + + +class TestSkipLabels(TestCase): + """Test the ``~`` (skip labels) operator.""" + + def test_skip_labels_basic(self): + doc = DocumentView.parse( + 'resource "aws_instance" "main" {\n ami = "test"\n}\n' + ) + results = resolve_path(doc, parse_path("resource~.ami")) + self.assertEqual(len(results), 1) + self.assertEqual(results[0].name, "ami") + + 
def test_skip_labels_wildcard(self): + doc = DocumentView.parse( + 'resource "a" "x" {\n ami = 1\n}\n' 'resource "b" "y" {\n ami = 2\n}\n' + ) + results = resolve_path(doc, parse_path("resource~[*]")) + self.assertEqual(len(results), 2) + + def test_skip_labels_with_select(self): + doc = DocumentView.parse('block "a" {\n x = 1\n}\nblock "b" {\n y = 2\n}\n') + results = resolve_path(doc, parse_path("block~[select(.x)]")) + self.assertEqual(len(results), 1) + + def test_skip_labels_delegates_to_body(self): + doc = DocumentView.parse('resource "aws" "main" {\n tags = {}\n}\n') + # Without ~ : need to consume labels + r1 = resolve_path(doc, parse_path("resource.aws.main.tags")) + self.assertEqual(len(r1), 1) + # With ~ : skip labels directly + r2 = resolve_path(doc, parse_path("resource~.tags")) + self.assertEqual(len(r2), 1) + + def test_no_skip_labels_matches_labels(self): + doc = DocumentView.parse('resource "aws_instance" "main" {\n ami = 1\n}\n') + # Without ~, "aws_instance" matches the label + results = resolve_path(doc, parse_path("resource.aws_instance")) + self.assertEqual(len(results), 1) diff --git a/test/unit/query/test_safe_eval.py b/test/unit/query/test_safe_eval.py new file mode 100644 index 00000000..3bc071cc --- /dev/null +++ b/test/unit/query/test_safe_eval.py @@ -0,0 +1,127 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.query.safe_eval import ( + UnsafeExpressionError, + safe_eval, + validate_expression, +) + + +class TestValidateExpression(TestCase): + def test_simple_attribute(self): + validate_expression("x.foo") + + def test_method_call(self): + validate_expression("x.blocks('resource')") + + def test_safe_builtin(self): + validate_expression("len(x)") + + def test_lambda(self): + validate_expression("sorted(x, key=lambda b: b.name)") + + def test_comparison(self): + validate_expression("x == 1") + + def test_boolean_ops(self): + validate_expression("x and y or not z") + + def test_subscript(self): + 
validate_expression("x[0]") + + def test_constant(self): + validate_expression("42") + + def test_rejects_import(self): + with self.assertRaises(UnsafeExpressionError): + validate_expression("__import__('os')") + + def test_rejects_exec(self): + with self.assertRaises(UnsafeExpressionError): + validate_expression("exec('code')") + + def test_rejects_eval(self): + with self.assertRaises(UnsafeExpressionError): + validate_expression("eval('code')") + + def test_rejects_comprehension(self): + with self.assertRaises(UnsafeExpressionError): + validate_expression("[x for x in y]") + + def test_syntax_error(self): + with self.assertRaises(UnsafeExpressionError): + validate_expression("def foo(): pass") + + +class TestSafeEval(TestCase): + def test_attribute_access(self): + class Obj: + name = "test_value" + + result = safe_eval("x.name", {"x": Obj()}) + self.assertEqual(result, "test_value") + + def test_method_call(self): + result = safe_eval("x.upper()", {"x": "hello"}) + self.assertEqual(result, "HELLO") + + def test_len(self): + result = safe_eval("len(x)", {"x": [1, 2, 3]}) + self.assertEqual(result, 3) + + def test_sorted(self): + result = safe_eval("sorted(x)", {"x": [3, 1, 2]}) + self.assertEqual(result, [1, 2, 3]) + + def test_sorted_with_key(self): + result = safe_eval( + "sorted(x, key=lambda i: -i)", + {"x": [3, 1, 2]}, + ) + self.assertEqual(result, [3, 2, 1]) + + def test_subscript(self): + result = safe_eval("x[1]", {"x": [10, 20, 30]}) + self.assertEqual(result, 20) + + def test_filter_lambda(self): + result = safe_eval( + "list(filter(lambda i: i > 1, x))", + {"x": [1, 2, 3]}, + ) + self.assertEqual(result, [2, 3]) + + def test_boolean_ops(self): + result = safe_eval("x and y", {"x": True, "y": False}) + self.assertFalse(result) + + def test_comparison(self): + result = safe_eval("x == 42", {"x": 42}) + self.assertTrue(result) + + def test_restricted_no_builtins(self): + with self.assertRaises(Exception): + safe_eval("open('/etc/passwd')", {}) + + def 
test_max_depth(self): + # Build deeply nested attribute access + expr = "x" + ".a" * 25 + with self.assertRaises(UnsafeExpressionError) as ctx: + validate_expression(expr) + self.assertIn("depth", str(ctx.exception)) + + def test_max_node_count(self): + # Build an expression with many AST nodes via one wide list literal passed to len() + # len([1, ..., 1]) has 210 Constant nodes plus List, Call, Name and Expression: over the presumed 200-node limit (confirm in safe_eval) + args = ", ".join(["1"] * 210) + expr = f"len([{args}])" + with self.assertRaises(UnsafeExpressionError) as ctx: + validate_expression(expr) + self.assertIn("node count", str(ctx.exception)) + + def test_rejects_non_attr_non_name_call(self): + # (lambda: 1)() — Call where func is a Lambda, not Name/Attribute + with self.assertRaises(UnsafeExpressionError) as ctx: + validate_expression("(lambda: 1)()") + self.assertIn("Only method calls", str(ctx.exception)) diff --git a/test/unit/rules/test_whitespace.py b/test/unit/rules/test_whitespace.py index 49fde824..d182b789 100644 --- a/test/unit/rules/test_whitespace.py +++ b/test/unit/rules/test_whitespace.py @@ -49,35 +49,41 @@ def test_to_list_bare_newline_returns_none(self): def test_to_list_line_comment(self): rule = _make_nlc("// my comment\n") result = rule.to_list() - self.assertEqual(result, ["my comment"]) + self.assertEqual(result, [{"value": "my comment"}]) def test_to_list_hash_comment(self): rule = _make_nlc("# my comment\n") result = rule.to_list() - self.assertEqual(result, ["my comment"]) + self.assertEqual(result, [{"value": "my comment"}]) def test_to_list_block_comment(self): rule = _make_nlc("/* block comment */\n") result = rule.to_list() - self.assertEqual(result, ["block comment"]) + self.assertEqual(result, [{"value": "block comment"}]) def test_to_list_line_comment_ending_in_block_close(self): """A // comment ending in */ should preserve the */ suffix.""" rule = _make_nlc("// comment ending in */\n") result = rule.to_list() - self.assertEqual(result, ["comment ending in */"]) + self.assertEqual(result, 
[{"value": "comment ending in */"}]) def test_to_list_hash_comment_ending_in_block_close(self): """A # comment ending in */ should preserve the */ suffix.""" rule = _make_nlc("# comment ending in */\n") result = rule.to_list() - self.assertEqual(result, ["comment ending in */"]) + self.assertEqual(result, [{"value": "comment ending in */"}]) + + def test_to_list_multiline_block_comment(self): + """A multiline block comment should be a single value.""" + rule = _make_nlc("/* \nline one\nline two\n*/\n") + result = rule.to_list() + self.assertEqual(result, [{"value": "line one\nline two"}]) def test_to_list_multiple_comments(self): rule = _make_nlc("// first\n// second\n") result = rule.to_list() - self.assertIn("first", result) - self.assertIn("second", result) + self.assertIn({"value": "first"}, result) + self.assertIn({"value": "second"}, result) def test_token_property(self): token = NL_OR_COMMENT("\n") @@ -119,7 +125,7 @@ def test_inline_comments_collects_from_children(self): rule = ConcreteInlineComment([NAME("x"), comment]) result = rule.inline_comments() - self.assertEqual(result, ["hello"]) + self.assertEqual(result, [{"value": "hello"}]) def test_inline_comments_skips_bare_newlines(self): newline = _make_nlc("\n") @@ -133,7 +139,7 @@ def test_inline_comments_recursive(self): inner = ConcreteInlineComment([comment]) outer = ConcreteInlineComment([inner]) result = outer.inline_comments() - self.assertEqual(result, ["inner"]) + self.assertEqual(result, [{"value": "inner"}]) def test_inline_comments_empty(self): diff --git a/test/unit/test_api.py b/test/unit/test_api.py index 9f6c9dbd..7a4bde0d 100644 --- a/test/unit/test_api.py +++ b/test/unit/test_api.py @@ -18,6 +18,7 @@ reconstruct, transform, serialize, + query, ) from hcl2.deserializer import DeserializerOptions from hcl2.formatter import FormatterOptions @@ -279,3 +280,22 @@ def test_dumps_on_non_dict_returns_empty(self): def test_from_json_raises_on_invalid_json(self): with self.assertRaises(Exception): 
from_json("{not valid json") + + +class TestQuery(TestCase): + def test_query_string(self): + from hcl2.query.body import DocumentView + + result = query(SIMPLE_HCL) + self.assertIsInstance(result, DocumentView) + attr = result.attribute("x") + self.assertIsNotNone(attr) + + def test_query_file_object(self): + from hcl2.query.body import DocumentView + + f = StringIO(SIMPLE_HCL) + result = query(f) + self.assertIsInstance(result, DocumentView) + attr = result.attribute("x") + self.assertIsNotNone(attr) diff --git a/test/unit/test_walk.py b/test/unit/test_walk.py new file mode 100644 index 00000000..ec81bb48 --- /dev/null +++ b/test/unit/test_walk.py @@ -0,0 +1,167 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.rules.base import AttributeRule, BlockRule, BodyRule, StartRule +from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE, NL_OR_COMMENT +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.utils import SerializationOptions, SerializationContext +from hcl2.walk import ( + ancestors, + find_all, + find_by_predicate, + find_first, + walk, + walk_rules, + walk_semantic, +) + + +class StubExpression(ExpressionRule): + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_expr_term(value): + return ExprTermRule([StubExpression(value)]) + + +def _make_nlc(text): + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +def _make_attribute(name, value): + return AttributeRule([_make_identifier(name), EQ(), _make_expr_term(value)]) + + +def _make_block(labels, body_children=None): + body = BodyRule(body_children or []) + children = list(labels) + [LBRACE(), body, 
RBRACE()] + return BlockRule(children) + + +class TestWalk(TestCase): + def test_walk_single_node(self): + attr = _make_attribute("x", 1) + nodes = list(walk(attr)) + self.assertIn(attr, nodes) + self.assertTrue(len(nodes) > 1) + + def test_walk_skips_none(self): + attr = _make_attribute("x", 1) + nodes = list(walk(attr)) + self.assertTrue(all(n is not None for n in nodes)) + + def test_walk_includes_tokens(self): + from hcl2.rules.abstract import LarkToken + + attr = _make_attribute("x", 1) + nodes = list(walk(attr)) + has_token = any(isinstance(n, LarkToken) for n in nodes) + self.assertTrue(has_token) + + +class TestWalkRules(TestCase): + def test_only_rules(self): + from hcl2.rules.abstract import LarkRule, LarkToken + + attr = _make_attribute("x", 1) + rules = list(walk_rules(attr)) + for r in rules: + self.assertIsInstance(r, LarkRule) + self.assertNotIsInstance(r, LarkToken) + + +class TestWalkSemantic(TestCase): + def test_no_whitespace(self): + nlc = _make_nlc("\n") + body = BodyRule([nlc, _make_attribute("x", 1)]) + rules = list(walk_semantic(body)) + for r in rules: + self.assertNotIsInstance(r, NewLineOrCommentRule) + + def test_finds_attribute(self): + body = BodyRule([_make_attribute("x", 1)]) + rules = list(walk_semantic(body)) + self.assertTrue(any(isinstance(r, AttributeRule) for r in rules)) + + +class TestFindAll(TestCase): + def test_finds_all_attributes(self): + body = BodyRule([_make_attribute("x", 1), _make_attribute("y", 2)]) + start = StartRule([body]) + attrs = list(find_all(start, AttributeRule)) + self.assertEqual(len(attrs), 2) + + def test_finds_nested(self): + BodyRule([_make_attribute("inner", 1)]) # unused but creates parent refs + block = _make_block( + [_make_identifier("resource")], [_make_attribute("outer", 2)] + ) + outer_body = BodyRule([block]) + start = StartRule([outer_body]) + attrs = list(find_all(start, AttributeRule)) + self.assertEqual(len(attrs), 1) # only "outer" (inside the block's body); "inner" sits in a detached body + + def 
test_finds_blocks(self): + block = _make_block([_make_identifier("resource")]) + body = BodyRule([block]) + start = StartRule([body]) + blocks = list(find_all(start, BlockRule)) + self.assertEqual(len(blocks), 1) + + +class TestFindFirst(TestCase): + def test_finds_first(self): + body = BodyRule([_make_attribute("x", 1), _make_attribute("y", 2)]) + start = StartRule([body]) + attr = find_first(start, AttributeRule) + self.assertIsNotNone(attr) + self.assertEqual(attr.identifier.serialize(), "x") + + def test_returns_none(self): + body = BodyRule([]) + start = StartRule([body]) + result = find_first(start, AttributeRule) + self.assertIsNone(result) + + +class TestFindByPredicate(TestCase): + def test_predicate(self): + attr1 = _make_attribute("x", 1) + attr2 = _make_attribute("y", 2) + body = BodyRule([attr1, attr2]) + found = list( + find_by_predicate( + body, + lambda n: isinstance(n, AttributeRule) + and n.identifier.serialize() == "x", + ) + ) + self.assertEqual(len(found), 1) + self.assertIs(found[0], attr1) + + +class TestAncestors(TestCase): + def test_parent_chain(self): + attr = _make_attribute("x", 1) + body = BodyRule([attr]) + start = StartRule([body]) + chain = list(ancestors(attr)) + self.assertEqual(chain[0], body) + self.assertEqual(chain[1], start) + + def test_empty_for_root(self): + body = BodyRule([]) + start = StartRule([body]) + chain = list(ancestors(start)) + self.assertEqual(len(chain), 0)