From d04a4c78b4f4bf44870d6b84cf22321b61757e07 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 11 Mar 2026 20:07:04 +0100 Subject: [PATCH] Fix pylint issues: line length in strings.py, top-level import in test_specialized.py Co-Authored-By: Claude Opus 4.6 --- hcl2/deserializer.py | 25 + hcl2/hcl2.lark | 31 +- hcl2/reconstructor.py | 54 +++ hcl2/rules/directives.py | 429 ++++++++++++++++++ hcl2/rules/strings.py | 57 ++- hcl2/rules/tokens.py | 7 + hcl2/transformer.py | 118 +++++ .../specialized/template_directives.json | 14 + .../specialized/template_directives.tf | 12 + .../template_directives_reconstructed.tf | 12 + .../template_directives_reserialized.json | 14 + test/integration/test_specialized.py | 54 +++ test/unit/rules/test_directives.py | 187 ++++++++ 13 files changed, 1005 insertions(+), 9 deletions(-) create mode 100644 hcl2/rules/directives.py create mode 100644 test/integration/specialized/template_directives.json create mode 100644 test/integration/specialized/template_directives.tf create mode 100644 test/integration/specialized/template_directives_reconstructed.tf create mode 100644 test/integration/specialized/template_directives_reserialized.json create mode 100644 test/unit/rules/test_directives.py diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 5e282503..1944c8cf 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,4 +1,5 @@ """Deserialize Python dicts (or JSON) into LarkElement trees.""" + import json import re from abc import ABC, abstractmethod @@ -189,6 +190,13 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule: return IdentifierRule([NAME(value)]) def _deserialize_string(self, value: str) -> StringRule: + # If the string contains template directives, delegate to parser + inner = value[1:-1] if value.startswith('"') and value.endswith('"') else value + # Check for unescaped %{ (i.e. 
# Method of the deserializer class in hcl2/deserializer.py (the class
# statement is outside the visible hunk).
def _deserialize_string_via_parser(self, value: str) -> StringRule:
    """Build a StringRule for *value* by running it through the HCL parser.

    The value is wrapped in double quotes when it is not already quoted,
    embedded in a throw-away ``temp = <value>`` attribute, parsed, and
    transformed. The StringRule is then picked out of the attribute's
    expression.
    """
    already_quoted = value.startswith('"') and value.endswith('"')
    quoted = value if already_quoted else f'"{value}"'

    lark_tree = _get_parser().parse(f"temp = {quoted}")
    rules_tree = self._transformer.transform(lark_tree)

    # Path: start -> body -> first attribute -> expression -> string.
    expression = rules_tree.body.children[0].expression
    string_rule = next(
        (node for node in expression.children if isinstance(node, StringRule)),
        None,
    )
    if string_rule is not None:
        return string_rule
    # Not expected to happen; hand back the expression unchanged.
    return expression  # type: ignore[return-value]
DECIMAL+ @@ -51,11 +55,16 @@ COMMA : "," DOT : "." EQ : /[ \t]*=(?!=|>)/ COLON : /[ \t]*:(?!:)/ -DBLQUOTE : "\"" +DBLQUOTE : /\\?"/ +TEMPLATE_STRING.3 : /\\\\"(?:[^"\\\\]|\\\\.)*\\\\"/ // Interpolation INTERP_START : "${" +// Template Directives +DIRECTIVE_START : "%{" +STRIP_MARKER : "~" + // Splat Operators ATTR_SPLAT : ".*" FULL_SPLAT_START : "[*]" @@ -90,19 +99,32 @@ new_line_or_comment: ( NL_OR_COMMENT )+ // Basic literals and identifiers identifier : NAME -keyword: IN | FOR | IF | FOR_EACH +keyword: IN | FOR | IF | FOR_EACH | ELSE | ENDIF | ENDFOR int_lit: INT_LITERAL float_lit: FLOAT_LITERAL string: DBLQUOTE string_part* DBLQUOTE string_part: STRING_CHARS | ESCAPED_INTERPOLATION + | ESCAPED_DIRECTIVE | interpolation + | template_if_start + | template_else + | template_endif + | template_for_start + | template_endfor // Expressions ?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional | or_expr interpolation: INTERP_START expression RBRACE +// Template directives (flat rules — transformer assembles if/for structure) +template_if_start: DIRECTIVE_START STRIP_MARKER? IF expression STRIP_MARKER? RBRACE +template_else: DIRECTIVE_START STRIP_MARKER? ELSE STRIP_MARKER? RBRACE +template_endif: DIRECTIVE_START STRIP_MARKER? ENDIF STRIP_MARKER? RBRACE +template_for_start: DIRECTIVE_START STRIP_MARKER? FOR identifier (COMMA identifier)? IN expression STRIP_MARKER? RBRACE +template_endfor: DIRECTIVE_START STRIP_MARKER? ENDFOR STRIP_MARKER? RBRACE + // Operator precedence ladder (lowest to highest) // Each level uses left recursion for left-associativity. // Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain @@ -160,6 +182,7 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | float_lit | int_lit | string + | template_string | tuple | object | identifier @@ -173,6 +196,8 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? 
RPAR | for_tuple_expr | for_object_expr +template_string : TEMPLATE_STRING + // Collections tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? COMMA)) new_line_or_comment?)* RBRACE diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 2a9ca708..242ac09f 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,10 +1,20 @@ """Reconstruct HCL2 text from a Lark Tree AST.""" + from typing import List, Optional, Union from lark import Tree, Token from hcl2.rules import tokens from hcl2.rules.base import BlockRule from hcl2.rules.containers import ObjectElemRule +from hcl2.rules.directives import ( + TemplateIfRule, + TemplateForRule, + TemplateIfStartRule, + TemplateElseRule, + TemplateEndifRule, + TemplateForStartRule, + TemplateEndforRule, +) from hcl2.rules.for_expressions import ForIntroRule, ForTupleExprRule, ForObjectExprRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule @@ -100,6 +110,50 @@ def _should_add_space_before( return False return True + # Template directive spacing: %{~ keyword ~} patterns + _directive_rules = ( + TemplateIfStartRule.lark_name(), + TemplateElseRule.lark_name(), + TemplateEndifRule.lark_name(), + TemplateForStartRule.lark_name(), + TemplateEndforRule.lark_name(), + TemplateIfRule.lark_name(), + TemplateForRule.lark_name(), + ) + if parent_rule_name in _directive_rules: + # Space after DIRECTIVE_START (before keyword or strip marker) + if self._last_token_name == tokens.DIRECTIVE_START.lark_name(): + # No space before strip marker + if token_type == tokens.STRIP_MARKER.lark_name(): + return False + return True + # Space after STRIP_MARKER (before keyword) + if self._last_token_name == tokens.STRIP_MARKER.lark_name(): + # After strip marker: space before keyword, no space 
"""Rule classes for HCL2 template directives (%{if}, %{for})."""

from typing import Any, List, Optional, Tuple

from lark import Tree
from lark.tree import Meta

from hcl2.rules.abstract import LarkRule
from hcl2.rules.expressions import ExpressionRule
from hcl2.rules.literal_rules import IdentifierRule
from hcl2.rules.tokens import (
    DIRECTIVE_START,
    STRIP_MARKER,
    IF,
    ELSE,
    ENDIF,
    FOR,
    IN,
    ENDFOR,
    COMMA,
    RBRACE,
    StaticStringToken,
)
from hcl2.utils import SerializationOptions, SerializationContext


def _is_strip(child) -> bool:
    """Check if a child is a STRIP_MARKER token.

    The expected name comes from the STRIP_MARKER token class itself, so the
    check cannot drift from the token definition.
    """
    return (
        isinstance(child, StaticStringToken)
        and child.lark_name() == STRIP_MARKER.lark_name()
    )


def _strip_prefix(is_strip: bool) -> str:
    """Return the text that follows ``%{``: ``"~ "`` with a strip marker, else ``" "``."""
    return "~ " if is_strip else " "


def _strip_suffix(is_strip: bool) -> str:
    """Return the text that precedes ``}``: ``" ~"`` with a strip marker, else ``" "``."""
    return " ~" if is_strip else " "


def _render_directive(body: str, strip_open: bool, strip_close: bool) -> str:
    """Wrap *body* in ``%{ ... }``, adding ``~`` on either side when requested.

    Examples: ``%{ else }``, ``%{~ endif ~}``, ``%{~ if cond }``.
    """
    return "%{" + _strip_prefix(strip_open) + body + _strip_suffix(strip_close) + "}"


def _insert_strip_optionals(children: List, indexes: List[int]) -> None:
    """Insert None placeholders where an optional STRIP_MARKER is absent.

    *children* is modified in place. Indexes are processed in ascending
    order, so each insertion shifts the positions examined afterwards.
    A None is inserted at an index when the list is too short to have an
    element there, or when the element there is not a STRIP_MARKER.
    """
    for index in sorted(indexes):
        try:
            child = children[index]
        except IndexError:
            children.insert(index, None)
        else:
            if not _is_strip(child):
                children.insert(index, None)


class TemplateIfStartRule(LarkRule):
    """Rule for %{if condition} opening directive."""

    _children_layout: Tuple[
        DIRECTIVE_START,
        Optional[STRIP_MARKER],
        IF,
        ExpressionRule,
        Optional[STRIP_MARKER],
        RBRACE,
    ]

    def __init__(self, children, meta: Optional[Meta] = None):
        # Normalize to the 6-slot layout above (strip markers at 1 and 4).
        _insert_strip_optionals(children, [1, 4])
        super().__init__(children, meta)

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_if_start"

    @property
    def strip_open(self) -> bool:
        """Check if there's a strip marker after %{."""
        return self._children[1] is not None

    @property
    def condition(self) -> ExpressionRule:
        """Return the condition expression."""
        return self._children[3]

    @property
    def strip_close(self) -> bool:
        """Check if there's a strip marker before }."""
        return self._children[4] is not None

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize to %{ if EXPR } or %{~ if EXPR ~}."""
        # The condition is serialized with inside_dollar_string=True.
        with context.modify(inside_dollar_string=True):
            condition = self.condition.serialize(options, context)
        return _render_directive(
            f"if {condition}", self.strip_open, self.strip_close
        )


class TemplateElseRule(LarkRule):
    """Rule for %{else} directive."""

    _children_layout: Tuple[
        DIRECTIVE_START,
        Optional[STRIP_MARKER],
        ELSE,
        Optional[STRIP_MARKER],
        RBRACE,
    ]

    def __init__(self, children, meta: Optional[Meta] = None):
        # Normalize to the 5-slot layout above (strip markers at 1 and 3).
        _insert_strip_optionals(children, [1, 3])
        super().__init__(children, meta)

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_else"

    @property
    def strip_open(self) -> bool:
        """Check if there's a strip marker after %{."""
        return self._children[1] is not None

    @property
    def strip_close(self) -> bool:
        """Check if there's a strip marker before }."""
        return self._children[3] is not None

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize to %{ else } or %{~ else ~}."""
        return _render_directive("else", self.strip_open, self.strip_close)


class TemplateEndifRule(LarkRule):
    """Rule for %{endif} directive."""

    _children_layout: Tuple[
        DIRECTIVE_START,
        Optional[STRIP_MARKER],
        ENDIF,
        Optional[STRIP_MARKER],
        RBRACE,
    ]

    def __init__(self, children, meta: Optional[Meta] = None):
        # Normalize to the 5-slot layout above (strip markers at 1 and 3).
        _insert_strip_optionals(children, [1, 3])
        super().__init__(children, meta)

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_endif"

    @property
    def strip_open(self) -> bool:
        """Check if there's a strip marker after %{."""
        return self._children[1] is not None

    @property
    def strip_close(self) -> bool:
        """Check if there's a strip marker before }."""
        return self._children[3] is not None

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize to %{ endif } or %{~ endif ~}."""
        return _render_directive("endif", self.strip_open, self.strip_close)


class TemplateForStartRule(LarkRule):
    """Rule for %{for VAR in EXPR} opening directive."""

    _children_layout: Tuple[
        DIRECTIVE_START,
        Optional[STRIP_MARKER],
        FOR,
        IdentifierRule,
        Optional[COMMA],
        Optional[IdentifierRule],
        IN,
        ExpressionRule,
        Optional[STRIP_MARKER],
        RBRACE,
    ]

    def __init__(self, children, meta: Optional[Meta] = None):
        self._setup_optionals(children)
        super().__init__(children, meta)

    def _setup_optionals(self, children: List):
        """Insert None placeholders for optional strip markers and second iterator.

        Parser output varies:
          [DIRECTIVE_START, STRIP?, FOR, id, (COMMA, id)?, IN, expr, STRIP?, RBRACE]
        Target layout (10 positions):
          [0:DIRECTIVE_START, 1:STRIP?, 2:FOR, 3:id, 4:COMMA?, 5:id?,
           6:IN, 7:expr, 8:STRIP?, 9:RBRACE]

        *children* is modified in place.
        """
        # Step 1: strip_open placeholder at position 1.
        _insert_strip_optionals(children, [1])

        # Step 2: optional COMMA + second identifier.
        # Only identifiers that appear before IN are iterators; anything
        # after IN belongs to the collection expression.
        iterator_count = 0
        for child in children:
            if (
                isinstance(child, StaticStringToken)
                and child.lark_name() == IN.lark_name()
            ):
                break
            if isinstance(child, IdentifierRule):
                iterator_count += 1
        if iterator_count < 2:
            # No second iterator: pad the COMMA and identifier slots (4, 5).
            children.insert(4, None)
            children.insert(5, None)

        # Step 3: strip_close placeholder at position 8.
        _insert_strip_optionals(children, [8])

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_for_start"

    @property
    def strip_open(self) -> bool:
        """Check if there's a strip marker after %{."""
        return self._children[1] is not None

    @property
    def strip_close(self) -> bool:
        """Check if there's a strip marker before }."""
        return self._children[8] is not None

    @property
    def iterator(self) -> IdentifierRule:
        """Return the first iterator identifier."""
        return self._children[3]

    @property
    def key_iterator(self) -> Optional[IdentifierRule]:
        """Return the second iterator identifier, or None.

        NOTE(review): for ``for k, v in ...`` this returns the *second*
        identifier despite the name; confirm the naming is intended.
        """
        return self._children[5]

    @property
    def collection(self) -> ExpressionRule:
        """Return the collection expression after IN."""
        return self._children[7]

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize to %{ for VAR in EXPR } or %{~ for VAR in EXPR ~}.

        With a second iterator the form is ``for A, B in EXPR``.
        """
        with context.modify(inside_dollar_string=True):
            iterators = self.iterator.serialize(options, context)
            if self.key_iterator is not None:
                iterators += f", {self.key_iterator.serialize(options, context)}"
            collection = self.collection.serialize(options, context)
        return _render_directive(
            f"for {iterators} in {collection}", self.strip_open, self.strip_close
        )


class TemplateEndforRule(LarkRule):
    """Rule for %{endfor} directive."""

    _children_layout: Tuple[
        DIRECTIVE_START,
        Optional[STRIP_MARKER],
        ENDFOR,
        Optional[STRIP_MARKER],
        RBRACE,
    ]

    def __init__(self, children, meta: Optional[Meta] = None):
        # Normalize to the 5-slot layout above (strip markers at 1 and 3).
        _insert_strip_optionals(children, [1, 3])
        super().__init__(children, meta)

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_endfor"

    @property
    def strip_open(self) -> bool:
        """Check if there's a strip marker after %{."""
        return self._children[1] is not None

    @property
    def strip_close(self) -> bool:
        """Check if there's a strip marker before }."""
        return self._children[3] is not None

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize to %{ endfor } or %{~ endfor ~}."""
        return _render_directive("endfor", self.strip_open, self.strip_close)


class TemplateIfRule(LarkRule):
    """Assembled rule for a complete %{if}...%{else}...%{endif} template.

    This is NOT produced by the parser directly — it is assembled by the
    transformer from flat TemplateIfStartRule/TemplateElseRule/TemplateEndifRule
    and interleaved StringPartRule children.
    """

    _children_layout: Tuple[
        TemplateIfStartRule,
        # ... variable number of body StringPartRules ...
        # Optional[TemplateElseRule],
        # ... variable number of else body StringPartRules ...
        TemplateEndifRule,
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_if"

    def __init__(  # pylint: disable=R0917
        self,
        if_start: TemplateIfStartRule,
        if_body: list,
        else_rule: Optional[TemplateElseRule],
        else_body: Optional[list],
        endif: TemplateEndifRule,
        meta: Optional[Meta] = None,
    ):
        self._if_start = if_start
        self._if_body = if_body
        self._else_rule = else_rule
        self._else_body = else_body or []
        self._endif = endif

        # Children list handed to LarkRule; the else branch is included only
        # when an else directive is present.
        children = [if_start, *if_body]
        if else_rule is not None:
            children.extend([else_rule, *self._else_body])
        children.append(endif)
        super().__init__(children, meta)

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize the full if/else/endif directive as one string."""
        pieces = [self._if_start, *self._if_body]
        if self._else_rule is not None:
            pieces.append(self._else_rule)
            pieces.extend(self._else_body)
        pieces.append(self._endif)
        return "".join(piece.serialize(options, context) for piece in pieces)

    def to_lark(self):
        """Return one ``template_if`` tree for reconstruction.

        The children of each directive's own tree are spliced in directly;
        every body part is appended as its own subtree.
        """
        children: list = []
        children.extend(self._if_start.to_lark().children)
        children.extend(part.to_lark() for part in self._if_body)
        if self._else_rule is not None:
            children.extend(self._else_rule.to_lark().children)
            children.extend(part.to_lark() for part in self._else_body)
        children.extend(self._endif.to_lark().children)
        return Tree(self.lark_name(), children, meta=self._meta)


class TemplateForRule(LarkRule):
    """Assembled rule for a complete %{for}...%{endfor} template."""

    _children_layout: Tuple[
        TemplateForStartRule,
        # ... variable number of body StringPartRules ...
        TemplateEndforRule,
    ]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_for"

    def __init__(
        self,
        for_start: TemplateForStartRule,
        body: list,
        endfor: TemplateEndforRule,
        meta: Optional[Meta] = None,
    ):
        self._for_start = for_start
        self._body = body
        self._endfor = endfor

        children = [for_start, *body, endfor]
        super().__init__(children, meta)

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize the full for/endfor directive as one string."""
        pieces = [self._for_start, *self._body, self._endfor]
        return "".join(piece.serialize(options, context) for piece in pieces)

    def to_lark(self):
        """Return one ``template_for`` tree for reconstruction.

        The children of the for/endfor directive trees are spliced in
        directly; every body part is appended as its own subtree.
        """
        children: list = []
        children.extend(self._for_start.to_lark().children)
        children.extend(part.to_lark() for part in self._body)
        children.extend(self._endfor.to_lark().children)
        return Tree(self.lark_name(), children, meta=self._meta)
class TemplateStringRule(LarkRule):
    """Rule for a string delimited by backslash-escaped double quotes.

    Wraps a single TEMPLATE_STRING token, used for strings that appear
    inside template expressions.
    """

    _children_layout: Tuple[TEMPLATE_STRING]

    @staticmethod
    def lark_name() -> str:
        """Return the grammar rule name."""
        return "template_string"

    @property
    def raw_value(self) -> str:
        """Return the token text as-is, escaped quote delimiters included."""
        token = self._children[0]
        return str(token.value)

    @property
    def inner_value(self) -> str:
        """Return the token text with the escaped quote delimiters removed.

        The text is returned unchanged when it does not both start and end
        with a backslash-quote pair.
        """
        text = self.raw_value
        delimiter = '\\"'
        is_wrapped = text.startswith(delimiter) and text.endswith(delimiter)
        # Each delimiter is two characters long.
        return text[2:-2] if is_wrapped else text

    def serialize(
        self, options=SerializationOptions(), context=SerializationContext()
    ) -> Any:
        """Serialize the string, keeping its escaped-quote delimiters by default.

        Inside template directive expressions, strings are delimited by a
        backslash-escaped quote rather than a plain quote. The delimiters
        are preserved so the deserializer can reconstruct the string; they
        are dropped only when ``options.strip_string_quotes`` is set.
        """
        if options.strip_string_quotes:
            return self.inner_value
        return self.raw_value
# Methods of the transformer class in hcl2/transformer.py. The class
# statement is outside the visible hunks, so they are shown here dedented.


@v_args(meta=True)
def string(self, meta: Meta, args) -> StringRule:
    """Build a StringRule, nesting flat directive parts into if/for rules first."""
    nested_parts = self._assemble_directives(list(args), meta)
    return StringRule(nested_parts, meta)


def _assemble_directives(self, parts, meta: Meta):
    """Assemble flat directive string_parts into nested template rules.

    Walks *parts* left to right. Whenever a StringPartRule holding a
    TemplateIfStartRule/TemplateForStartRule is met, everything up to the
    matching endif/endfor (nesting included) is replaced by one
    StringPartRule wrapping the assembled rule. Other parts are kept as-is.
    """
    assembled_parts = []
    cursor = 0
    while cursor < len(parts):
        nested, next_cursor = self._try_assemble_nested(parts, cursor, meta)
        if nested is None:
            # Not an opener: keep the part and move one step forward.
            assembled_parts.append(parts[cursor])
            cursor += 1
        else:
            assembled_parts.append(StringPartRule([nested], meta))
            cursor = next_cursor
    return assembled_parts


def _try_assemble_nested(self, parts, idx, meta):
    """If parts[idx] starts a directive, assemble and return (rule, next_idx).

    Returns (None, idx) if parts[idx] is not a directive opener.
    """
    candidate = parts[idx]
    if not isinstance(candidate, StringPartRule):
        return None, idx
    opener = candidate.content
    if isinstance(opener, TemplateIfStartRule):
        return self._assemble_template_if(parts, idx, meta)
    if isinstance(opener, TemplateForStartRule):
        return self._assemble_template_for(parts, idx, meta)
    return None, idx


def _collect_body(self, parts, start, end_types, meta):
    """Collect body parts from *start* up to a terminating directive.

    A terminator is a StringPartRule whose content is an instance of
    *end_types*. Nested directives met on the way are assembled
    recursively. Returns (body_list, end_content, next_idx); end_content
    is None when *parts* ran out before a terminator was found.
    """
    collected: list = []
    cursor = start
    while cursor < len(parts):
        current = parts[cursor]
        if isinstance(current, StringPartRule) and isinstance(
            current.content, end_types
        ):
            return collected, current.content, cursor + 1
        nested, next_cursor = self._try_assemble_nested(parts, cursor, meta)
        if nested is None:
            collected.append(parts[cursor])
            cursor += 1
        else:
            collected.append(StringPartRule([nested], meta))
            cursor = next_cursor
    return collected, None, cursor


def _assemble_template_if(self, parts, start_idx, meta: Meta):
    """Assemble a TemplateIfRule from flat parts starting at start_idx.

    Returns (rule, next_idx). Raises RuntimeError when no endif closes
    the directive.
    """
    opener = parts[start_idx].content
    # The if-branch ends at either an else or an endif directive.
    then_parts, terminator, cursor = self._collect_body(
        parts, start_idx + 1, (TemplateElseRule, TemplateEndifRule), meta
    )
    else_directive = None
    else_parts = None
    if isinstance(terminator, TemplateElseRule):
        else_directive = terminator
        else_parts, terminator, cursor = self._collect_body(
            parts, cursor, (TemplateEndifRule,), meta
        )
    if not isinstance(terminator, TemplateEndifRule):
        raise RuntimeError("Unterminated template if directive")
    assembled = TemplateIfRule(
        opener, then_parts, else_directive, else_parts, terminator, meta
    )
    return assembled, cursor


def _assemble_template_for(self, parts, start_idx, meta: Meta):
    """Assemble a TemplateForRule from flat parts starting at start_idx.

    Returns (rule, next_idx). Raises RuntimeError when no endfor closes
    the directive.
    """
    opener = parts[start_idx].content
    loop_parts, terminator, cursor = self._collect_body(
        parts, start_idx + 1, (TemplateEndforRule,), meta
    )
    if not isinstance(terminator, TemplateEndforRule):
        raise RuntimeError("Unterminated template for directive")
    return TemplateForRule(opener, loop_parts, terminator, meta), cursor


@v_args(meta=True)
def string_part(self, meta: Meta, args) -> StringPartRule:
    """Wrap the parsed children in a StringPartRule."""
    return StringPartRule(args, meta)


# (Transformer methods located between the two hunks are outside this view.)


@v_args(meta=True)
def for_cond(self, meta: Meta, args) -> ForCondRule:
    """Wrap the parsed children in a ForCondRule."""
    return ForCondRule(args, meta)


@v_args(meta=True)
def template_if_start(self, meta: Meta, args) -> TemplateIfStartRule:
    """Wrap the parsed children in a TemplateIfStartRule."""
    return TemplateIfStartRule(args, meta)


@v_args(meta=True)
def template_else(self, meta: Meta, args) -> TemplateElseRule:
    """Wrap the parsed children in a TemplateElseRule."""
    return TemplateElseRule(args, meta)


@v_args(meta=True)
def template_endif(self, meta: Meta, args) -> TemplateEndifRule:
    """Wrap the parsed children in a TemplateEndifRule."""
    return TemplateEndifRule(args, meta)


@v_args(meta=True)
def template_for_start(self, meta: Meta, args) -> TemplateForStartRule:
    """Wrap the parsed children in a TemplateForStartRule."""
    return TemplateForStartRule(args, meta)


@v_args(meta=True)
def template_endfor(self, meta: Meta, args) -> TemplateEndforRule:
    """Wrap the parsed children in a TemplateEndforRule."""
    return TemplateEndforRule(args, meta)


@v_args(meta=True)
def template_string(self, meta: Meta, args) -> TemplateStringRule:
    """Wrap the parsed children in a TemplateStringRule."""
    return TemplateStringRule(args, meta)
"strip_markers": "\"%{~ if var.enabled ~}yes%{~ endif ~}\"", + "strip_partial": "\"%{~ if var.enabled }yes%{ endif ~}\"", + "basic_for": "\"%{ for item in var.list }${item}%{ endfor }\"", + "for_key_value": "\"%{ for k, v in var.map }${k}=${v}%{ endfor }\"", + "issue_247": "\"kms%{ if var.id != \\\"primary\\\" }-${var.id}%{ endif }\"", + "mixed": "\"${var.prefix}%{ if var.suffix }-${var.suffix}%{ endif }\"", + "nested_if": "\"%{ if a }%{ if b }both%{ endif }%{ endif }\"", + "escaped_directive": "\"use %%{literal} not directives\"", + "for_with_interp": "\"%{ for x in var.items }item=${x}, %{ endfor }\"", + "if_strip_else_strip": "\"%{~ if cond ~}a%{~ else ~}b%{~ endif ~}\"" +} diff --git a/test/integration/specialized/template_directives.tf b/test/integration/specialized/template_directives.tf new file mode 100644 index 00000000..276cd878 --- /dev/null +++ b/test/integration/specialized/template_directives.tf @@ -0,0 +1,12 @@ +basic_if = "prefix%{ if var.enabled }yes%{ endif }suffix" +if_else = "%{ if var.enabled }yes%{ else }no%{ endif }" +strip_markers = "%{~ if var.enabled ~}yes%{~ endif ~}" +strip_partial = "%{~ if var.enabled }yes%{ endif ~}" +basic_for = "%{ for item in var.list }${item}%{ endfor }" +for_key_value = "%{ for k, v in var.map }${k}=${v}%{ endfor }" +issue_247 = "kms%{ if var.id != \"primary\" }-${var.id}%{ endif }" +mixed = "${var.prefix}%{ if var.suffix }-${var.suffix}%{ endif }" +nested_if = "%{ if a }%{ if b }both%{ endif }%{ endif }" +escaped_directive = "use %%{literal} not directives" +for_with_interp = "%{ for x in var.items }item=${x}, %{ endfor }" +if_strip_else_strip = "%{~ if cond ~}a%{~ else ~}b%{~ endif ~}" diff --git a/test/integration/specialized/template_directives_reconstructed.tf b/test/integration/specialized/template_directives_reconstructed.tf new file mode 100644 index 00000000..276cd878 --- /dev/null +++ b/test/integration/specialized/template_directives_reconstructed.tf @@ -0,0 +1,12 @@ +basic_if = "prefix%{ if 
var.enabled }yes%{ endif }suffix" +if_else = "%{ if var.enabled }yes%{ else }no%{ endif }" +strip_markers = "%{~ if var.enabled ~}yes%{~ endif ~}" +strip_partial = "%{~ if var.enabled }yes%{ endif ~}" +basic_for = "%{ for item in var.list }${item}%{ endfor }" +for_key_value = "%{ for k, v in var.map }${k}=${v}%{ endfor }" +issue_247 = "kms%{ if var.id != \"primary\" }-${var.id}%{ endif }" +mixed = "${var.prefix}%{ if var.suffix }-${var.suffix}%{ endif }" +nested_if = "%{ if a }%{ if b }both%{ endif }%{ endif }" +escaped_directive = "use %%{literal} not directives" +for_with_interp = "%{ for x in var.items }item=${x}, %{ endfor }" +if_strip_else_strip = "%{~ if cond ~}a%{~ else ~}b%{~ endif ~}" diff --git a/test/integration/specialized/template_directives_reserialized.json b/test/integration/specialized/template_directives_reserialized.json new file mode 100644 index 00000000..e1f149d4 --- /dev/null +++ b/test/integration/specialized/template_directives_reserialized.json @@ -0,0 +1,14 @@ +{ + "basic_if": "\"prefix%{ if var.enabled }yes%{ endif }suffix\"", + "if_else": "\"%{ if var.enabled }yes%{ else }no%{ endif }\"", + "strip_markers": "\"%{~ if var.enabled ~}yes%{~ endif ~}\"", + "strip_partial": "\"%{~ if var.enabled }yes%{ endif ~}\"", + "basic_for": "\"%{ for item in var.list }${item}%{ endfor }\"", + "for_key_value": "\"%{ for k, v in var.map }${k}=${v}%{ endfor }\"", + "issue_247": "\"kms%{ if var.id != \\\"primary\\\" }-${var.id}%{ endif }\"", + "mixed": "\"${var.prefix}%{ if var.suffix }-${var.suffix}%{ endif }\"", + "nested_if": "\"%{ if a }%{ if b }both%{ endif }%{ endif }\"", + "escaped_directive": "\"use %%{literal} not directives\"", + "for_with_interp": "\"%{ for x in var.items }item=${x}, %{ endfor }\"", + "if_strip_else_strip": "\"%{~ if cond ~}a%{~ else ~}b%{~ endif ~}\"" +} diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py index 1415f307..399b486a 100644 --- a/test/integration/test_specialized.py +++ 
b/test/integration/test_specialized.py @@ -4,6 +4,7 @@ (operator precedence, Builder round-trip) with dedicated golden files in test/integration/special/. """ + # pylint: disable=C0103,C0114,C0115,C0116 import json @@ -15,6 +16,7 @@ _parse_and_serialize, _deserialize_and_reserialize, _deserialize_and_reconstruct, + _direct_reconstruct, ) from hcl2.deserializer import BaseDeserializer, DeserializerOptions @@ -96,6 +98,58 @@ def _deserialize_and_reconstruct_with_options( return reconstructor.reconstruct(lark_tree) +class TestTemplateDirectives(TestCase): + """Test template directives (%{if}, %{for}) parsing, serialization, and round-trip. + + Covers: basic if/else/endif, for/endfor, strip markers, escaped quotes in + directive expressions (issue #247), nested directives, and escaped directives. + """ + + maxDiff = None + + def _load_special(self, name, suffix): + return (SPECIAL_DIR / f"{name}{suffix}").read_text() + + def test_hcl_to_json(self): + """HCL with directives -> JSON serialization matches expected.""" + hcl_text = self._load_special("template_directives", ".tf") + actual = _parse_and_serialize(hcl_text) + expected = json.loads(self._load_special("template_directives", ".json")) + self.assertEqual(actual, expected) + + def test_direct_reconstruct(self): + """HCL -> IR -> Lark -> HCL matches original.""" + hcl_text = self._load_special("template_directives", ".tf") + actual = _direct_reconstruct(hcl_text) + self.assertMultiLineEqual(actual, hcl_text) + + def test_json_reserialization(self): + """JSON -> deserialize -> reserialize matches expected.""" + hcl_text = self._load_special("template_directives", ".tf") + serialized = _parse_and_serialize(hcl_text) + actual = _deserialize_and_reserialize(serialized) + expected = json.loads( + self._load_special("template_directives_reserialized", ".json") + ) + self.assertEqual(actual, expected) + + def test_json_to_hcl(self): + """JSON -> deserialize -> reconstruct matches expected HCL.""" + hcl_text = 
self._load_special("template_directives", ".tf") + serialized = _parse_and_serialize(hcl_text) + actual = _deserialize_and_reconstruct(serialized) + expected = self._load_special("template_directives_reconstructed", ".tf") + self.assertMultiLineEqual(actual, expected) + + def test_full_round_trip(self): + """HCL -> JSON -> HCL -> JSON produces identical JSON.""" + hcl_text = self._load_special("template_directives", ".tf") + serialized = _parse_and_serialize(hcl_text) + reconstructed = _deserialize_and_reconstruct(serialized) + reserialized = _parse_and_serialize(reconstructed) + self.assertEqual(reserialized, serialized) + + class TestHeredocs(TestCase): """Test heredoc serialization, flattening, restoration, and round-trips. diff --git a/test/unit/rules/test_directives.py b/test/unit/rules/test_directives.py new file mode 100644 index 00000000..bb2be42e --- /dev/null +++ b/test/unit/rules/test_directives.py @@ -0,0 +1,187 @@ +"""Unit tests for template directive rule classes.""" + +# pylint: disable=C0103,C0114,C0115,C0116 +from unittest import TestCase + +from hcl2.rules.directives import ( + TemplateIfStartRule, + TemplateElseRule, + TemplateEndifRule, + TemplateForStartRule, + TemplateEndforRule, + TemplateIfRule, + TemplateForRule, +) +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringPartRule +from hcl2.rules.tokens import ( + NAME, + DIRECTIVE_START, + STRIP_MARKER, + RBRACE, + IF, + ELSE, + ENDIF, + FOR, + IN, + ENDFOR, + COMMA, + STRING_CHARS, +) + + +class TestTemplateIfStartRule(TestCase): + def test_lark_name(self): + self.assertEqual(TemplateIfStartRule.lark_name(), "template_if_start") + + def test_serialize_basic(self): + cond = IdentifierRule([NAME("cond")]) + rule = TemplateIfStartRule([DIRECTIVE_START(), IF(), cond, RBRACE()]) + self.assertEqual(rule.serialize(), "%{ if cond }") + + def test_serialize_strip_markers(self): + cond = IdentifierRule([NAME("cond")]) + rule = TemplateIfStartRule( + 
[DIRECTIVE_START(), STRIP_MARKER(), IF(), cond, STRIP_MARKER(), RBRACE()] + ) + self.assertEqual(rule.serialize(), "%{~ if cond ~}") + + def test_condition_property(self): + cond = IdentifierRule([NAME("x")]) + rule = TemplateIfStartRule([DIRECTIVE_START(), IF(), cond, RBRACE()]) + self.assertIs(rule.condition, cond) + + +class TestTemplateElseRule(TestCase): + def test_lark_name(self): + self.assertEqual(TemplateElseRule.lark_name(), "template_else") + + def test_serialize_basic(self): + rule = TemplateElseRule([DIRECTIVE_START(), ELSE(), RBRACE()]) + self.assertEqual(rule.serialize(), "%{ else }") + + def test_serialize_strip_markers(self): + rule = TemplateElseRule( + [DIRECTIVE_START(), STRIP_MARKER(), ELSE(), STRIP_MARKER(), RBRACE()] + ) + self.assertEqual(rule.serialize(), "%{~ else ~}") + + +class TestTemplateEndifRule(TestCase): + def test_lark_name(self): + self.assertEqual(TemplateEndifRule.lark_name(), "template_endif") + + def test_serialize_basic(self): + rule = TemplateEndifRule([DIRECTIVE_START(), ENDIF(), RBRACE()]) + self.assertEqual(rule.serialize(), "%{ endif }") + + +class TestTemplateForStartRule(TestCase): + def test_lark_name(self): + self.assertEqual(TemplateForStartRule.lark_name(), "template_for_start") + + def test_serialize_basic(self): + iterator = IdentifierRule([NAME("item")]) + collection = IdentifierRule([NAME("items")]) + rule = TemplateForStartRule( + [DIRECTIVE_START(), FOR(), iterator, IN(), collection, RBRACE()] + ) + self.assertEqual(rule.serialize(), "%{ for item in items }") + + def test_serialize_key_value(self): + key = IdentifierRule([NAME("k")]) + val = IdentifierRule([NAME("v")]) + collection = IdentifierRule([NAME("map")]) + rule = TemplateForStartRule( + [DIRECTIVE_START(), FOR(), key, COMMA(), val, IN(), collection, RBRACE()] + ) + self.assertEqual(rule.serialize(), "%{ for k, v in map }") + + def test_serialize_strip_markers(self): + iterator = IdentifierRule([NAME("x")]) + collection = IdentifierRule([NAME("xs")]) 
+ rule = TemplateForStartRule( + [ + DIRECTIVE_START(), + STRIP_MARKER(), + FOR(), + iterator, + IN(), + collection, + STRIP_MARKER(), + RBRACE(), + ] + ) + self.assertEqual(rule.serialize(), "%{~ for x in xs ~}") + + +class TestTemplateEndforRule(TestCase): + def test_lark_name(self): + self.assertEqual(TemplateEndforRule.lark_name(), "template_endfor") + + def test_serialize_basic(self): + rule = TemplateEndforRule([DIRECTIVE_START(), ENDFOR(), RBRACE()]) + self.assertEqual(rule.serialize(), "%{ endfor }") + + +class TestTemplateIfRule(TestCase): + def test_lark_name(self): + self.assertEqual(TemplateIfRule.lark_name(), "template_if") + + def test_serialize_basic(self): + cond = IdentifierRule([NAME("cond")]) + if_start = TemplateIfStartRule([DIRECTIVE_START(), IF(), cond, RBRACE()]) + body = [StringPartRule([STRING_CHARS("yes")])] + endif = TemplateEndifRule([DIRECTIVE_START(), ENDIF(), RBRACE()]) + rule = TemplateIfRule(if_start, body, None, None, endif) + self.assertEqual(rule.serialize(), "%{ if cond }yes%{ endif }") + + def test_serialize_with_else(self): + cond = IdentifierRule([NAME("cond")]) + if_start = TemplateIfStartRule([DIRECTIVE_START(), IF(), cond, RBRACE()]) + if_body = [StringPartRule([STRING_CHARS("yes")])] + else_rule = TemplateElseRule([DIRECTIVE_START(), ELSE(), RBRACE()]) + else_body = [StringPartRule([STRING_CHARS("no")])] + endif = TemplateEndifRule([DIRECTIVE_START(), ENDIF(), RBRACE()]) + rule = TemplateIfRule(if_start, if_body, else_rule, else_body, endif) + self.assertEqual(rule.serialize(), "%{ if cond }yes%{ else }no%{ endif }") + + def test_serialize_strip_markers(self): + cond = IdentifierRule([NAME("c")]) + if_start = TemplateIfStartRule( + [DIRECTIVE_START(), STRIP_MARKER(), IF(), cond, STRIP_MARKER(), RBRACE()] + ) + body = [StringPartRule([STRING_CHARS("x")])] + endif = TemplateEndifRule( + [DIRECTIVE_START(), STRIP_MARKER(), ENDIF(), STRIP_MARKER(), RBRACE()] + ) + rule = TemplateIfRule(if_start, body, None, None, endif) + 
self.assertEqual(rule.serialize(), "%{~ if c ~}x%{~ endif ~}") + + +class TestTemplateForRule(TestCase): + def test_lark_name(self): + self.assertEqual(TemplateForRule.lark_name(), "template_for") + + def test_serialize_basic(self): + iterator = IdentifierRule([NAME("item")]) + collection = IdentifierRule([NAME("items")]) + for_start = TemplateForStartRule( + [DIRECTIVE_START(), FOR(), iterator, IN(), collection, RBRACE()] + ) + body = [StringPartRule([STRING_CHARS("text")])] + endfor = TemplateEndforRule([DIRECTIVE_START(), ENDFOR(), RBRACE()]) + rule = TemplateForRule(for_start, body, endfor) + self.assertEqual(rule.serialize(), "%{ for item in items }text%{ endfor }") + + def test_serialize_key_value(self): + key = IdentifierRule([NAME("k")]) + val = IdentifierRule([NAME("v")]) + collection = IdentifierRule([NAME("m")]) + for_start = TemplateForStartRule( + [DIRECTIVE_START(), FOR(), key, COMMA(), val, IN(), collection, RBRACE()] + ) + body = [StringPartRule([STRING_CHARS("text")])] + endfor = TemplateEndforRule([DIRECTIVE_START(), ENDFOR(), RBRACE()]) + rule = TemplateForRule(for_start, body, endfor) + self.assertEqual(rule.serialize(), "%{ for k, v in m }text%{ endfor }")