From 55e85ca960762c9fcbbd54cc8d6309e4486f21b4 Mon Sep 17 00:00:00 2001 From: Bartosz Burda Date: Mon, 16 Mar 2026 13:23:43 +0100 Subject: [PATCH] fix: escape invalid XML in rustfmt checkstyle output rustfmt's --emit=checkstyle does not XML-escape special characters inside attribute values. Rust snippets containing &self, &mut, or generics like Vec produce invalid XML that breaks reviewdog. Add a Python filter between cargo fmt and reviewdog that escapes bare &, < and > inside XML attribute values while preserving already-valid entities. Closes #13 --- rust-lint-and-format-action/action.yml | 41 ++++- .../fix-checkstyle-xml.py | 63 ++++++++ .../merge-rustfmt-stderr.py | 141 ++++++++++++++++++ 3 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 rust-lint-and-format-action/fix-checkstyle-xml.py create mode 100644 rust-lint-and-format-action/merge-rustfmt-stderr.py diff --git a/rust-lint-and-format-action/action.yml b/rust-lint-and-format-action/action.yml index d7bfcfe..bc31b6f 100644 --- a/rust-lint-and-format-action/action.yml +++ b/rust-lint-and-format-action/action.yml @@ -68,23 +68,52 @@ runs: if [ "$FAIL_ON_FORMAT" = "true" ]; then SEVERITY_TRANSFORM="sed 's/severity=\"warning\"/severity=\"error\"/g'" FAIL_LEVEL="any" + STDERR_SEVERITY="error" else SEVERITY_TRANSFORM="cat" FAIL_LEVEL="none" + STDERR_SEVERITY="warning" fi + # rustfmt reports two kinds of issues: + # 1. Fixable formatting - emitted as checkstyle XML on stdout (exit 0) + # 2. Overflow/unformatted - emitted as diagnostics on stderr (exit 1) + # We capture stderr separately so we can merge both into the XML for + # reviewdog. Without "|| FMT_EXIT=$?", GHA's default "set -eo pipefail" + # would kill the step before reviewdog runs, losing ALL annotations. 
+ FMT_EXIT=0 cargo fmt -- \ --emit=checkstyle \ --config error_on_unformatted=true,error_on_line_overflow=true,format_strings=true \ --config group_imports=StdExternalCrate \ --config imports_granularity=Crate \ --config hex_literal_case=Upper \ - 2>&1 | eval "$SEVERITY_TRANSFORM" | reviewdog \ - -f=checkstyle \ - -reporter=${{ inputs.reporter }} \ - -filter-mode=nofilter \ - -fail-level="$FAIL_LEVEL" \ - -level=error + 2>/tmp/rustfmt_stderr.log \ + | python3 "${{ github.action_path }}/fix-checkstyle-xml.py" \ + | eval "$SEVERITY_TRANSFORM" \ + > /tmp/checkstyle.xml \ + || FMT_EXIT=$? + + # Merge overflow/unformatted errors from stderr into checkstyle XML + # so reviewdog can annotate them inline on the PR + python3 "${{ github.action_path }}/merge-rustfmt-stderr.py" \ + /tmp/checkstyle.xml /tmp/rustfmt_stderr.log "$STDERR_SEVERITY" + + reviewdog \ + -f=checkstyle \ + -reporter=${{ inputs.reporter }} \ + -filter-mode=nofilter \ + -fail-level="$FAIL_LEVEL" \ + -level=error \ + < /tmp/checkstyle.xml + rm -f /tmp/checkstyle.xml /tmp/rustfmt_stderr.log + + # Propagate the original rustfmt failure after reviewdog has posted + # annotations - the step still fails, but the developer gets inline + # comments pointing to the problematic lines. + if [ "$FMT_EXIT" -ne 0 ]; then + exit "$FMT_EXIT" + fi - name: Clippy uses: giraffate/clippy-action@v1 diff --git a/rust-lint-and-format-action/fix-checkstyle-xml.py b/rust-lint-and-format-action/fix-checkstyle-xml.py new file mode 100644 index 0000000..678a979 --- /dev/null +++ b/rust-lint-and-format-action/fix-checkstyle-xml.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: 2026 The Contributors to Eclipse OpenSOVD (see CONTRIBUTORS) +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
def fix_attribute_value(match: re.Match) -> str:
    """Escape special XML characters inside a single attribute value.

    Intended as the replacement callback for ``re.sub``.

    Args:
        match: A match with exactly three groups: (1) everything up to and
            including the opening quote, (2) the raw attribute value,
            (3) the closing quote.

    Returns:
        The attribute text with the value made XML-safe: bare ``&`` becomes
        ``&amp;``, ``<`` becomes ``&lt;`` and ``>`` becomes ``&gt;``.
        References that are already well-formed (``&amp;``, ``&lt;``,
        ``&gt;``, ``&apos;``, ``&quot;``, ``&#123;``, ``&#x1F;``) are left
        untouched so valid input is not double-escaped.
    """
    prefix = match.group(1)
    value = match.group(2)
    suffix = match.group(3)

    # Ampersands must be handled first: the entities produced for < and >
    # below contain "&" themselves and must not be escaped a second time.
    # A numeric reference only counts as valid when it is complete
    # ("&#" digits ";" or "&#x" hex-digits ";"); something like the Rust
    # snippet "&#[test]" is a bare ampersand and gets escaped.
    value = re.sub(
        r"&(?!(?:amp|lt|gt|apos|quot|#[0-9]+|#x[0-9A-Fa-f]+);)",
        "&amp;",
        value,
    )

    # "<" is never allowed literally inside an attribute value; ">" is
    # escaped as well for symmetry and to keep lenient parsers happy.
    value = value.replace("<", "&lt;").replace(">", "&gt;")

    return f"{prefix}{value}{suffix}"
# Patterns are compiled once at import time instead of on every line.
_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m")
_ERROR_RE = re.compile(r"error\[.*?\]:\s*(.+)")
_LOCATION_RE = re.compile(r"\s*-->\s*(.+?):(\d+):\d+")

# Number of lines after an "error[...]" header that are searched for the
# matching "--> file:line:col" location line.
_LOCATION_LOOKAHEAD = 3


def strip_ansi(text: str) -> str:
    """Remove ANSI colour/style escape sequences (``ESC[...m``) from text."""
    return _ANSI_RE.sub("", text)


def parse_stderr_errors(stderr_path: str) -> list[dict]:
    """Parse rustfmt stderr for error messages with file locations.

    Recognises diagnostics of the form::

        error[<kind>]: <message>
         --> <file>:<line>:<column>

    Args:
        stderr_path: Path of the file holding rustfmt's captured stderr.

    Returns:
        One dict per located error with the string keys ``"file"``,
        ``"line"`` and ``"message"``. The list is empty when the file
        cannot be read or nothing matches, so an unexpected output format
        only costs the inline annotations.
    """
    try:
        # Decode explicitly and tolerate stray bytes: the log is arbitrary
        # tool output and a decode error must not abort the merge step.
        with open(stderr_path, encoding="utf-8", errors="replace") as f:
            lines = [strip_ansi(line.rstrip()) for line in f]
    except OSError:  # includes FileNotFoundError
        return []

    errors = []
    for index, line in enumerate(lines):
        header = _ERROR_RE.match(line)
        if header is None:
            continue

        window = lines[index + 1 : index + 1 + _LOCATION_LOOKAHEAD]
        for candidate in window:
            # A new error header means this one had no location of its
            # own; do not borrow the location of the following error.
            if _ERROR_RE.match(candidate):
                break
            location = _LOCATION_RE.match(candidate)
            if location:
                errors.append(
                    {
                        "file": location.group(1),
                        "line": location.group(2),
                        "message": header.group(1),
                    }
                )
                break

    return errors


def inject_errors(xml_path: str, errors: list[dict], severity: str) -> None:
    """Inject stderr errors into the checkstyle XML file, in place.

    Args:
        xml_path: Checkstyle XML file to update (rewritten on success).
        errors: Entries as returned by ``parse_stderr_errors``.
        severity: Value written to the ``severity`` attribute of every
            injected ``<error>`` element.

    If ``errors`` is empty the file is left untouched. If the file is
    missing or not well-formed XML, a warning is printed to stderr and a
    fresh ``<checkstyle>`` document containing only the injected errors is
    written instead.
    """
    if not errors:
        return

    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
    except (ET.ParseError, FileNotFoundError) as exc:
        print(
            f"Warning: could not parse {xml_path} ({exc}), "
            f"creating new XML with only stderr errors",
            file=sys.stderr,
        )
        root = ET.Element("checkstyle", version="4.3")
        tree = ET.ElementTree(root)

    # Reuse an existing <file> element when the path is already present so
    # all findings for one file stay grouped together.
    file_elements = {f.get("name"): f for f in root.findall("file")}

    for err in errors:
        file_el = file_elements.get(err["file"])
        if file_el is None:
            file_el = ET.SubElement(root, "file", name=err["file"])
            file_elements[err["file"]] = file_el

        ET.SubElement(
            file_el,
            "error",
            {
                "line": err["line"],
                "severity": severity,
                "message": err["message"],
            },
        )

    # Write UTF-8 explicitly so the declared and actual encoding do not
    # depend on the runner's locale.
    tree.write(xml_path, encoding="utf-8", xml_declaration=True)


def main() -> None:
    """Command-line entry point: merge a rustfmt stderr log into the XML.

    Expects ``<checkstyle.xml> <stderr.log> [severity]`` on the command
    line; ``severity`` defaults to ``"error"``. Exits with status 1 when
    the two required paths are missing.
    """
    if len(sys.argv) < 3:
        print(
            f"Usage: {sys.argv[0]} <checkstyle.xml> <stderr.log> [severity]",
            file=sys.stderr,
        )
        sys.exit(1)

    xml_path = sys.argv[1]
    stderr_path = sys.argv[2]
    severity = sys.argv[3] if len(sys.argv) > 3 else "error"

    errors = parse_stderr_errors(stderr_path)
    if errors:
        inject_errors(xml_path, errors, severity)
        # Log what was merged to stderr so it shows up in the CI job output.
        for err in errors:
            print(
                f"Injected stderr error into checkstyle XML: "
                f"{err['file']}:{err['line']}: {err['message']}",
                file=sys.stderr,
            )


if __name__ == "__main__":
    main()