From ce80f773eb17c7f464b99fa0b641b464e4931589 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89amonn=20McManus?= <emcmanus@google.com>
Date: Wed, 17 Jun 2026 07:39:38 -0700
Subject: [PATCH] Abandon formatting of a Markdown Javadoc comment if it
 contains unsupported constructs.

This is much better than mangling the text of the comment. Of course, better still would be supporting the constructs, as hopefully we will eventually.

PiperOrigin-RevId: 933718210
---
 .../java/javadoc/JavadocFormatter.java        |  6 +-
 .../java/javadoc/JavadocLexer.java            | 14 +++-
 .../java/javadoc/MarkdownPositions.java       | 31 ++++++++-
 .../java/JavadocFormattingTest.java           | 68 ++++++++-----------
 4 files changed, 73 insertions(+), 46 deletions(-)
diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java
index f801a4d75..7e2a4e0bd 100644
--- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java
+++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java
@@ -87,12 +87,10 @@ public static String formatJavadoc(String input, int blockIndent) {
           default ->
               throw new IllegalArgumentException("Input does not start with /** or ///: " + input);
         };
-    if (!classicJavadoc) {
-      input = "///" + markdownCommentText(input);
-    }
+    String inputForLexer = classicJavadoc ? input : ("///" + markdownCommentText(input));
     ImmutableList<Token> tokens;
     try {
-      tokens = lex(input, classicJavadoc);
+      tokens = lex(inputForLexer, classicJavadoc);
     } catch (LexException e) {
       return input;
     }
diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java
index 34e754261..ab89e5d1c 100644
--- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java
+++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocLexer.java
@@ -83,7 +83,11 @@ static ImmutableList<Token> lex(String input, boolean classicJavadoc) throws Lex
     } else {
       checkArgument(input.startsWith("///"));
       input = input.substring("///".length());
-      markdownPositions = MarkdownPositions.parse(input);
+      try {
+        markdownPositions = MarkdownPositions.parse(input);
+      } catch (UnsupportedOperationException e) {
+        throw new LexException(e);
+      }
     }
     return new JavadocLexer(new CharStream(input), markdownPositions, classicJavadoc)
         .generateTokens();
@@ -709,5 +713,11 @@ private static Pattern closeTagPattern(String namePattern) {
     return compile(format("</(?:%s)\\b[^>]*>", namePattern), CASE_INSENSITIVE);
   }
 
-  static class LexException extends Exception {}
+  static class LexException extends Exception {
+    LexException() {}
+
+    LexException(Throwable cause) {
+      super(cause);
+    }
+  }
 }
diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java
index 0bbb56fee..f63597426 100644
--- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java
+++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java
@@ -36,14 +36,18 @@
 import java.util.regex.Pattern;
 import org.commonmark.ext.gfm.tables.TableBlock;
 import org.commonmark.ext.gfm.tables.TablesExtension;
+import org.commonmark.node.BlockQuote;
 import org.commonmark.node.BulletList;
 import org.commonmark.node.Code;
 import org.commonmark.node.FencedCodeBlock;
 import org.commonmark.node.Heading;
+import org.commonmark.node.IndentedCodeBlock;
+import org.commonmark.node.LinkReferenceDefinition;
 import org.commonmark.node.ListItem;
 import org.commonmark.node.Node;
 import org.commonmark.node.OrderedList;
 import org.commonmark.node.Paragraph;
+import org.commonmark.node.ThematicBreak;
 import org.commonmark.parser.IncludeSourceSpans;
 import org.commonmark.parser.Parser;
 
@@ -90,7 +94,7 @@ private static class TokenVisitor {
     void visit(Node node) {
       boolean alreadyVisitedChildren = false;
       switch (node) {
-        case Heading heading -> addSpan(heading, HEADER_OPEN_TOKEN, HEADER_CLOSE_TOKEN);
+        case Heading heading -> visitHeading(heading);
         case Paragraph paragraph -> addSpan(paragraph, PARAGRAPH_OPEN_TOKEN, PARAGRAPH_CLOSE_TOKEN);
         case BulletList bulletList -> addSpan(bulletList, LIST_OPEN_TOKEN, LIST_CLOSE_TOKEN);
         case OrderedList orderedList -> addSpan(orderedList, LIST_OPEN_TOKEN, LIST_CLOSE_TOKEN);
@@ -101,6 +105,14 @@ void visit(Node node) {
           alreadyVisitedChildren = true;
         }
         case Code code -> visitCodeSpan(code);
+        case BlockQuote blockQuote ->
+            throw new UnsupportedOperationException("Block quotes not supported");
+        case IndentedCodeBlock indentedCodeBlock ->
+            throw new UnsupportedOperationException("Indented code blocks not supported");
+        case ThematicBreak thematicBreak ->
+            throw new UnsupportedOperationException("Thematic breaks not supported");
+        case LinkReferenceDefinition linkReferenceDefinition ->
+            throw new UnsupportedOperationException("Link reference definitions not supported");
         // TODO: others
         default -> {}
       }
@@ -130,6 +142,19 @@ private boolean visitListItem(ListItem listItem) {
       };
     }
 
+    private void visitHeading(Heading heading) {
+      // There are two ways to spell a heading in Markdown:
+      // 1. With hashes: `# Heading` `## Subheading`
+      // 2. With dashes or underlines: `Heading\n=======` `Subheading\n-------`
+      // We currently only support the first kind. We can distinguish the two cases by checking if
+      // the heading text contains a newline.
+      String s = nodeString(heading);
+      if (s.contains("\n")) {
+        throw new UnsupportedOperationException("Unsupported heading: " + s);
+      }
+      addSpan(heading, HEADER_OPEN_TOKEN, HEADER_CLOSE_TOKEN);
+    }
+
     private void visitFencedCodeBlock(FencedCodeBlock fencedCodeBlock) {
       // Any indentation before the code block is part of FencedCodeBlock. This makes sense
       // because the lines inside the code block must also be indented by that amount. That
@@ -205,6 +230,10 @@ private int endPosition(Node node) {
       var last = node.getSourceSpans().getLast();
       return last.getInputIndex() + last.getLength();
     }
+
+    private String nodeString(Node node) {
+      return input.substring(startPosition(node), endPosition(node));
+    }
   }
 
   @Override
diff --git a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
index 6977ac748..dd79cb1b6 100644
--- a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
+++ b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
@@ -1860,17 +1860,14 @@ public void markdownThematicBreaks() {
         /// foo
         /// ***
         /// bar
-        class Test {}
-        """;
-    // TODO: the line break before `***` should be preserved.
-    // It's OK to introduce a blank line before `bar` since it is a new paragraph.
-    String expected =
-        """
-        /// foo ***
+        /// baz
+        ///
         ///
-        /// bar
         class Test {}
         """;
+    // TODO: thematic breaks are not supported. That means the input is unchanged. We can see this
+    // because the blank lines at the end are preserved.
+    String expected = input;
     doFormatTest(input, expected);
   }
 
@@ -1883,21 +1880,14 @@ public void markdownSetextHeadings() {
         /// =======
         /// Phoebe B. Peabody-Beebe
         ///
+        ///
         /// Subheading
         /// ----------
         class Test {}
         """;
-    // TODO: the line breaks before the lines of repeated characters should be preserved.
-    //    Or, we could rewrite this style of heading as `# Heading`.
-    String expected =
-        """
-        /// Heading =======
-        ///
-        /// Phoebe B. Peabody-Beebe
-        ///
-        /// Subheading ----------
-        class Test {}
-        """;
+    // TODO: This kind of heading is not supported. That means the input is unchanged.
+    // We can see this because the extra blank line is preserved.
+    String expected = input;
     doFormatTest(input, expected);
   }
 
@@ -1908,14 +1898,14 @@ public void markdownIndentedCodeBlocks() {
         """
         ///     code block
         ///     is indented
+        /// text after code block
+        ///
+        ///
         class Test {}
         """;
-    // TODO: the evil indented code block should be preserved.
-    String expected =
-        """
-        /// code block is indented
-        class Test {}
-        """;
+    // TODO: evil indented code blocks like this are not supported. That means the input is
+    // unchanged. We can see this because the blank lines at the end are preserved.
+    String expected = input;
     doFormatTest(input, expected);
   }
 
@@ -1924,15 +1914,17 @@ public void markdownLinkReferenceDefinitions() {
     assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
     String input =
         """
-        /// [foo]
+        /// [foo] [bar]
+        ///
         /// [foo]: /url "title"
+        /// [bar]: /url2 "title2"
+        ///
+        ///
         class Test {}
         """;
-    String expected =
-        """
-        /// [foo] [foo]: /url "title"
-        class Test {}
-        """;
+    // TODO: link reference definitions are not supported. That means the input is unchanged. We can
+    // see this because the blank lines at the end are preserved.
+    String expected = input;
     doFormatTest(input, expected);
   }
 
@@ -1965,16 +1957,13 @@ public void markdownBlockQuotes() {
         """
         /// > foo
         /// > bar
-        class Test {}
-        """;
-    // TODO: the block quote should be preserved, and ideally bar would be joined to foo.
-    String expected =
-        """
-        /// >
         ///
-        /// foo > bar
+        ///
         class Test {}
         """;
+    // TODO: block quotes are not supported. That means the input is unchanged. We can see this
+    // because the extra blank lines at the end are preserved.
+    String expected = input;
     doFormatTest(input, expected);
   }
 
@@ -2014,10 +2003,11 @@ public void markdownAutolinks() {
     assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
     String input =
         """
-        /// <http://{@code example.com> <br> is not inside @code}.
+        /// <http://{@code:example.com> <br> is not inside @code}.
         class Test {}
         """;
     // TODO: the <br> should trigger a line break since it is not in fact inside {@code ...}.
+    // This is something of a corner case, though.
     String expected = input;
     doFormatTest(input, expected);
   }