diff --git a/samples/GeneratedDialectConsumer/GeneratedDialectConsumer.csproj b/samples/GeneratedDialectConsumer/GeneratedDialectConsumer.csproj index 2db0d68..9c0743e 100644 --- a/samples/GeneratedDialectConsumer/GeneratedDialectConsumer.csproj +++ b/samples/GeneratedDialectConsumer/GeneratedDialectConsumer.csproj @@ -10,6 +10,7 @@ + diff --git a/src/MLIR.ODS/AssemblyFormatParser.cs b/src/MLIR.ODS/AssemblyFormatParser.cs index 8ac5fee..aa80893 100644 --- a/src/MLIR.ODS/AssemblyFormatParser.cs +++ b/src/MLIR.ODS/AssemblyFormatParser.cs @@ -131,7 +131,7 @@ private LiteralChunk ParseLiteral() Advance(); string value = _source.Substring(start, _pos - start); Expect('`'); - return new LiteralChunk(value); + return new LiteralChunk(Literal.Parse(value)); } private VariableChunk ParseVariable() diff --git a/src/MLIR.ODS/Model/AssemblyFormat/LiteralChunk.cs b/src/MLIR.ODS/Model/AssemblyFormat/LiteralChunk.cs index befc2c4..5383c75 100644 --- a/src/MLIR.ODS/Model/AssemblyFormat/LiteralChunk.cs +++ b/src/MLIR.ODS/Model/AssemblyFormat/LiteralChunk.cs @@ -1,3 +1,6 @@ +using System; +using System.Collections.Generic; + namespace MLIR.ODS.Model.AssemblyFormat; /// @@ -7,14 +10,14 @@ namespace MLIR.ODS.Model.AssemblyFormat; public sealed class LiteralChunk : Chunk { /// - /// The literal value. + /// The parsed literal value(s). /// - public string Value { get; } + public IReadOnlyList Value { get; } /// /// Creates a literal chunk. /// - public LiteralChunk(string value) + public LiteralChunk(IReadOnlyList value) { Value = value; } @@ -26,6 +29,117 @@ public LiteralChunk(string value) /// public abstract class Literal { + /// + /// Parses the raw text extracted from a backtick-delimited ODS literal into a list of + /// instances. + /// + /// + /// Supported constructs: + /// + /// Empty string → + /// \n escape → + /// One or more spaces → + /// -> and other punctuation → + /// Identifier-like text → + /// + /// + /// The raw content between the backticks. + /// A non-empty list of instances. + /// Thrown when contains an unexpected character. + public static IReadOnlyList Parse(string text) + { + if (text.Length == 0) + return new[] { (Literal)new EmptyLiteral() }; + + var result = new List(); + int pos = 0; + while (pos < text.Length) + { + char c = text[pos]; + + // Newline escape sequence: \n + if (c == '\\' && pos + 1 < text.Length && text[pos + 1] == 'n') + { + result.Add(new NewlineLiteral()); + pos += 2; + continue; + } + + // Consecutive spaces → single WhitespaceLiteral + if (c == ' ') + { + int start = pos; + while (pos < text.Length && text[pos] == ' ') + pos++; + result.Add(new WhitespaceLiteral(text.Substring(start, pos - start))); + continue; + } + + // Arrow: -> (must be checked before single-char Minus) + if (c == '-' && pos + 1 < text.Length && text[pos + 1] == '>') + { + result.Add(new PunctuationLiteral(Text.TokenKind.Arrow)); + pos += 2; + continue; + } + + // Single-character punctuation + Text.TokenKind punctKind; + if (TryGetPunctuationKind(c, out punctKind)) + { + result.Add(new PunctuationLiteral(punctKind)); + pos++; + continue; + } + + // Keyword / identifier + if (IsIdentifierStart(c)) + { + int start = pos; + while (pos < text.Length && IsIdentifierChar(text[pos])) + pos++; + result.Add(new KeywordLiteral(text.Substring(start, pos - start))); + continue; + } + + throw new FormatException( + $"Unexpected character '{c}' in literal at position {pos}."); + } + + return result; + } + + private static bool TryGetPunctuationKind(char c, out Text.TokenKind kind) + { + switch (c) + { + case ',': kind = Text.TokenKind.Comma; return true; + case '(': kind = Text.TokenKind.LParen; return true; + case ')': kind = Text.TokenKind.RParen; return true; + case '[': kind = Text.TokenKind.LBracket; return true; + case ']': kind = Text.TokenKind.RBracket; return true; + case '{': kind = Text.TokenKind.LBrace; return true; + case '}': kind = Text.TokenKind.RBrace; return true; + case '<': kind = Text.TokenKind.LessThan; return true; + case '>': kind = Text.TokenKind.GreaterThan; return true; + case '?': kind = Text.TokenKind.Question; return true; + case '*': kind = Text.TokenKind.Star; return true; + case '+': kind = Text.TokenKind.Plus; return true; + case '-': kind = Text.TokenKind.Minus; return true; + case '.': kind = Text.TokenKind.Dot; return true; + case ':': kind = Text.TokenKind.Colon; return true; + case '=': kind = Text.TokenKind.Equal; return true; + case '@': kind = Text.TokenKind.At; return true; + case '#': kind = Text.TokenKind.Hash; return true; + default: kind = default; return false; + } + } + + private static bool IsIdentifierStart(char c) => + char.IsLetter(c) || c == '_'; + + private static bool IsIdentifierChar(char c) => + char.IsLetterOrDigit(c) || c == '_'; } /// @@ -65,3 +179,14 @@ public sealed class NewlineLiteral : Literal public sealed class EmptyLiteral : Literal { } + +/// +/// A literal representing one or more space characters in the assembly format output. +/// +public sealed class WhitespaceLiteral(string spaces) : Literal +{ + /// + /// The whitespace string (one or more space characters). + /// + public string Spaces { get; } = spaces; +} diff --git a/tests/DialectTests/DialectTests.csproj b/tests/DialectTests/DialectTests.csproj index 978ab2d..197d33b 100644 --- a/tests/DialectTests/DialectTests.csproj +++ b/tests/DialectTests/DialectTests.csproj @@ -16,6 +16,7 @@ + diff --git a/tests/MLIR.Generators.Tests/AssemblyFormatParserTests.cs b/tests/MLIR.Generators.Tests/AssemblyFormatParserTests.cs index 87af021..23d0036 100644 --- a/tests/MLIR.Generators.Tests/AssemblyFormatParserTests.cs +++ b/tests/MLIR.Generators.Tests/AssemblyFormatParserTests.cs @@ -4,6 +4,7 @@ namespace MLIR.Generators.Tests; using MLIR.ODS; using MLIR.ODS.Model; using MLIR.ODS.Model.AssemblyFormat; +using MLIR.Text; using Xunit; public sealed class AssemblyFormatParserTests @@ -37,7 +38,7 @@ public void ParsesVariableLiteralTypeAndAttrDict() var variable = Assert.IsType(model.Elements[0]); Assert.Equal("value", variable.Name); var literal = Assert.IsType(model.Elements[1]); - Assert.Equal(":", literal.Value); + Assert.Equal(TokenKind.Colon, Assert.IsType(Assert.Single(literal.Value)).TokenKind); var typeDir = Assert.IsType(model.Elements[2]); var operand = Assert.IsType(typeDir.Operand); Assert.Equal("value", operand.Name); @@ -55,7 +56,7 @@ public void ParsesTwoVariablesWithLiteral() Assert.Equal(4, model.Elements.Count); Assert.Equal("lhs", Assert.IsType(model.Elements[0]).Name); - Assert.Equal(",", Assert.IsType(model.Elements[1]).Value); + Assert.Equal(TokenKind.Comma, Assert.IsType(Assert.Single(Assert.IsType(model.Elements[1]).Value)).TokenKind); Assert.Equal("rhs", Assert.IsType(model.Elements[2]).Name); Assert.IsType(model.Elements[3]); } @@ -71,7 +72,7 @@ public void ParsesFunctionalTypeDirective() Assert.Equal(4, model.Elements.Count); Assert.Equal("inputs", Assert.IsType(model.Elements[0]).Name); - Assert.Equal(":", Assert.IsType(model.Elements[1]).Value); + Assert.Equal(TokenKind.Colon, Assert.IsType(Assert.Single(Assert.IsType(model.Elements[1]).Value)).TokenKind); var ft = Assert.IsType(model.Elements[2]); Assert.Equal("inputs", Assert.IsType(ft.Inputs).Name); Assert.Equal("results", Assert.IsType(ft.Outputs).Name); @@ -92,7 +93,7 @@ public void ParsesOptionalGroupWithVariableAnchor() Assert.Equal("rhs", group.AnchorName); Assert.Null(group.ElseElements); Assert.Equal(2, group.ThenElements.Count); - Assert.Equal(",", Assert.IsType(group.ThenElements[0]).Value); + Assert.Equal(TokenKind.Comma, Assert.IsType(Assert.Single(Assert.IsType(group.ThenElements[0]).Value)).TokenKind); var anchor = Assert.IsType(group.ThenElements[1]); Assert.Equal("rhs", anchor.Name); Assert.True(anchor.IsAnchor); @@ -116,13 +117,13 @@ public void ParsesOptionalGroupWithElseBranchAndDirectiveAnchor() // Then branch: `:` type($value)^ Assert.Equal(2, group.ThenElements.Count); - Assert.Equal(":", Assert.IsType(group.ThenElements[0]).Value); + Assert.Equal(TokenKind.Colon, Assert.IsType(Assert.Single(Assert.IsType(group.ThenElements[0]).Value)).TokenKind); var typeDir = Assert.IsType(group.ThenElements[1]); Assert.Equal("value", Assert.IsType(typeDir.Operand).Name); // Else branch: `:` qualified(type($fallback)) Assert.Equal(2, group.ElseElements!.Count); - Assert.Equal(":", Assert.IsType(group.ElseElements[0]).Value); + Assert.Equal(TokenKind.Colon, Assert.IsType(Assert.Single(Assert.IsType(group.ElseElements[0]).Value)).TokenKind); var qualDir = Assert.IsType(group.ElseElements[1]); var innerType = Assert.IsType(qualDir.Operand); Assert.Equal("fallback", Assert.IsType(innerType.Operand).Name); @@ -332,4 +333,145 @@ public void ThrowsOnUnknownDirectiveOperand() { Assert.Throws(() => AssemblyFormatParser.Parse("type(unknown)")); } + + // ----------------------------------------------------------------------- + // Literal.Parse examples from the problem statement + // ----------------------------------------------------------------------- + + [Fact] + public void LiteralParse_Comma() + { + var literals = Literal.Parse(","); + var p = Assert.IsType(Assert.Single(literals)); + Assert.Equal(TokenKind.Comma, p.TokenKind); + } + + [Fact] + public void LiteralParse_LParen() + { + var literals = Literal.Parse("("); + var p = Assert.IsType(Assert.Single(literals)); + Assert.Equal(TokenKind.LParen, p.TokenKind); + } + + [Fact] + public void LiteralParse_RParen() + { + var literals = Literal.Parse(")"); + var p = Assert.IsType(Assert.Single(literals)); + Assert.Equal(TokenKind.RParen, p.TokenKind); + } + + [Fact] + public void LiteralParse_Arrow() + { + var literals = Literal.Parse("->"); + var p = Assert.IsType(Assert.Single(literals)); + Assert.Equal(TokenKind.Arrow, p.TokenKind); + } + + [Fact] + public void LiteralParse_LBracket() + { + var literals = Literal.Parse("["); + var p = Assert.IsType(Assert.Single(literals)); + Assert.Equal(TokenKind.LBracket, p.TokenKind); + } + + [Fact] + public void LiteralParse_Keyword_else() + { + var literals = Literal.Parse("else"); + var k = Assert.IsType(Assert.Single(literals)); + Assert.Equal("else", k.Spelling); + } + + [Fact] + public void LiteralParse_Keyword_to() + { + var literals = Literal.Parse("to"); + var k = Assert.IsType(Assert.Single(literals)); + Assert.Equal("to", k.Spelling); + } + + [Fact] + public void LiteralParse_Keyword_in() + { + var literals = Literal.Parse("in"); + var k = Assert.IsType(Assert.Single(literals)); + Assert.Equal("in", k.Spelling); + } + + [Fact] + public void LiteralParse_Newline() + { + var literals = Literal.Parse(@"\n"); + Assert.IsType(Assert.Single(literals)); + } + + [Fact] + public void LiteralParse_Empty() + { + var literals = Literal.Parse(""); + Assert.IsType(Assert.Single(literals)); + } + + [Fact] + public void LiteralParse_SingleSpace() + { + var literals = Literal.Parse(" "); + var w = Assert.IsType(Assert.Single(literals)); + Assert.Equal(" ", w.Spaces); + } + + [Fact] + public void LiteralParse_TwoSpaces() + { + var literals = Literal.Parse(" "); + var w = Assert.IsType(Assert.Single(literals)); + Assert.Equal(" ", w.Spaces); + } + + [Fact] + public void LiteralParse_TwoNewlines() + { + var literals = Literal.Parse(@"\n\n"); + Assert.Equal(2, literals.Count); + Assert.IsType(literals[0]); + Assert.IsType(literals[1]); + } + + [Fact] + public void LiteralParse_ArrowLParen() + { + var literals = Literal.Parse("->("); + Assert.Equal(2, literals.Count); + Assert.Equal(TokenKind.Arrow, Assert.IsType(literals[0]).TokenKind); + Assert.Equal(TokenKind.LParen, Assert.IsType(literals[1]).TokenKind); + } + + [Fact] + public void LiteralParse_RParenComma() + { + var literals = Literal.Parse("),"); + Assert.Equal(2, literals.Count); + Assert.Equal(TokenKind.RParen, Assert.IsType(literals[0]).TokenKind); + Assert.Equal(TokenKind.Comma, Assert.IsType(literals[1]).TokenKind); + } + + [Fact] + public void LiteralParse_Keyword_x() + { + var literals = Literal.Parse("x"); + var k = Assert.IsType(Assert.Single(literals)); + Assert.Equal("x", k.Spelling); + } + + [Fact] + public void LiteralParse_Keyword_yield() + { + var literals = Literal.Parse("yield"); + var k = Assert.IsType(Assert.Single(literals)); + Assert.Equal("yield", k.Spelling); + } } diff --git a/tests/MLIR.Generators.Tests/DialectImporterTests.cs b/tests/MLIR.Generators.Tests/DialectImporterTests.cs index d2fac9d..11b2d5f 100644 --- a/tests/MLIR.Generators.Tests/DialectImporterTests.cs +++ b/tests/MLIR.Generators.Tests/DialectImporterTests.cs @@ -3,6 +3,7 @@ namespace MLIR.Generators.Tests; using System.Linq; using MLIR.ODS; using MLIR.ODS.Model.AssemblyFormat; +using MLIR.Text; using TableGen; using Xunit; @@ -73,10 +74,10 @@ public void ImportsActualOdsStyleDialectAndOperationRecords() Assert.Collection( addiOp.AssemblyFormat!.Elements, e => Assert.Equal("lhs", Assert.IsType(e).Name), - e => Assert.Equal(",", Assert.IsType(e).Value), + e => Assert.Equal(TokenKind.Comma, Assert.IsType(Assert.Single(Assert.IsType(e).Value)).TokenKind), e => Assert.Equal("rhs", Assert.IsType(e).Name), e => Assert.IsType(e), - e => Assert.Equal(":", Assert.IsType(e).Value), + e => Assert.Equal(TokenKind.Colon, Assert.IsType(Assert.Single(Assert.IsType(e).Value)).TokenKind), e => Assert.Equal("result", Assert.IsType(Assert.IsType(e).Operand).Name)); Assert.True(addiOp.HasCustomAssemblyFormat); }