From a6bb9cb6f0854adf6479a8eb6f370613da9b701f Mon Sep 17 00:00:00 2001
From: Dudi
Date: Thu, 11 May 2023 15:13:48 +0300
Subject: [PATCH 01/28] init cs support

---
 Makefile                        |   3 +
 src/builders/cs.js              | 374 ++++++++++++++++++++++++++++++
 src/canopy.js                   |   1 +
 templates/cs/Actions.cs         |   8 +
 templates/cs/CacheRecord.cs     |  10 +
 templates/cs/Label.cs           |   5 +
 templates/cs/ParseError.cs      |   5 +
 templates/cs/Parser.cs          |  65 ++++++
 templates/cs/TreeNode.cs        |  48 ++++
 test/cs/ChoicesTest.cs          | 131 +++++++++++
 test/cs/NodeActionsTest.cs      | 356 ++++++++++++++++++++++++++++
 test/cs/PredicatesTest.cs       | 176 ++++++++++++++
 test/cs/QuantifiersTest.cs      | 337 +++++++++++++++++++++++++++
 test/cs/SequencesTest.cs        | 395 ++++++++++++++++++++++++++++++++
 test/cs/TerminalsTest.cs        | 231 +++++++++++++++++++
 test/cs/canopytest.csproj       |  17 ++
 test/cs/helpers/ElementsSpec.cs |  33 +++
 test/cs/helpers/Node.cs         |  15 ++
 test/cs/helpers/NodeSpec.cs     |  53 +++++
 19 files changed, 2263 insertions(+)
 create mode 100644 src/builders/cs.js
 create mode 100644 templates/cs/Actions.cs
 create mode 100644 templates/cs/CacheRecord.cs
 create mode 100644 templates/cs/Label.cs
 create mode 100644 templates/cs/ParseError.cs
 create mode 100644 templates/cs/Parser.cs
 create mode 100644 templates/cs/TreeNode.cs
 create mode 100644 test/cs/ChoicesTest.cs
 create mode 100644 test/cs/NodeActionsTest.cs
 create mode 100644 test/cs/PredicatesTest.cs
 create mode 100644 test/cs/QuantifiersTest.cs
 create mode 100644 test/cs/SequencesTest.cs
 create mode 100644 test/cs/TerminalsTest.cs
 create mode 100644 test/cs/canopytest.csproj
 create mode 100644 test/cs/helpers/ElementsSpec.cs
 create mode 100644 test/cs/helpers/Node.cs
 create mode 100644 test/cs/helpers/NodeSpec.cs

diff --git a/Makefile b/Makefile
index 1178f78..fbec4b1 100644
--- a/Makefile
+++ b/Makefile
@@ -50,6 +50,9 @@ test-all: test-java test-js test-python test-ruby
 test-java: $(test_grammars:%.peg=%/Grammar.java)
 	cd test/java && mvn clean test
 
+test-cs: $(test_grammars:%.peg=%/Grammar.cs)
+	cd test/cs && dotnet test
+
 test-js: test/javascript/node_modules $(test_grammars:%.peg=%.js)
 	cd test/javascript && npm test
 
diff --git a/src/builders/cs.js b/src/builders/cs.js
new file mode 100644
index 0000000..211941b
--- /dev/null
+++ b/src/builders/cs.js
@@ -0,0 +1,374 @@
+'use strict'
+
+const { sep } = require('path')
+const Base = require('./base')
+
+// C# types used when declaring the local variables created by localVar_().
+const TYPES = {
+  address: 'TreeNode',
+  chunk: 'String',
+  elements: 'List<TreeNode>',
+  index: 'int',
+  max: 'int'
+}
+
+// Generic C# types shared by several of the emitted declarations.
+const RULE_CACHE = 'Dictionary<int, CacheRecord>'
+const EXPECTED = 'List<String[]>'
+
+/**
+ * Builder that renders a grammar as C# source code.
+ *
+ * Every method appends C# text to the current output buffer through the
+ * helpers inherited from Base (_line, _indent, _scope, _template, ...).
+ */
+class Builder extends Base {
+  constructor (...args) {
+    super(...args)
+    // Rule and attribute names collected while generating; parserClass_()
+    // renders them into the Label enum.
+    this._labels = new Set()
+  }
+
+  // Text used for one level of indentation.
+  _tab () {
+    return ' '
+  }
+
+  // Returns the text that starts a buffer: a file-scoped namespace named after
+  // the directory part of `pathname` (C# has no `package` statement).
+  _initBuffer (pathname) {
+    const namespace = pathname.split(sep)
+    namespace.pop()
+    return 'namespace ' + namespace.join('.') + ';\n\n'
+  }
+
+  // Wraps `string` in double quotes, escaping the characters that need it
+  // inside a C# string literal.
+  _quote (string) {
+    string = string.replace(/\\/g, '\\\\')
+      .replace(/"/g, '\\"')
+      .replace(/\x08/g, '\\b')
+      .replace(/\t/g, '\\t')
+      .replace(/\n/g, '\\n')
+      .replace(/\f/g, '\\f')
+      .replace(/\r/g, '\\r')
+
+    return '"' + string + '"'
+  }
+
+  // Formats `lines` as a block comment.
+  comment (lines) {
+    lines = lines.map((line) => ' * ' + line)
+    return ['/**'].concat(lines).concat([' */'])
+  }
+
+  // Emits the Actions and CacheRecord files, then runs `block`.
+  package_ (name, actions, block) {
+    this._grammarName = name
+
+    this._newBuffer('cs', 'Actions')
+    this._template('cs', 'Actions.cs', { actions })
+
+    this._newBuffer('cs', 'CacheRecord')
+    this._template('cs', 'CacheRecord.cs')
+
+    block()
+  }
+
+  // Emits the TreeNode file and returns the node class name.
+  syntaxNodeClass_ () {
+    const name = 'TreeNode'
+
+    this._newBuffer('cs', name)
+    this._template('cs', 'TreeNode.cs', { name })
+
+    return name
+  }
+
+  // Emits the Grammar base class; `block` fills in its members. Members are
+  // protected because C# members are private by default and the parser class
+  // that derives from Grammar reads them.
+  grammarModule_ (block) {
+    this._newBuffer('cs', 'Grammar')
+
+    this._line('using System')
+    this._line('using System.Collections')
+    this._line('using System.Collections.Generic')
+    this._line('using System.Text.RegularExpressions')
+    this._newline()
+
+    this._line('public abstract class Grammar {', false)
+    this._indent(() => {
+      this.assign_('protected static TreeNode ' + this.nullNode_(), 'new TreeNode()')
+      this._newline()
+
+      this._line('protected int inputSize, offset, failure')
+      this._line('protected String input')
+      this._line('protected ' + EXPECTED + ' expected')
+      this._line('protected Dictionary<Label, ' + RULE_CACHE + '> cache')
+      this._line('protected Actions actions')
+      this._newline()
+      block()
+    })
+    this._line('}', false)
+  }
+
+  // Declares a static Regex named `name` for `charClass` and records the
+  // name on the charClass. A leading ^ becomes \A.
+  compileRegex_ (charClass, name) {
+    const source = charClass.regex.source.replace(/^\^/, '\\A')
+    this.assign_('private static Regex ' + name, 'new Regex(' + this._quote(source) + ')')
+    charClass.constName = name
+  }
+
+  // Emits the ParseError, parser and Label files.
+  parserClass_ (root) {
+    this._newBuffer('cs', 'ParseError')
+    this._template('cs', 'ParseError.cs')
+
+    const grammar = this._quote(this._grammarName)
+    const name = this._grammarName.replace(/\./g, '')
+    this._newBuffer('cs', name)
+    this._template('cs', 'Parser.cs', { grammar, root, name })
+
+    const labels = [...this._labels].sort()
+
+    this._newBuffer('cs', 'Label')
+    this._template('cs', 'Label.cs', { labels })
+  }
+
+  // Emits `class name : parent { ... }` with `block` as its body.
+  class_ (name, parent, block) {
+    this._newline()
+    this._line('class ' + name + ' : ' + parent + ' {', false)
+    this._scope(block, name)
+    this._line('}', false)
+  }
+
+  // Emits a constructor for the current class. C# chains to the base
+  // constructor with an initializer on the signature, not a call in the body,
+  // and the constructor is public so the grammar can instantiate the node.
+  constructor_ (args, block) {
+    const signature = '(String text, int offset, List<TreeNode> elements)'
+    this._line('public ' + this._currentScope.name + signature + ' : base(text, offset, elements) {', false)
+    this._indent(block)
+    this._line('}', false)
+  }
+
+  // Emits a parameterless method returning TreeNode; protected so the parser
+  // subclass can call the rule methods.
+  method_ (name, args, block) {
+    this._newline()
+    this._line('protected TreeNode ' + name + '() {', false)
+    this._scope(block)
+    this._line('}', false)
+  }
+
+  // Wraps a rule body in cache handling: a record cached for the current
+  // offset is reused, otherwise `block` runs and its result is stored.
+  cache_ (name, block) {
+    this._labels.add(name)
+
+    const temp = this.localVars_({ address: this.nullNode_(), index: 'offset' })
+    const address = temp.address
+    const offset = temp.index
+    const key = 'Label.' + name
+
+    // Indexing a Dictionary with a missing key throws, so the per-rule table
+    // is looked up with TryGetValue and created on first use.
+    this._line(RULE_CACHE + ' rule')
+    this.if_('!cache.TryGetValue(' + key + ', out rule)', () => {
+      this.assign_('rule', 'new ' + RULE_CACHE + '()')
+      this.assign_('cache[' + key + ']', 'rule')
+    })
+    this.if_('rule.ContainsKey(offset)', () => {
+      this.assign_(address, 'rule[offset].node')
+      this.assign_('offset', 'rule[offset].tail')
+    }, () => {
+      block(address)
+      this.assign_('rule[' + offset + ']', 'new CacheRecord(' + address + ', offset)')
+    })
+    this._return(address)
+  }
+
+  // Stores `value` under the label `name` on the node being constructed.
+  attribute_ (name, value) {
+    this._labels.add(name)
+    this.assign_('labelled[Label.' + name + ']', value)
+  }
+
+  // Declares one local per key of `vars`; returns a map of generated names.
+  localVars_ (vars) {
+    const names = {}
+    for (const name in vars)
+      names[name] = this.localVar_(name, vars[name])
+    return names
+  }
+
+  // Declares a local of the type registered for `name` in TYPES, initialised
+  // to `value` (the failure node by default), and returns its generated name.
+  localVar_ (name, value) {
+    const varName = this._varName(name)
+
+    if (value === undefined) value = this.nullNode_()
+    this.assign_(TYPES[name] + ' ' + varName, value)
+
+    return varName
+  }
+
+  // Reads the next `length` characters of the input into a local, leaving it
+  // null when fewer remain. String.Substring takes (start, length), not
+  // (start, end).
+  chunk_ (length) {
+    const temp = this.localVars_({ chunk: this.null_(), max: 'offset + ' + length })
+
+    this.if_(temp.max + ' <= inputSize', () => {
+      this._line(temp.chunk + ' = input.Substring(offset, ' + temp.max + ' - offset)')
+    })
+    return temp.chunk
+  }
+
+  // Builds the node for the input between `start` and `end`, through the
+  // named action if given or `nodeClass` otherwise, and advances the offset.
+  syntaxNode_ (address, start, end, elements, action, nodeClass) {
+    let args
+
+    if (action) {
+      action = 'actions.' + action
+      args = ['input', start, end]
+    } else {
+      action = 'new ' + (nodeClass || 'TreeNode')
+      // Second Substring argument is a length.
+      args = ['input.Substring(' + start + ', ' + end + ' - ' + start + ')', start]
+    }
+    args.push(elements || this.emptyList_())
+
+    this.assign_(address, action + '(' + args.join(', ') + ')')
+    this.assign_('offset', end)
+  }
+
+  ifNode_ (address, block, else_) {
+    this.if_(address + ' != ' + this.nullNode_(), block, else_)
+  }
+
+  unlessNode_ (address, block, else_) {
+    this.if_(address + ' == ' + this.nullNode_(), block, else_)
+  }
+
+  ifNull_ (elements, block, else_) {
+    this.if_(elements + ' == null', block, else_)
+  }
+
+  extendNode_ (address, nodeType) {
+    // TODO
+  }
+
+  // Marks `address` as failed and records `expected` for the error report,
+  // keeping only the expectations at the furthest failure offset.
+  failure_ (address, expected) {
+    const rule = this._quote(this._grammarName + '::' + this._ruleName)
+    expected = this._quote(expected)
+
+    this.assign_(address, this.nullNode_())
+
+    this.if_('offset > failure', () => {
+      this.assign_('failure', 'offset')
+      this.assign_('expected', 'new ' + EXPECTED + '()')
+    })
+    this.if_('offset == failure', () => {
+      this.append_('expected', 'new String[] { ' + rule + ', ' + expected + ' }')
+    })
+  }
+
+  jump_ (address, rule) {
+    this.assign_(address, '_read_' + rule + '()')
+  }
+
+  _conditional (kwd, condition, block, else_) {
+    this._line(kwd + ' (' + condition + ') {', false)
+    this._indent(block)
+    if (else_) {
+      this._line('} else {', false)
+      this._indent(else_)
+    }
+    this._line('}', false)
+  }
+
+  if_ (condition, block, else_) {
+    this._conditional('if', condition, block, else_)
+  }
+
+  loop_ (block) {
+    this._conditional('while', 'true', block)
+  }
+
+  break_ () {
+    this._line('break')
+  }
+
+  // Condition testing that the list `address` holds between `min` and `max`
+  // items; max of -1 means unbounded and max of 0 means exactly `min`.
+  sizeInRange_ (address, [min, max]) {
+    if (max === -1) {
+      return address + '.Count >= ' + min
+    } else if (max === 0) {
+      return address + '.Count == ' + min
+    } else {
+      return address + '.Count >= ' + min + ' && ' + address + '.Count <= ' + max
+    }
+  }
+
+  stringMatch_ (expression, string) {
+    return expression + ' != null && ' + expression + '.Equals(' + this._quote(string) + ')'
+  }
+
+  stringMatchCI_ (expression, string) {
+    return expression + ' != null && ' + expression + '.ToLower().Equals(' + this._quote(string) + '.ToLower())'
+  }
+
+  // Regex exposes IsMatch; matcher()/matches() is the Java API.
+  regexMatch_ (regex, string) {
+    return string + ' != null && ' + regex + '.IsMatch(' + string + ')'
+  }
+
+  // List<T> has an indexer and no GetValue method.
+  arrayLookup_ (expression, offset) {
+    return expression + '[' + offset + ']'
+  }
+
+  // Adds `value` to `list`, at `index` when one is given.
+  append_ (list, value, index) {
+    if (index === undefined)
+      this._line(list + '.Add(' + value + ')')
+    else
+      this._line(list + '.Insert(' + index + ', ' + value + ')')
+  }
+
+  hasChars_ () {
+    return 'offset < inputSize'
+  }
+
+  nullNode_ () {
+    return 'FAILURE'
+  }
+
+  offset_ () {
+    return 'offset'
+  }
+
+  // `elements` locals are List<TreeNode>, so the list must be generic.
+  emptyList_ (size) {
+    return 'new List<TreeNode>(' + (size || '') + ')'
+  }
+
+  _emptyString () {
+    return '""'
+  }
+
+  null_ () {
+    return 'null'
+  }
+}
+
+module.exports = Builder
diff --git a/src/canopy.js b/src/canopy.js
index f696dcc..ba57742 100644
--- a/src/canopy.js
+++ b/src/canopy.js
@@ -5,6 +5,7 @@ const Compiler = require('./compiler')
 module.exports = {
   builders: {
     java: require('./builders/java'),
+    cs: require('./builders/cs'),
     javascript: require('./builders/javascript'),
     python: require('./builders/python'),
     ruby: require('./builders/ruby')
diff --git a/templates/cs/Actions.cs b/templates/cs/Actions.cs
new file mode 100644
index 0000000..a0cbdd4
--- /dev/null
+++ b/templates/cs/Actions.cs
@@ -0,0 +1,8 @@
+using System;
+using System.Collections.Generic;
+
+public interface Actions {
+{{#each actions}}
+    TreeNode {{this}}(String input, int start, int end, List<TreeNode> elements);
+{{/each}}
+}
diff --git a/templates/cs/CacheRecord.cs b/templates/cs/CacheRecord.cs
new file mode 100644
index 0000000..7cb6c20
--- /dev/null
+++ b/templates/cs/CacheRecord.cs
@@ -0,0 +1,10 @@
+// Memoised result of one rule at one offset: the node and the offset after it.
+public class CacheRecord {
+    public TreeNode node;
+    public int tail;
+
+    public CacheRecord(TreeNode node, int tail) {
+        this.node = node;
+        this.tail = tail;
+    }
+}
diff --git a/templates/cs/Label.cs b/templates/cs/Label.cs
new
file mode 100644
index 0000000..8eb7757
--- /dev/null
+++ b/templates/cs/Label.cs
@@ -0,0 +1,5 @@
+public enum Label {
+{{#each labels}}
+    {{this}}{{#unless @last}},{{/unless}}
+{{/each}}
+}
diff --git a/templates/cs/ParseError.cs b/templates/cs/ParseError.cs
new file mode 100644
index 0000000..e53e134
--- /dev/null
+++ b/templates/cs/ParseError.cs
@@ -0,0 +1,5 @@
+using System;
+// Thrown when the input cannot be parsed; the message describes the failure.
+public class ParseError : Exception {
+    public ParseError(String message) : base(message) { }
+}
diff --git a/templates/cs/Parser.cs b/templates/cs/Parser.cs
new file mode 100644
index 0000000..f86dffb
--- /dev/null
+++ b/templates/cs/Parser.cs
@@ -0,0 +1,65 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+public class {{name}} : Grammar {
+    public {{name}}(String input, Actions actions) {
+        this.input = input;
+        this.inputSize = input.Length;
+        this.actions = actions;
+        this.offset = 0;
+        this.cache = new Dictionary<Label, Dictionary<int, CacheRecord>>();
+        this.failure = 0;
+        this.expected = new List<String[]>();
+    }
+
+    public static TreeNode parse(String input, Actions actions) {
+        {{name}} parser = new {{name}}(input, actions);
+        return parser.parse();
+    }
+
+    public static TreeNode parse(String input) {
+        return parse(input, null);
+    }
+
+    private static String formatError(String input, int offset, List<String[]> expected) {
+        String[] lines = input.Split('\n');
+        int lineNo = 0, position = 0;
+        // Walk line by line (length plus newline) up to the line holding `offset`.
+        while (position <= offset) {
+            position += lines[lineNo].Length + 1;
+            lineNo += 1;
+        }
+
+        String line = lines[lineNo - 1];
+        String message = "Line " + lineNo + ": expected one of:\n\n";
+
+        foreach (String[] pair in expected) {
+            message += " - " + pair[1] + " from " + pair[0] + "\n";
+        }
+
+        String number = "" + lineNo;
+        while (number.Length < 6) number = " " + number;
+        message += "\n" + number + " | " + line + "\n";
+        // Step back to nine columns before the line start so the padding below
+        position -= line.Length + 10; // also spans the "number | " prefix.
+
+        while (position < offset) {
+            message += " ";
+            position += 1;
+        }
+        return message + "^";
+    }
+
+    private TreeNode parse() {
+        TreeNode tree = _read_{{root}}();
+        if (tree != FAILURE && offset == inputSize) {
+            return tree;
+        }
+        if (expected.Count <= 0) {
+            failure = offset;
+            expected.Add(new String[] { {{{grammar}}}, "" });
+        }
+        throw new ParseError(formatError(input, failure, expected));
+    }
+}
diff --git a/templates/cs/TreeNode.cs b/templates/cs/TreeNode.cs
new file mode 100644
index 0000000..7e3c48f
--- /dev/null
+++ b/templates/cs/TreeNode.cs
@@ -0,0 +1,48 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+// Parse tree node; enumerating a node yields its child elements.
+public class {{name}} : IEnumerable<{{name}}> {
+    public String text;
+    public int offset;
+    public List<{{name}}> elements;
+
+    // Children stored by label; protected so derived node classes can assign them.
+    protected Dictionary<Label, {{name}}> labelled;
+
+    // Creates a node with empty text, offset -1 and no elements.
+    public {{name}}() : this("", -1, new List<{{name}}>(0)) { }
+
+    public {{name}}(String text, int offset, List<{{name}}> elements) {
+        this.text = text;
+        this.offset = offset;
+        this.elements = elements;
+        this.labelled = new Dictionary<Label, {{name}}>();
+    }
+
+    // Returns the child stored under `key`, or null when there is none.
+    public {{name}} get(Label key) {
+        {{name}} node;
+        return labelled.TryGetValue(key, out node) ? node : null;
+    }
+
+    // Lazily yields the child elements in order.
+    public IEnumerable<{{name}}> iterator() {
+        foreach (var item in elements) {
+            yield return item;
+        }
+    }
+
+    // IEnumerable<T> requires an IEnumerator<T> here, not an IEnumerable<T>.
+    public IEnumerator<{{name}}> GetEnumerator() {
+        foreach (var item in elements) {
+            yield return item;
+        }
+    }
+
+    // Non-generic enumeration forwards to the generic enumerator above.
+    IEnumerator IEnumerable.GetEnumerator() {
+        return GetEnumerator();
+    }
+}
diff --git a/test/cs/ChoicesTest.cs b/test/cs/ChoicesTest.cs
new file mode 100644
index 0000000..a00cfd3
--- /dev/null
+++ b/test/cs/ChoicesTest.cs
@@ -0,0 +1,131 @@
+// Tests for choice expressions, run against the generated Choices parser.
+using System.Collections.Generic;
+using System.Collections;
+
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+//import helpers.Node;
+//import helpers.NodeSpec;
+
+using Label = test.grammars.choices.Label;
+using ParseError = test.grammars.choices.ParseError;
+using Choices = test.grammars.choices.Choices;
+using TreeNode = test.grammars.choices.TreeNode;
+[TestClass]
+public class ChoiceStringsTest : ParseHelper {
+    [TestMethod]
+    public void parsesAnyOfTheChoiceOptions() {
+        expect(Choices.parse("choice-abc: a")).toMatch(node("a", 12));
+        expect(Choices.parse("choice-abc: b")).toMatch(node("b", 12));
+        expect(Choices.parse("choice-abc: c")).toMatch(node("c", 12));
+    }
+
+    [TestMethod]
+    [ExpectedException(typeof(ParseError),
+        "Expected a ParseError")]
+    public void rejectsInputMatchingNoneOfTheOptions() {
+        Choices.parse("choice-abc: d");
+    }
+
+    [TestMethod]
+    [ExpectedException(typeof(ParseError),
+        "Expected a ParseError")]
+    public void rejectsSuperstringsOfTheOptions() {
+        Choices.parse("choice-abc: ab");
+    }
+
+    [TestMethod]
+    public void parsesAChoiceAsPartOfASequence() {
+        expect(Choices.parse("choice-seq: repeat")).toMatch(
+            node("repeat", 12)
+                .elem(node("re", 12).noElems())
+                .elem(node("peat", 14).noElems())
+        );
+    }
+
+    [TestMethod]
+    [ExpectedException(typeof(ParseError),
+        "Expected a ParseError")]
+    public void doesNotBacktrackIfLaterRulesFail() {
+        Choices.parse("choice-seq: reppeat");
+    }
+}
+
+[TestClass] public class ChoiceRepetitionTest : ParseHelper {
+    [TestMethod]
+    public void parsesADifferentOptionOnEachIteration() {
+        expect(Choices.parse("choice-rep: abcabba")).toMatch(
+            node("abcabba", 12)
+                .elem(node("a", 12).noElems())
+                .elem(node("b", 13).noElems())
+                .elem(node("c", 14).noElems())
+                .elem(node("a", 15).noElems())
+                .elem(node("b", 16).noElems())
+                .elem(node("b", 17).noElems())
+                .elem(node("a", 18).noElems())
+        );
+    }
+
+    [TestMethod]
+    [ExpectedException(typeof(ParseError),
+        "Expected a ParseError")]
+    public void rejectsIfAnyIterationDoesNotMatchTheOptions() {
+        Choices.parse("choice-rep: abcadba");
+    }
+}
+
+[TestClass] public class ChoiceSequenceTest : ParseHelper {
+    [TestMethod]
+    public void parsesOneBranchOfTheChoice() {
+        expect(Choices.parse("choice-bind: ab")).toMatch(
+            node("ab", 13)
+                .elem(node("a", 13).noElems())
+                .elem(node("b", 14).noElems())
+        );
+    }
+
+    [TestMethod]
+    [ExpectedException(typeof(ParseError),
+        "Expected a ParseError")]
+    public void testBindsSequencesTighterThanChoices() {
+        Choices.parse("choice-bind: abef");
+    }
+}
+
+class ParseHelper {
+    Node