diff --git a/examples/simple/math.js b/examples/simple/math.js index 020e32b..f5fe0ff 100644 --- a/examples/simple/math.js +++ b/examples/simple/math.js @@ -11,8 +11,8 @@ export function multiply(a, b) { } export function divide(a, b) { -// if (b === 0) { -// throw new Error('Cannot divide by zero'); -// } + if (b === 0) { + throw new Error('Cannot divide by zero'); + } return a / b; } \ No newline at end of file diff --git a/examples/tricky/index.js b/examples/tricky/index.js index 7b953c5..98329ac 100644 --- a/examples/tricky/index.js +++ b/examples/tricky/index.js @@ -1,3 +1,31 @@ -({ - FETCH_CONTEXT: () => (/* binding */ FETCH_CONTEXT), -}); \ No newline at end of file +////OK +//(x=>3); +//(x=>({})); +//((x)=>3); +//((x)=>({})); +//(x)=>{}; +//(x,y,z)=>({}); +//(x,y) => 3; +//x=>3; +//[x=>3]; +//x=>({}); +//(x=>3)(3); +//x=>{}; +//async function* foo(a, b, ...c) {} +//function* foo(a) {} +//(function foo() {}()) +//((x)=>3)(3); +//(x,y,z,a,b,c,d)=>3; +//(function() {}) +//({ x: y => 3 }); +// +//// ERROR +////function foo(a=1) {} +//{ x: y => 3 }; + + +//for (let i=0; i, - pub comments: Vec, + pub source_type: SourceType, } #[derive(Debug, Clone, PartialEq)] @@ -11,183 +20,266 @@ pub enum SourceType { Module, } -#[derive(Debug, Clone)] +/// Represents a JavaScript statement +#[derive(Debug, Clone, PartialEq)] pub enum Statement { - Empty, - Block(Vec), - Expression(Expression), - If { - test: Expression, - consequent: Box, - alternate: Option>, - }, - Loop(LoopStatement), + ExpressionStatement(ExpressionStatement), + BlockStatement(BlockStatement), + EmptyStatement, + DebuggerStatement, + WithStatement(WithStatement), + ReturnStatement(ReturnStatement), + LabeledStatement(LabeledStatement), + BreakStatement(BreakStatement), + ContinueStatement(ContinueStatement), + IfStatement(IfStatement), + SwitchStatement(SwitchStatement), + ThrowStatement(ThrowStatement), + TryStatement(TryStatement), + WhileStatement(WhileStatement), + DoWhileStatement(DoWhileStatement), + ForStatement(ForStatement), + ForInStatement(ForInStatement), + ForOfStatement(ForOfStatement), Declaration(Declaration), - Return(Option), - Labeled { - label: Box, - body: Box, - }, - Break(Option>), - Continue(Option>), - Try { - block: Box, - handler: Option, - finalizer: Option>, - }, - Throw(Expression), - Switch { - discriminant: Expression, - cases: Vec, - }, - Import { - specifiers: Vec, - source: Box, - assertions: Vec, - }, - Export(ExportDeclaration), - With { - object: Expression, - body: Box, - }, - Debugger, -} - -#[derive(Debug, Clone)] -pub enum LoopStatement { - While { - test: Expression, - body: Box, - }, - DoWhile { - body: Box, - test: Expression, - }, - For { - init: Option, - test: Option, - update: Option, - body: Box, - }, - ForIn { - left: ForInOfLeft, - right: Expression, - body: Box, - }, - ForOf { - left: ForInOfLeft, - right: Expression, - body: Box, - is_await: bool, - }, -} - -#[derive(Debug, Clone)] -pub enum ForInOfLeft { - Declaration(VariableDeclaration), - Pattern(Expression), -} - -#[derive(Debug, Clone)] +} + +/// Represents a JavaScript expression +#[derive(Debug, Clone, PartialEq)] +pub enum Expression { + Identifier(Identifier), + Literal(Literal), + ThisExpression(ThisExpression), + ArrayExpression(ArrayExpression), + ObjectExpression(ObjectExpression), + FunctionExpression(FunctionExpression), + ArrowFunctionExpression(ArrowFunctionExpression), + ClassExpression(ClassExpression), + TaggedTemplateExpression(TaggedTemplateExpression), + MemberExpression(MemberExpression), + SuperExpression(SuperExpression), + MetaProperty(MetaProperty), + NewExpression(NewExpression), + CallExpression(CallExpression), + UpdateExpression(UpdateExpression), + AwaitExpression(AwaitExpression), + UnaryExpression(UnaryExpression), + BinaryExpression(BinaryExpression), + LogicalExpression(LogicalExpression), + ConditionalExpression(ConditionalExpression), + YieldExpression(YieldExpression), + AssignmentExpression(AssignmentExpression), + SequenceExpression(SequenceExpression), +} + +/// Represents a JavaScript declaration +#[derive(Debug, Clone, PartialEq)] pub enum Declaration { - Variable(VariableDeclaration), - Function(FunctionDeclaration), - Class(ClassDeclaration), + VariableDeclaration(VariableDeclaration), + FunctionDeclaration(FunctionDeclaration), + ClassDeclaration(ClassDeclaration), + ImportDeclaration(ImportDeclaration), + ExportNamedDeclaration(ExportNamedDeclaration), + ExportDefaultDeclaration(ExportDefaultDeclaration), + ExportAllDeclaration(ExportAllDeclaration), } -#[derive(Debug, Clone)] -pub struct VariableDeclaration { - pub declarations: Vec, - pub kind: VariableKind, +/// Represents a JavaScript pattern (destructuring) +#[derive(Debug, Clone, PartialEq)] +pub enum Pattern { + Identifier(Identifier), + ObjectPattern(ObjectPattern), + ArrayPattern(ArrayPattern), + RestElement(RestElement), + AssignmentPattern(AssignmentPattern), + MemberExpression(MemberExpression), } -#[derive(Debug, Clone)] -pub struct FunctionDeclaration { - pub id: Box, - pub params: Vec, - pub body: Vec, - pub is_async: bool, - pub is_generator: bool, +/// Represents an identifier +#[derive(Debug, Clone, PartialEq)] +pub struct Identifier { + + pub name: Box, } -#[derive(Debug, Clone)] -pub struct ClassDeclaration { - pub id: Box, - pub super_class: Option, - pub body: Vec, -} - -#[derive(Debug, Clone)] -pub enum ClassMember { - Constructor { - params: Vec, - body: Vec, - }, - Method { - key: PropertyKey, - value: MethodDefinition, - kind: MethodKind, - is_static: bool, - }, - Property { - key: PropertyKey, - value: Option, - is_static: bool, - }, - StaticBlock { - body: Vec, - }, -} - -#[derive(Debug, Clone)] -pub struct MethodDefinition { - pub params: Vec, +#[derive(Debug, Clone, PartialEq)] +pub struct PrivateIdentifier { + pub name: Box, +} + +/// Represents a literal value +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + StringLiteral(StringLiteral), + BooleanLiteral(BooleanLiteral), + UndefinedLiteral(UndefinedLiteral), + NullLiteral(NullLiteral), + NumericLiteral(NumericLiteral), + BigIntLiteral(BigIntLiteral), + RegExpLiteral(RegExpLiteral), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StringLiteral { + pub value: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BooleanLiteral { + pub value: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NullLiteral { + +} + +#[derive(Debug, Clone, PartialEq)] +pub struct UndefinedLiteral { + +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NumericLiteral { + pub value: f64, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BigIntLiteral { + pub value: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct RegExpLiteral { + pub pattern: Box, + pub flags: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BlockStatement { pub body: Vec, - pub is_async: bool, - pub is_generator: bool, } -#[derive(Debug, Clone)] -pub enum PropertyKey { - Identifier(Box), - StringLiteral(Box), - NumericLiteral(f64), - Computed(Expression), - PrivateIdentifier(Box), +#[derive(Debug, Clone, PartialEq)] +pub struct ExpressionStatement { + pub expression: Box, } #[derive(Debug, Clone, PartialEq)] -pub enum MethodKind { - Method, - Getter, - Setter, +pub struct WithStatement { + pub object: Box, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ReturnStatement { + pub argument: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct LabeledStatement { + pub label: Identifier, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BreakStatement { + pub label: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ContinueStatement { + pub label: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct IfStatement { + pub test: Box, + pub consequent: Box, + pub alternate: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct SwitchStatement { + pub discriminant: Box, + pub cases: Vec, } -#[derive(Debug, Clone)] -pub enum ExportDeclaration { - Named { - declaration: Option>, - specifiers: Vec, - source: Option>, - }, - Default(Box), - All { - source: Box, - exported: Option>, - }, +#[derive(Debug, Clone, PartialEq)] +pub struct SwitchCase { + pub test: Option>, + pub consequent: Vec, } -#[derive(Debug, Clone)] -pub enum ExportDefaultDeclaration { - Expression(Expression), - Function(FunctionDeclaration), - Class(ClassDeclaration), +#[derive(Debug, Clone, PartialEq)] +pub struct ThrowStatement { + pub argument: Box, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] +pub struct TryStatement { + pub block: BlockStatement, + pub handler: Option, + pub finalizer: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct CatchClause { + pub param: Option, + pub body: BlockStatement, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct WhileStatement { + pub test: Box, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct DoWhileStatement { + pub body: Box, + pub test: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ForStatement { + pub init: Option, + pub test: Option>, + pub update: Option>, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] pub enum ForInit { - Variable(VariableDeclaration), - Expression(Expression), + VariableDeclaration(VariableDeclaration), + Expression(Box), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ForInStatement { + pub left: ForInOf, + pub right: Box, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ForOfStatement { + pub left: ForInOf, + pub right: Box, + pub body: Box, + pub await_token: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ForInOf { + VariableDeclaration(VariableDeclaration), + Pattern(Pattern), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VariableDeclaration { + pub declarations: Vec, + pub kind: VariableKind, } #[derive(Debug, Clone, PartialEq)] @@ -197,254 +289,223 @@ pub enum VariableKind { Const, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct VariableDeclarator { pub id: Pattern, - pub init: Option, + pub init: Option>, } -#[derive(Debug, Clone)] -pub enum Pattern { - Identifier(Box), - ObjectPattern(Vec), - ArrayPattern(Vec>), - RestElement(Box), - AssignmentPattern { - left: Box, - right: Expression, - }, +#[derive(Debug, Clone, PartialEq)] +pub struct ThisExpression { + } -#[derive(Debug, Clone)] -pub enum ObjectPatternProperty { - Property { - key: PropertyKey, - value: Pattern, - computed: bool, - shorthand: bool, - }, - Rest(Box), +#[derive(Debug, Clone, PartialEq)] +pub struct ArrayExpression { + pub elements: Vec>, } -#[derive(Debug, Clone)] -pub struct CatchClause { - pub param: Option, - pub body: Box, +#[derive(Debug, Clone, PartialEq)] +pub struct ObjectExpression { + pub properties: Vec, } -#[derive(Debug, Clone)] -pub struct SwitchCase { - pub test: Option, - pub consequent: Vec, +#[derive(Debug, Clone, PartialEq)] +pub struct Property { + pub key: PropertyKey, + pub value: Box, + pub kind: PropertyKind, + pub method: bool, + pub shorthand: bool, + pub computed: bool, } -#[derive(Debug, Clone)] -pub enum ImportSpecifier { - Named { - imported: Box, - local: Box, - }, - Default(Box), - Namespace(Box), +#[derive(Debug, Clone, PartialEq)] +pub enum PropertyKey { + Identifier(Identifier), + PrivateIdentifier(PrivateIdentifier), + Literal(Literal), + Expression(Box), } -#[derive(Debug, Clone)] -pub struct ImportAssertion { - pub key: Box, - pub value: Box, +#[derive(Debug, Clone, PartialEq)] +pub enum PropertyKind { + Init, + Get, + Set, } -#[derive(Debug, Clone)] -pub struct ExportSpecifier { - pub local: Box, - pub exported: Box, +#[derive(Debug, Clone, PartialEq)] +pub struct FunctionExpression { + pub id: Option, + pub params: Vec, + pub body: BlockStatement, + pub generator: bool, + pub async_function: bool, } -#[derive(Debug, Clone)] -pub enum Expression { - Identifier(Box), - This, - Super, - Literal(Literal), - Array(Vec>), - Object(Vec), - Function { - id: Option>, - params: Vec, - body: Vec, - is_async: bool, - is_generator: bool, - }, - ArrowFunction { - params: Vec, - body: ArrowFunctionBody, - is_async: bool, - }, - Class { - id: Option>, - super_class: Option>, - body: Vec, - }, - Unary { - operator: UnaryOperator, - argument: Box, - prefix: bool, - }, - Binary { - operator: BinaryOperator, - left: Box, - right: Box, - }, - Logical { - operator: LogicalOperator, - left: Box, - right: Box, - }, - Assignment { - operator: AssignmentOperator, - left: Box, - right: Box, - }, - Member { - object: Box, - property: Box, - computed: bool, - optional: bool, - }, - Call { - callee: Box, - arguments: Vec, - optional: bool, - }, - New { - callee: Box, - arguments: Vec, - }, - Conditional { - test: Box, - consequent: Box, - alternate: Box, - }, - TemplateLiteral { - quasis: Vec>, - expressions: Vec, - }, - TaggedTemplate { - tag: Box, - quasi: Box, - }, - Sequence(Vec), - Spread(Box), - Yield { - argument: Option>, - delegate: bool, - }, - Await(Box), - OptionalChain { - base: Box, - chain: Vec, - }, - Import(Box), - MetaProperty { - meta: Box, - property: Box, - }, - PrivateName(Box), - ChainExpression(Box), -} - -#[derive(Debug, Clone)] -pub enum ArrayElement { - Expression(Expression), - Spread(Expression), - Hole, -} - -#[derive(Debug, Clone)] -pub enum Argument { - Expression(Expression), - Spread(Expression), -} - -#[derive(Debug, Clone)] -pub enum OptionalChainElement { - Property { - name: Box, - computed: bool, - }, - Call { - arguments: Vec, - }, -} - -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] +pub struct ArrowFunctionExpression { + pub params: Vec, + pub body: ArrowFunctionBody, + pub expression: bool, + pub async_function: bool, +} + +#[derive(Debug, Clone, PartialEq)] pub enum ArrowFunctionBody { - Block(Vec), + BlockStatement(BlockStatement), Expression(Box), } -#[derive(Debug, Clone)] -pub enum ObjectProperty { - Property { - key: PropertyKey, - value: Expression, - kind: PropertyKind, - computed: bool, - shorthand: bool, - }, - Method { - key: PropertyKey, - value: MethodDefinition, - kind: MethodKind, - computed: bool, - }, - Spread(Expression), +#[derive(Debug, Clone, PartialEq)] +pub struct FunctionDeclaration { + pub id: Option, + pub params: Vec, + pub body: BlockStatement, + pub generator: bool, + pub async_function: bool, } #[derive(Debug, Clone, PartialEq)] -pub enum PropertyKind { - Init, +pub struct ClassDeclaration { + pub id: Option, + pub super_class: Option>, + pub body: ClassBody, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ClassExpression { + pub id: Option, + pub super_class: Option>, + pub body: ClassBody, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ClassBody { + pub body: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ClassElement { + MethodDefinition(MethodDefinition), + StaticBlock(StaticBlock), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StaticBlock { + pub body: BlockStatement, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct MethodDefinition { + pub key: PropertyKey, + pub value: FunctionExpression, + pub kind: MethodKind, + pub computed: bool, + pub static_method: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum MethodKind { + Constructor, + Method, Get, Set, } -#[derive(Debug, Clone)] -pub enum Literal { - Number(f64), - String(Box), - Boolean(bool), - Null, - Undefined, - RegExp { - pattern: Box, - flags: Box, - }, - BigInt(Box), -} - -#[derive(Clone)] -pub struct Comment { - pub text: Box, - pub is_block: bool, - pub span: (u32, u32), +#[derive(Debug, Clone, PartialEq)] +pub struct TaggedTemplateExpression { + pub tag: Box, + pub quasi: TemplateLiteral, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateLiteral { + pub quasis: Vec, + pub expressions: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateElement { + pub value: TemplateElementValue, + pub tail: bool, } -impl std::fmt::Debug for Comment { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Comment") - .field("text", &self.text) - .field("is_block", &self.is_block) - .field("span", &self.span) - .finish() - } +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateElementValue { + pub raw: Box, + pub cooked: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct MemberExpression { + pub object: Box, + pub property: MemberProperty, + pub computed: bool, + pub optional: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum MemberProperty { + Identifier(Identifier), + Expression(Box), + PrivateIdentifier(PrivateIdentifier), // TODO implement +} + +#[derive(Debug, Clone, PartialEq)] +pub struct SuperExpression { + +} + +#[derive(Debug, Clone, PartialEq)] +pub struct MetaProperty { + pub meta: Identifier, + pub property: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NewExpression { + pub callee: Box, + pub arguments: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct CallExpression { + pub callee: Box, + pub arguments: Vec, + pub optional: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct UpdateExpression { + pub operator: UpdateOperator, + pub argument: Box, + pub prefix: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum UpdateOperator { + Increment, + Decrement, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct AwaitExpression { + pub argument: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct UnaryExpression { + pub operator: UnaryOperator, + pub argument: Box, + pub prefix: bool, } #[derive(Debug, Clone, PartialEq)] pub enum UnaryOperator { Minus, Plus, - Increment, - Decrement, Not, BitwiseNot, Typeof, @@ -452,32 +513,46 @@ pub enum UnaryOperator { Delete, } +#[derive(Debug, Clone, PartialEq)] +pub struct BinaryExpression { + pub operator: BinaryOperator, + pub left: Box, + pub right: Box, +} + #[derive(Debug, Clone, PartialEq)] pub enum BinaryOperator { - Add, - Subtract, - Multiply, - Divide, - Modulo, - Exponent, Equal, - StrictEqual, NotEqual, + StrictEqual, StrictNotEqual, LessThan, - LessThanEqual, + LessThanOrEqual, GreaterThan, - GreaterThanEqual, - BitwiseAnd, - BitwiseOr, - BitwiseXor, + GreaterThanOrEqual, LeftShift, RightShift, UnsignedRightShift, + Addition, + Subtraction, + Multiplication, + Division, + Remainder, + Exponentiation, + BitwiseOr, + BitwiseXor, + BitwiseAnd, In, InstanceOf, } +#[derive(Debug, Clone, PartialEq)] +pub struct LogicalExpression { + pub operator: LogicalOperator, + pub left: Box, + pub right: Box, +} + #[derive(Debug, Clone, PartialEq)] pub enum LogicalOperator { And, @@ -485,22 +560,155 @@ pub enum LogicalOperator { NullishCoalescing, } +#[derive(Debug, Clone, PartialEq)] +pub struct ConditionalExpression { + pub test: Box, + pub consequent: Box, + pub alternate: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct YieldExpression { + + pub argument: Option>, + pub delegate: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct AssignmentExpression { + pub operator: AssignmentOperator, + pub left: AssignmentLeft, + pub right: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum AssignmentLeft { + Pattern(Pattern), + Expression(Box), +} + #[derive(Debug, Clone, PartialEq)] pub enum AssignmentOperator { Assign, - AddAssign, - SubtractAssign, + PlusAssign, + MinusAssign, MultiplyAssign, DivideAssign, - ModuloAssign, - ExponentAssign, - BitwiseAndAssign, - BitwiseOrAssign, - BitwiseXorAssign, + RemainderAssign, + ExponentiationAssign, LeftShiftAssign, RightShiftAssign, UnsignedRightShiftAssign, - LogicalAndAssign, + BitwiseOrAssign, + BitwiseXorAssign, + BitwiseAndAssign, LogicalOrAssign, - NullishAssign, + LogicalAndAssign, + NullishCoalescingAssign, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct SequenceExpression { + pub expressions: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ObjectPattern { + pub properties: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ObjectPatternProperty { + Property(ObjectProperty), + RestElement(RestElement), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ObjectProperty { + pub key: PropertyKey, + pub value: Pattern, + pub computed: bool, + pub shorthand: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ArrayPattern { + pub elements: Vec>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct RestElement { + pub argument: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct AssignmentPattern { + pub left: Box, + pub right: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ImportDeclaration { + pub specifiers: Vec, + pub source: StringLiteral, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ImportSpecifier { + ImportSpecifier(NamedImportSpecifier), + ImportDefaultSpecifier(ImportDefaultSpecifier), + ImportNamespaceSpecifier(ImportNamespaceSpecifier), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NamedImportSpecifier { + pub imported: Identifier, + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ImportDefaultSpecifier { + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ImportNamespaceSpecifier { + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportNamedDeclaration { + pub declaration: Option>, + pub specifiers: Vec, + pub source: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportSpecifier { + pub exported: Identifier, + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportDefaultDeclaration { + pub declaration: ExportDefaultDeclarationKind, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ExportDefaultDeclarationKind { + Declaration(Box), + Expression(Box), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportAllDeclaration { + pub source: StringLiteral, + pub exported: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Comment { + pub text: String, + pub multiline: bool, + pub location: SourceLocation, } diff --git a/src/grammar/array.rs b/src/grammar/array.rs new file mode 100644 index 0000000..a830b01 --- /dev/null +++ b/src/grammar/array.rs @@ -0,0 +1,101 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; + +pub struct ArrayExpressionNode; + +impl ArrayExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ArrayExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBracket, "Expected '[' at the start of array expression")?; + + let mut elements = Vec::new(); + + if parser.check(&Token::RightBracket) { + parser.advance(); + return Ok(ArrayExpression { elements }); + } + + let expr_parser = ExpressionNode::new(); + + loop { + if parser.check(&Token::Comma) { + parser.advance(); + elements.push(None); + } else if !parser.check(&Token::RightBracket) { + let element = expr_parser.parse_with_precedence(parser, Precedence::Comma.next())?; + elements.push(Some(element)); + + if !parser.check(&Token::Comma) { + break; + } + + parser.advance(); + + if parser.check(&Token::RightBracket) { + break; + } + } else { + break; + } + } + + parser.assert_consume(&Token::RightBracket, "Expected ']' at the end of array expression")?; + + Ok(ArrayExpression { elements }) + } +} + +impl UnparserCombinator for ArrayExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ArrayExpression) { + unparser.write_char('['); + + if !node.elements.is_empty() { + let multiline = node.elements.len() > 5; + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + u.write_char(','); + u.newline(); + } + match elem { + Some(expr) => { + ExpressionNode::new().unparse(u, expr); + }, + None => { + } + } + } + }); + unparser.newline(); + } else { + unparser.space(); + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + match elem { + Some(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + None => { + } + } + } + unparser.space(); + } + } + + unparser.write_char(']'); + } +} diff --git a/src/grammar/await_expression.rs b/src/grammar/await_expression.rs new file mode 100644 index 0000000..9037fa9 --- /dev/null +++ b/src/grammar/await_expression.rs @@ -0,0 +1,37 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; + +pub struct AwaitExpressionNode; + +impl AwaitExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for AwaitExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + if !parser.allows_await() { + return Err(parser.error_at_current("'await' expressions are only allowed within async functions and modules")); + } + + parser.assert_consume(&Token::Await, "Expected 'await'")?; + + let argument = Box::new(ExpressionNode::new().parse(parser)?); + + Ok(AwaitExpression { + argument, + }) + } +} + +impl UnparserCombinator for AwaitExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &AwaitExpression) { + unparser.write_str("await"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, &node.argument); + } +} \ No newline at end of file diff --git a/src/grammar/class.rs b/src/grammar/class.rs new file mode 100644 index 0000000..30c2e66 --- /dev/null +++ b/src/grammar/class.rs @@ -0,0 +1,418 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; +use super::pattern::*; +use super::statement::*; +use super::function::*; +use super::literal::*; + +pub struct ClassDeclarationNode; + +impl ClassDeclarationNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ClassDeclarationNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Class, "Expected 'class'")?; + + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + let super_class = if parser.consume(&Token::Extends) { + Some(Box::new(ExpressionNode::new().parse(parser)?)) + } else { + None + }; + + let body = ClassBodyNode::new().parse(parser)?; + + Ok(ClassDeclaration { + id, + super_class, + body, + }) + } +} + +impl UnparserCombinator for ClassDeclarationNode { + fn unparse(&self, unparser: &mut Unparser, node: &ClassDeclaration) { + unparser.write_str("class"); + + // Write the class name if present + if let Some(id) = &node.id { + unparser.space(); + unparser.write_str(&id.name); + } + + // Write the extends clause if present + if let Some(super_class) = &node.super_class { + unparser.space(); + unparser.write_str("extends"); + unparser.space(); + ExpressionNode::new().unparse(unparser, super_class); + } + + unparser.space(); + + // Write the class body + ClassBodyNode::new().unparse(unparser, &node.body); + } +} + +pub struct ClassExpressionNode; + +impl ClassExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ClassExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Class, "Expected 'class'")?; + + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + let super_class = if parser.consume(&Token::Extends) { + Some(Box::new(ExpressionNode::new().parse(parser)?)) + } else { + None + }; + + let body = ClassBodyNode::new().parse(parser)?; + + Ok(ClassExpression { + id, + super_class, + body, + }) + } +} + +impl UnparserCombinator for ClassExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ClassExpression) { + unparser.write_str("class"); + + // Write the class name if present + if let Some(id) = &node.id { + unparser.space(); + unparser.write_str(&id.name); + } + + // Write the extends clause if present + if let Some(super_class) = &node.super_class { + unparser.space(); + unparser.write_str("extends"); + unparser.space(); + ExpressionNode::new().unparse(unparser, super_class); + } + + unparser.space(); + + // Write the class body + ClassBodyNode::new().unparse(unparser, &node.body); + } +} + +pub struct SuperExpressionNode; + +impl SuperExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for SuperExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Super, "Expected 'super'")?; + + Ok(SuperExpression {}) + } +} + +impl UnparserCombinator for SuperExpressionNode { + fn unparse(&self, unparser: &mut Unparser, _node: &SuperExpression) { + unparser.write_str("super"); + } +} + +pub struct ClassBodyNode; + +impl ClassBodyNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ClassBodyNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBrace, "Expected '{' after class declaration")?; + + let mut body = Vec::new(); + + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + // Check for static block + if parser.consume(&Token::Static) && parser.check(&Token::LeftBrace) { + let static_block = StaticBlockNode::new().parse(parser)?; + body.push(ClassElement::StaticBlock(static_block)); + continue; + } + + // Parse method definition + let method = MethodDefinitionNode::new().parse(parser)?; + body.push(ClassElement::MethodDefinition(method)); + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after class body")?; + + Ok(ClassBody { body }) + } +} + +impl UnparserCombinator for ClassBodyNode { + fn unparse(&self, unparser: &mut Unparser, node: &ClassBody) { + unparser.write_char('{'); + + if !node.body.is_empty() { + unparser.newline(); + + unparser.with_indent(|u| { + for element in &node.body { + match element { + ClassElement::MethodDefinition(method) => { + MethodDefinitionNode::new().unparse(u, method); + }, + ClassElement::StaticBlock(static_block) => { + u.write_str("static"); + u.space(); + StaticBlockNode::new().unparse(u, static_block); + } + } + u.newline(); + } + }); + } + + unparser.write_char('}'); + } +} + +pub struct MethodDefinitionNode; + +impl MethodDefinitionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for MethodDefinitionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for static modifier + let static_method = parser.consume(&Token::Static); + + // Check for method kind + let mut kind = MethodKind::Method; + if parser.consume(&Token::Get) { + kind = MethodKind::Get; + } else if parser.consume(&Token::Set) { + kind = MethodKind::Set; + } else if parser.consume(&Token::Constructor) { + kind = MethodKind::Constructor; + } + + // Check for async modifier + let async_method = parser.consume(&Token::Async); + + // Check for generator modifier + let generator = parser.consume(&Token::Star); + + // Parse the key + let (key, computed) = if parser.consume(&Token::LeftBracket) { + // Computed property name + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property name")?; + (PropertyKey::Expression(Box::new(expr)), true) + } else if parser.check(&Token::Hash) { + // Private field or method + parser.advance(); // Consume the '#' + // TODO what about string literal? + if let Token::Identifier(name) = parser.peek() { + // Clone the name before advancing the parser + let name_clone = name.clone(); + + // Now advance the parser + parser.advance(); + + (PropertyKey::PrivateIdentifier(PrivateIdentifier { name: name_clone.into() }), false) + } else { + return Err(parser.error_at_current("Expected identifier after '#'")); + } + } else { + // Regular identifier or literal + match parser.peek() { + Token::StringLiteral(_) | + Token::NumberLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + (PropertyKey::Literal(literal), false) + }, + _ => { + let ident = IdentifierNode::new().parse(parser)?; + (PropertyKey::Identifier(ident), false) + } + } + }; + + // Parse the function body + parser.assert_consume(&Token::LeftParen, "Expected '(' after method name")?; + + // Create a function expression for the method + let mut func_expr = FunctionExpressionNode::new().parse(parser)?; + func_expr.generator = generator; + func_expr.async_function = async_method; + + Ok(MethodDefinition { + key, + value: func_expr, + kind, + computed, + static_method, + }) + } +} + +impl UnparserCombinator for MethodDefinitionNode { + fn unparse(&self, unparser: &mut Unparser, node: &MethodDefinition) { + // Write static modifier if present + if node.static_method { + unparser.write_str("static"); + unparser.space(); + } + + // Write method kind + match node.kind { + MethodKind::Constructor => { + unparser.write_str("constructor"); + }, + MethodKind::Method => { + // For async methods + if node.value.async_function { + unparser.write_str("async"); + unparser.space(); + } + + // For generator methods + if node.value.generator { + unparser.write_char('*'); + } + }, + MethodKind::Get => { + unparser.write_str("get"); + unparser.space(); + }, + MethodKind::Set => { + unparser.write_str("set"); + unparser.space(); + } + } + + // Write the method key + if node.computed { + unparser.write_char('['); + match &node.key { + PropertyKey::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::PrivateIdentifier(id) => { + unparser.write_char('#'); + unparser.write_str(&id.name); + } + } + unparser.write_char(']'); + } else { + match &node.key { + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::Expression(_) => { + // This shouldn't happen for non-computed properties + unparser.write_str("\"error\""); + }, + PropertyKey::PrivateIdentifier(id) => { + unparser.write_char('#'); + unparser.write_str(&id.name); + } + } + } + + // Write the method parameters and body + unparser.write_char('('); + + // Write parameters + if !node.value.params.is_empty() { + for (i, param) in node.value.params.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + PatternNode::new().unparse(unparser, param); + } + } + + unparser.write_char(')'); + unparser.space(); + + // Write the method body + BlockStatementNode::new().unparse(unparser, &node.value.body); + } +} + +pub struct StaticBlockNode; + +impl StaticBlockNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for StaticBlockNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let block = BlockStatementNode::new().parse(parser)?; + + Ok(StaticBlock { body: block }) + } +} + +impl UnparserCombinator for StaticBlockNode { + fn unparse(&self, unparser: &mut Unparser, node: &StaticBlock) { + BlockStatementNode::new().unparse(unparser, &node.body); + } +} diff --git a/src/grammar/declaration.rs b/src/grammar/declaration.rs new file mode 100644 index 0000000..3f8c945 --- /dev/null +++ b/src/grammar/declaration.rs @@ -0,0 +1,873 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::literal::*; +use super::pattern::*; +use super::expression::*; +use super::statement::*; +use super::class::*; + +pub struct VariableDeclarationNode; + +impl VariableDeclarationNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for VariableDeclarationNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Parse variable kind + let kind = if parser.consume(&Token::Var) { + VariableKind::Var + } else if parser.consume(&Token::Let) { + VariableKind::Let + } else if parser.consume(&Token::Const) { + VariableKind::Const + } else { + return Err(parser.error_at_current("Expected variable declaration")); + }; + + let mut declarations = Vec::new(); + + // For let/const, track declared identifiers to enforce TDZ + let mut declared_identifiers = Vec::new(); + + // Parse first declarator + let declarator = VariableDeclaratorNode::new().parse(parser)?; + + // TODO not checking TDZ ? + + // For let/const, collect identifiers for TDZ enforcement + if matches!(kind, VariableKind::Let | VariableKind::Const) { + collect_binding_identifiers(&declarator.id, &mut declared_identifiers); + } + + declarations.push(declarator); + + // Parse additional declarators + while parser.consume(&Token::Comma) { + let declarator = VariableDeclaratorNode::new().parse(parser)?; + + // For let/const, collect identifiers and check TDZ + if matches!(kind, VariableKind::Let | VariableKind::Const) { + // If there's an initializer, check that it doesn't reference any of the declared identifiers + if let Some(ref init) = declarator.init { + check_tdz_violation(init, &declared_identifiers, parser)?; + } + + // Add new identifiers to the list + collect_binding_identifiers(&declarator.id, &mut declared_identifiers); + } + + declarations.push(declarator); + } + + Ok(VariableDeclaration { + declarations, + kind, + }) + } +} + +impl UnparserCombinator for VariableDeclarationNode { + fn unparse(&self, unparser: &mut Unparser, node: &VariableDeclaration) { + // Write the variable kind (var, let, const) + match node.kind { + VariableKind::Var => unparser.write_str("var"), + VariableKind::Let => unparser.write_str("let"), + VariableKind::Const => unparser.write_str("const"), + } + + unparser.write_char(' '); + + // Write the declarations + if !node.declarations.is_empty() { + // First declaration + VariableDeclaratorNode::new().unparse(unparser, &node.declarations[0]); + + // Remaining declarations + for decl in &node.declarations[1..] { + unparser.write_char(','); + unparser.space(); + VariableDeclaratorNode::new().unparse(unparser, decl); + } + } + + // Add semicolon + unparser.write_char(';'); + } +} + + +/// Parser for variable declarators +pub struct VariableDeclaratorNode; + +impl VariableDeclaratorNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for VariableDeclaratorNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Parse the identifier or pattern + let id = PatternNode::new().parse(parser)?; + + // Parse the initializer if present + let init = if parser.consume(&Token::Equal) { + Some(Box::new(ExpressionNode::new().parse(parser)?)) + } else { + None + }; + + Ok(VariableDeclarator { + id, + init, + }) + } +} + +/// Parser for function declarations +pub struct FunctionDeclarationParser; + +impl FunctionDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for FunctionDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check if this is an async function + let async_function = parser.consume(&Token::Async); + + // Consume the 'function' keyword + parser.assert_consume(&Token::Function, "Expected 'function'")?; + + // Check if this is a generator function + let generator = parser.consume(&Token::Star); + + // Parse the function name + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + // Parse the parameter list + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut params = Vec::new(); + + if !parser.check(&Token::RightParen) { + // Parse the first parameter + params.push(PatternNode::new().parse(parser)?); + + // Parse additional parameters + while parser.consume(&Token::Comma) { + params.push(PatternNode::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; + + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: async_function, allow_yield: generator }, |p| { + BlockStatementNode::new().parse(p) + })?; + + Ok(FunctionDeclaration { + id, + params, + body, + generator, + async_function, + }) + } +} + +/// Enum for export declarations +pub enum ExportDeclaration { + Named(ExportNamedDeclaration), + Default(ExportDefaultDeclaration), + All(ExportAllDeclaration), +} + +/// Parser for export declarations +pub struct ExportDeclarationParser; + +impl ExportDeclarationParser { + pub fn new() -> Self { + Self + } +} + +// Add these new parser structs for each export type +pub struct ExportNamedDeclarationParser; +pub struct ExportDefaultDeclarationParser; +pub struct ExportAllDeclarationParser; + +impl ExportNamedDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ExportDefaultDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ExportAllDeclarationParser { + pub fn new() -> Self { + Self + } +} + +// Implement parsing for each export type +impl ParserCombinator for ExportNamedDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'export' keyword + parser.assert_consume(&Token::Export, "Expected 'export'")?; + + // Parse declaration if present + let declaration = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let var_decl = VariableDeclarationNode::new().parse(parser)?; + Some(Box::new(Declaration::VariableDeclaration(var_decl))) + } else if parser.check(&Token::Function) { + // Function declaration + let func_decl = FunctionDeclarationParser::new().parse(parser)?; + Some(Box::new(Declaration::FunctionDeclaration(func_decl))) + } else if parser.check(&Token::Class) { + // Class declaration + let class_decl = ClassDeclarationNode::new().parse(parser)?; + Some(Box::new(Declaration::ClassDeclaration(class_decl))) + } else { + None + }; + + // If there's no declaration, there must be export specifiers + let mut specifiers = Vec::new(); + + if declaration.is_none() { + // Parse export specifiers + parser.assert_consume(&Token::LeftBrace, "Expected '{' in named export declaration")?; + + if !parser.check(&Token::RightBrace) { + // Parse the first specifier + specifiers.push(self.parse_export_specifier(parser)?); + + // Parse additional specifiers + while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { + specifiers.push(self.parse_export_specifier(parser)?); + } + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after export specifiers")?; + } + + // Parse the 'from' clause if present + let source = if parser.consume(&Token::From) { + match parser.peek() { + Token::StringLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + match literal { + Literal::StringLiteral(str_lit) => Some(str_lit), + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + }, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + } else { + None + }; + + // Consume the semicolon + parser.consume_semicolon("Expected ';' after export declaration")?; + + Ok(ExportNamedDeclaration { + declaration, + specifiers, + source, + }) + } +} + +impl ParserCombinator for ExportDefaultDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'export' and 'default' keywords + parser.assert_consume(&Token::Export, "Expected 'export'")?; + parser.assert_consume(&Token::Default, "Expected 'default'")?; + + // Parse the declaration or expression + let declaration = if parser.check(&Token::Function) || parser.check(&Token::Class) { + // Function or class declaration + if parser.check(&Token::Function) { + let func = FunctionDeclarationParser::new().parse(parser)?; + ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::FunctionDeclaration(func))) + } else { + let class = ClassDeclarationNode::new().parse(parser)?; + ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::ClassDeclaration(class))) + } + } else { + // Expression + let expr = ExpressionNode::new().parse(parser)?; + parser.consume_semicolon("Expected ';' after export default expression")?; + ExportDefaultDeclarationKind::Expression(Box::new(expr)) + }; + + Ok(ExportDefaultDeclaration { declaration }) + } +} + +impl ParserCombinator for ExportAllDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'export' and '*' tokens + parser.assert_consume(&Token::Export, "Expected 'export'")?; + parser.assert_consume(&Token::Star, "Expected '*'")?; + + // Parse 'as' clause if present + let exported = if parser.consume(&Token::As) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + // Parse 'from' clause + parser.assert_consume(&Token::From, "Expected 'from' after export *")?; + + // Parse the module source + let source = match parser.peek() { + Token::StringLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + match literal { + Literal::StringLiteral(str_lit) => str_lit, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + }, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + }; + + // Consume the semicolon + parser.consume_semicolon("Expected ';' after export * from declaration")?; + + Ok(ExportAllDeclaration { + source, + exported, + }) + } +} + +// Update the ExportDeclarationParser to use the new specific parsers +impl ParserCombinator for ExportDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Save position to check what type of export this is + let pos = parser.save_position(); + + // Consume the 'export' keyword + parser.assert_consume(&Token::Export, "Expected 'export'")?; + + // Check for export type + if parser.check(&Token::Default) { + // Restore position and parse as default export + parser.restore_position(pos); + ExportDefaultDeclarationParser::new().parse(parser).map(ExportDeclaration::Default) + } else if parser.check(&Token::Star) { + // Restore position and parse as export all + parser.restore_position(pos); + ExportAllDeclarationParser::new().parse(parser).map(ExportDeclaration::All) + } else { + // Restore position and parse as named export + parser.restore_position(pos); + ExportNamedDeclarationParser::new().parse(parser).map(ExportDeclaration::Named) + } + } +} + +// Helper method for ExportNamedDeclarationParser +impl ExportNamedDeclarationParser { + fn parse_export_specifier(&self, parser: &mut Parser) -> ParseResult { + // Parse the local name + let local = IdentifierNode::new().parse(parser)?; + + // Parse the exported name if present + let exported = if parser.consume(&Token::As) { + IdentifierNode::new().parse(parser)? + } else { + // If no 'as', the exported name is the same as the local name + Identifier { name: local.name.clone() } + }; + + Ok(ExportSpecifier { + local, + exported, + }) + } +} + +/// Parser for import declarations +pub struct ImportDeclarationParser; + +impl ImportDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ImportDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'import' keyword + parser.assert_consume(&Token::Import, "Expected 'import'")?; + + let mut specifiers = Vec::new(); + + // Check for import type + if matches!(parser.peek(), Token::StringLiteral(_)) { + // Import without specifiers (side-effect import) + // No specifiers to add + } else if parser.consume(&Token::Star) { + // Namespace import + parser.assert_consume(&Token::As, "Expected 'as' after '*' in import declaration")?; + let local = IdentifierNode::new().parse(parser)?; + + specifiers.push(ImportSpecifier::ImportNamespaceSpecifier(ImportNamespaceSpecifier { + local, + })); + + parser.assert_consume(&Token::From, "Expected 'from' after namespace import")?; + } else if parser.check(&Token::LeftBrace) { + // Named imports + parser.assert_consume(&Token::LeftBrace, "Expected '{' in named import")?; + + if !parser.check(&Token::RightBrace) { + // Parse the first specifier + specifiers.push(self.parse_import_specifier(parser)?); + + // Parse additional specifiers + while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { + specifiers.push(self.parse_import_specifier(parser)?); + } + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after import specifiers")?; + + parser.assert_consume(&Token::From, "Expected 'from' after named imports")?; + } else if matches!(parser.peek(), Token::Identifier(_)) { + // Default import + let local = IdentifierNode::new().parse(parser)?; + + specifiers.push(ImportSpecifier::ImportDefaultSpecifier(ImportDefaultSpecifier { + local, + })); + + // Check for additional named imports + if parser.consume(&Token::Comma) { + if parser.consume(&Token::Star) { + // Namespace import after default import + parser.assert_consume(&Token::As, "Expected 'as' after '*' in import declaration")?; + let local = IdentifierNode::new().parse(parser)?; + + specifiers.push(ImportSpecifier::ImportNamespaceSpecifier(ImportNamespaceSpecifier { + local, + })); + } else if parser.consume(&Token::LeftBrace) { + // Named imports after default import + if !parser.check(&Token::RightBrace) { + // Parse the first specifier + specifiers.push(self.parse_import_specifier(parser)?); + + // Parse additional specifiers + while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { + specifiers.push(self.parse_import_specifier(parser)?); + } + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after import specifiers")?; + } + } + + parser.assert_consume(&Token::From, "Expected 'from' after import specifiers")?; + } + + // Parse the source + let source = match parser.peek() { + Token::StringLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + match literal { + Literal::StringLiteral(str_lit) => str_lit, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + }, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + }; + + // Consume the semicolon + parser.consume_semicolon("Expected ';' after import declaration")?; + + Ok(ImportDeclaration { + specifiers, + source, + }) + } +} + +impl ImportDeclarationParser { + fn parse_import_specifier(&self, parser: &mut Parser) -> ParseResult { + // Parse the imported name + let imported = IdentifierNode::new().parse(parser)?; + + // Parse the local name if present + let local = if parser.consume(&Token::As) { + IdentifierNode::new().parse(parser)? + } else { + // If no 'as', the local name is the same as the imported name + Identifier { name: imported.name.clone() } + }; + + Ok(ImportSpecifier::ImportSpecifier(NamedImportSpecifier { + imported, + local, + })) + } +} + +// Helper function to collect identifiers from a binding pattern +fn collect_binding_identifiers(pattern: &Pattern, identifiers: &mut Vec>) { + match pattern { + Pattern::Identifier(ident) => { + identifiers.push(ident.name.clone()); + }, + Pattern::ObjectPattern(obj_pattern) => { + for prop in &obj_pattern.properties { + match prop { + ObjectPatternProperty::Property(prop) => { + collect_binding_identifiers(&prop.value, identifiers); + }, + ObjectPatternProperty::RestElement(rest) => { + collect_binding_identifiers(&rest.argument, identifiers); + }, + } + } + }, + Pattern::ArrayPattern(arr_pattern) => { + for elem in arr_pattern.elements.iter().flatten() { + collect_binding_identifiers(elem, identifiers); + } + }, + Pattern::RestElement(rest) => { + collect_binding_identifiers(&rest.argument, identifiers); + }, + Pattern::AssignmentPattern(assign) => { + collect_binding_identifiers(&assign.left, identifiers); + }, + _ => {} + } +} + +// Helper function to check for TDZ violations in initializers +fn check_tdz_violation(expr: &Expression, declared_identifiers: &[Box], parser: &mut Parser) -> ParseResult<()> { + match expr { + Expression::Identifier(ident) => { + if declared_identifiers.contains(&ident.name) { + return Err(parser.error_at_current(&format!( + "Cannot access '{}' before initialization (temporal dead zone violation)", + ident.name + ))); + } + }, + Expression::MemberExpression(member) => { + check_tdz_violation(&member.object, declared_identifiers, parser)?; + if let MemberProperty::Expression(ref expr) = member.property { + check_tdz_violation(expr, declared_identifiers, parser)?; + } + }, + Expression::CallExpression(call) => { + check_tdz_violation(&call.callee, declared_identifiers, parser)?; + for arg in &call.arguments { + check_tdz_violation(arg, declared_identifiers, parser)?; + } + }, + // Add checks for other expression types as needed + _ => {} + } + Ok(()) +} + +// Variable declarator unparser +impl UnparserCombinator for VariableDeclaratorNode { + fn unparse(&self, unparser: &mut Unparser, node: &VariableDeclarator) { + // Write the identifier or pattern + PatternNode::new().unparse(unparser, &node.id); + + // Write the initializer if present + if let Some(init) = &node.init { + unparser.space(); + unparser.write_char('='); + unparser.space(); + ExpressionNode::new().unparse(unparser, init); + } + } +} + +// Function declaration unparser +impl UnparserCombinator for FunctionDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &FunctionDeclaration) { + // Write async if it's an async function + if node.async_function { + unparser.write_str("async"); + unparser.write_char(' '); + } + + // Write the function keyword + unparser.write_str("function"); + + // Write * if it's a generator function + if node.generator { + unparser.write_char('*'); + } + + // Write the function name if present + if let Some(id) = &node.id { + unparser.write_char(' '); + unparser.write_str(&id.name); + } + + // Write the parameter list + unparser.write_char('('); + + // Write parameters + if !node.params.is_empty() { + PatternNode::new().unparse(unparser, &node.params[0]); + + for param in &node.params[1..] { + unparser.write_char(','); + unparser.space(); + PatternNode::new().unparse(unparser, param); + } + } + + unparser.write_char(')'); + unparser.space(); + + // Write the function body + BlockStatementNode::new().unparse(unparser, &node.body); + } +} + +// Export named declaration unparser +impl UnparserCombinator for ExportNamedDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ExportNamedDeclaration) { + unparser.write_str("export"); + unparser.write_char(' '); + + // Handle declaration if present + if let Some(decl) = &node.declaration { + match &**decl { + Declaration::VariableDeclaration(var_decl) => { + VariableDeclarationNode::new().unparse(unparser, var_decl); + }, + Declaration::FunctionDeclaration(func_decl) => { + FunctionDeclarationParser::new().unparse(unparser, func_decl); + }, + Declaration::ClassDeclaration(class_decl) => { + // Assuming ClassDeclarationNode is implemented elsewhere + ClassDeclarationNode::new().unparse(unparser, class_decl); + }, + _ => { + // This shouldn't happen for export named declarations + unparser.write_str("/* unsupported declaration */"); + } + } + } else { + // Export specifiers + unparser.write_char('{'); + + if !node.specifiers.is_empty() { + unparser.space(); + + // First specifier + self.unparse_export_specifier(unparser, &node.specifiers[0]); + + // Remaining specifiers + for spec in &node.specifiers[1..] { + unparser.write_char(','); + unparser.space(); + self.unparse_export_specifier(unparser, spec); + } + + unparser.space(); + } + + unparser.write_char('}'); + + // Handle 'from' clause if present + if let Some(source) = &node.source { + unparser.write_char(' '); + unparser.write_str("from"); + unparser.write_char(' '); + unparser.write_str(&format!("\"{}\"", source.value)); + } + + unparser.write_char(';'); + } + } +} + +// Export default declaration unparser +impl UnparserCombinator for ExportDefaultDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ExportDefaultDeclaration) { + unparser.write_str("export"); + unparser.write_char(' '); + unparser.write_str("default"); + unparser.write_char(' '); + + match &node.declaration { + ExportDefaultDeclarationKind::Declaration(decl) => { + match &**decl { + Declaration::FunctionDeclaration(func_decl) => { + FunctionDeclarationParser::new().unparse(unparser, func_decl); + }, + Declaration::ClassDeclaration(class_decl) => { + ClassDeclarationNode::new().unparse(unparser, class_decl); + }, + _ => { + // This shouldn't happen for export default declarations + unparser.write_str("/* unsupported declaration */"); + } + } + }, + ExportDefaultDeclarationKind::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + unparser.write_char(';'); + } + } + } +} + +// Export all declaration unparser +impl UnparserCombinator for ExportAllDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ExportAllDeclaration) { + unparser.write_str("export"); + unparser.write_char(' '); + unparser.write_char('*'); + + // Handle 'as' clause if present + if let Some(exported) = &node.exported { + unparser.write_char(' '); + unparser.write_str("as"); + unparser.write_char(' '); + unparser.write_str(&exported.name); + } + + unparser.write_char(' '); + unparser.write_str("from"); + unparser.write_char(' '); + unparser.write_str(&format!("\"{}\"", node.source.value)); + unparser.write_char(';'); + } +} + +// Import declaration unparser +impl UnparserCombinator for ImportDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ImportDeclaration) { + unparser.write_str("import"); + unparser.write_char(' '); + + // Handle specifiers + if node.specifiers.is_empty() { + // Side-effect import (no specifiers) + } else { + let mut has_default = false; + let mut has_namespace = false; + let mut named_specifiers = Vec::new(); + + // Categorize specifiers + for spec in &node.specifiers { + match spec { + ImportSpecifier::ImportDefaultSpecifier(default_spec) => { + has_default = true; + unparser.write_str(&default_spec.local.name); + }, + ImportSpecifier::ImportNamespaceSpecifier(namespace_spec) => { + has_namespace = true; + if has_default { + unparser.write_char(','); + unparser.space(); + } + unparser.write_str("* as "); + unparser.write_str(&namespace_spec.local.name); + }, + ImportSpecifier::ImportSpecifier(named_spec) => { + named_specifiers.push(named_spec); + } + } + } + + // Handle named specifiers + if !named_specifiers.is_empty() { + if has_default || has_namespace { + unparser.write_char(','); + unparser.space(); + } + + unparser.write_char('{'); + unparser.space(); + + // First named specifier + self.unparse_import_specifier(unparser, &named_specifiers[0]); + + // Remaining named specifiers + for spec in &named_specifiers[1..] { + unparser.write_char(','); + unparser.space(); + self.unparse_import_specifier(unparser, spec); + } + + unparser.space(); + unparser.write_char('}'); + } + + //unparser.space(); + unparser.write_char(' '); + } + + // Write the source + unparser.write_str("from"); + unparser.write_char(' '); + unparser.write_str(&format!("\"{}\"", node.source.value)); + unparser.write_char(';'); + } +} + +// Helper methods for ExportNamedDeclarationParser +impl ExportNamedDeclarationParser { + fn unparse_export_specifier(&self, unparser: &mut Unparser, spec: &ExportSpecifier) { + unparser.write_str(&spec.local.name); + + // If the exported name is different from the local name + if spec.local.name != spec.exported.name { + unparser.write_char(' '); + unparser.write_str("as"); + unparser.write_char(' '); + unparser.write_str(&spec.exported.name); + } + } +} + +// Helper methods for ImportDeclarationParser +impl ImportDeclarationParser { + fn unparse_import_specifier(&self, unparser: &mut Unparser, spec: &NamedImportSpecifier) { + unparser.write_str(&spec.imported.name); + + // If the local name is different from the imported name + if spec.imported.name != spec.local.name { + unparser.write_char(' '); + unparser.write_str("as"); + unparser.write_char(' '); + unparser.write_str(&spec.local.name); + } + } +} diff --git a/src/grammar/expression.rs b/src/grammar/expression.rs new file mode 100644 index 0000000..50947e4 --- /dev/null +++ b/src/grammar/expression.rs @@ -0,0 +1,1394 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::function::*; +use super::class::*; +use super::array::*; +use super::pattern::*; +use super::literal::*; +use super::object::*; +use super::this::*; +use super::new::*; +use super::await_expression::*; +use super::yield_expression::*; + + +/// Parser for JavaScript expressions +pub struct ExpressionNode; + +impl ExpressionNode { + pub fn new() -> Self { + Self + } + + /// Parse an expression with the given precedence level + pub fn parse_with_precedence(&self, parser: &mut Parser, precedence: Precedence) -> ParseResult { + // Parse the prefix expression + let mut left = self.parse_prefix(parser)?; + + // Continue parsing infix expressions as long as they have higher precedence + while !parser.is_at_end() && precedence < self.get_precedence(parser) { + left = self.parse_infix(parser, left)?; + } + + Ok(left) + } + + /// Parse a prefix expression + fn parse_prefix(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + Token::Identifier(_) => { + // Check if this is a single-parameter arrow function + let pos = parser.save_position(); + let ident = IdentifierNode::new().parse(parser)?; + + if parser.check(&Token::Arrow) { + // This is an arrow function with a single parameter + parser.restore_position(pos); + return ArrowFunctionExpressionNode::new().parse(parser).map(Expression::ArrowFunctionExpression); + } + + Ok(Expression::Identifier(ident)) + }, + Token::StringLiteral(_) | + Token::NumberLiteral(_) | + Token::BigIntLiteral(_) | + Token::RegExpLiteral(_, _) | + Token::True | + Token::False | + Token::Undefined | + Token::Null => { + LiteralNode::new().parse(parser).map(Expression::Literal) + }, + Token::This => { + ThisExpressionNode::new().parse(parser).map(Expression::ThisExpression) + }, + Token::LeftBracket => { + ArrayExpressionNode::new().parse(parser).map(Expression::ArrayExpression) + }, + Token::LeftBrace => { + ObjectExpressionNode::new().parse(parser).map(Expression::ObjectExpression) + }, + Token::Function => { + FunctionExpressionNode::new().parse(parser).map(Expression::FunctionExpression) + }, + Token::Class => { + ClassExpressionNode::new().parse(parser).map(Expression::ClassExpression) + }, + Token::New => { + NewExpressionNode::new().parse(parser).map(Expression::NewExpression) + }, + Token::Super => { + SuperExpressionNode::new().parse(parser).map(Expression::SuperExpression) + }, + Token::LeftParen => { + self.parse_grouped_expression(parser) + }, + Token::PlusPlus | + Token::MinusMinus => { + self.parse_prefix_update_expression(parser) + }, + Token::Plus | + Token::Minus | + Token::Bang | + Token::Tilde | + Token::Typeof | + Token::Void | + Token::Delete => { + self.parse_unary_expression(parser) + }, + Token::Await => { + if parser.allows_await() { + AwaitExpressionNode::new().parse(parser).map(Expression::AwaitExpression) + } else { + Err(parser.error_at_current("'await' expression is only allowed within async functions")) + } + }, + Token::Yield => { + if parser.allows_yield() { + YieldExpressionNode::new().parse(parser).map(Expression::YieldExpression) + } else { + Err(parser.error_at_current("'yield' expression is only allowed within generator functions")) + } + }, + Token::Async => { + // Save position to potentially backtrack + let pos = parser.save_position(); + + // Check if this is an async function + if parser.peek_next(1) == &Token::Function { + // This is an async function expression + parser.advance(); // Consume 'async' + + // Parse the function expression + let mut func_expr = FunctionExpressionNode::new().parse(parser)?; + func_expr.async_function = true; // Mark as async + + Ok(Expression::FunctionExpression(func_expr)) + } + // Check if this is an async arrow function + else if parser.peek_next(1) == &Token::LeftParen || + (matches!(parser.peek_next(1), Token::Identifier(_)) && + parser.peek_next(2) == &Token::Arrow) { + // This is an async arrow function + ArrowFunctionExpressionNode::new().parse(parser).map(Expression::ArrowFunctionExpression) + } + // Otherwise, it's just an identifier named "async" + else { + IdentifierNode::new().parse(parser).map(Expression::Identifier) + } + }, + _ => Err(parser.error_at_current("Expected an expression")), + } + } + + /// Parse an infix expression + fn parse_infix(&self, parser: &mut Parser, left: Expression) -> ParseResult { + match parser.peek() { + Token::LeftParen => { + self.parse_with_callee(parser, left, false).map(Expression::CallExpression) + }, + Token::LeftBracket | + Token::Dot => { + self.parse_with_object(parser, left, false).map(Expression::MemberExpression) + }, + Token::QuestionDot => { + if matches!(parser.peek_next(1), &Token::LeftParen) { + self.parse_with_callee(parser, left, true).map(Expression::CallExpression) + } else { + self.parse_with_object(parser, left, true).map(Expression::MemberExpression) + } + }, + Token::PlusPlus | + Token::MinusMinus => { + self.parse_postfix_update_expression(parser, left) + }, + Token::Plus | + Token::Minus | + Token::Star | + Token::Slash | + Token::Percent | + Token::StarStar | + Token::LessLess | + Token::GreaterGreater | + Token::GreaterGreaterGreater | + Token::Ampersand | + Token::Pipe | + Token::Caret | + Token::EqualEqual | + Token::BangEqual | + Token::EqualEqualEqual | + Token::BangEqualEqual | + Token::Less | + Token::LessEqual | + Token::Greater | + Token::GreaterEqual | + Token::In | + Token::InstanceOf => { + self.parse_binary_expression(parser, left) + }, + Token::AmpersandAmpersand | + Token::PipePipe | + Token::QuestionQuestion => { + self.parse_logical_expression(parser, left) + }, + Token::Question => { + self.parse_conditional_expression(parser, left) + }, + Token::Equal | + Token::PlusEqual | + Token::MinusEqual | + Token::StarEqual | + Token::SlashEqual | + Token::PercentEqual | + Token::StarStarEqual | + Token::LessLessEqual | + Token::GreaterGreaterEqual | + Token::GreaterGreaterGreaterEqual | + Token::AmpersandEqual | + Token::PipeEqual | + Token::CaretEqual | + Token::AmpersandAmpersandEqual | + Token::PipePipeEqual | + Token::QuestionQuestionEqual => { + self.parse_assignment_expression(parser, left) + }, + Token::Comma => { + self.parse_sequence_expression(parser, left) + }, + Token::Arrow => { + // This should be handled by the arrow function parser + Err(parser.error_at_current("Unexpected arrow function")) + }, + _ => Ok(left), + } + } + + fn parse_grouped_expression(&self, parser: &mut Parser) -> ParseResult { + // Save position in case we need to backtrack for arrow functions + let pos = parser.save_position(); + + // Check if this might be an arrow function with parameters + if self.is_arrow_function_ahead(parser) { + parser.restore_position(pos); + return ArrowFunctionExpressionNode::new().parse(parser).map(Expression::ArrowFunctionExpression); + } + + // Consume the opening parenthesis + parser.assert_consume(&Token::LeftParen, "Expected '(' at the start of grouped expression")?; + + // Check for empty parentheses + if parser.consume(&Token::RightParen) { + return Err(parser.error_at_current("Empty parentheses are not a valid expression")); + } + + // Parse the expression inside the parentheses + let expr = self.parse_with_precedence(parser, Precedence::Lowest)?; + + // Check for trailing comma (which is not allowed in grouped expressions) + if parser.consume(&Token::Comma) { + return Err(parser.error_at_current("Unexpected trailing comma in grouped expression")); + } + + // Consume the closing parenthesis + parser.assert_consume(&Token::RightParen, "Expected ')' after expression")?; + + Ok(expr) + } + + fn parse_with_callee(&self, parser: &mut Parser, callee: Expression, optional: bool) -> ParseResult { + // Consume the question-dot token if this is optional chaining + if optional { + parser.assert_consume(&Token::QuestionDot, "Expected '?.' in optional chaining")?; + } + + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut arguments = Vec::new(); + + if !parser.check(&Token::RightParen) { + arguments.push(ExpressionNode::new().parse(parser)?); + + while parser.consume(&Token::Comma) && !parser.check(&Token::RightParen) { + arguments.push(ExpressionNode::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after function arguments")?; + + Ok(CallExpression { + callee: Box::new(callee), + arguments, + optional, + }) + } + + fn parse_with_object(&self, parser: &mut Parser, object: Expression, optional: bool) -> ParseResult { + // Consume the dot or question-dot token + if optional { + parser.assert_consume(&Token::QuestionDot, "Expected '?.' in optional chaining")?; + } else if parser.check(&Token::Dot) { + parser.advance(); // Consume the '.' + } + + // Parse the property access + let (property, computed) = if parser.consume(&Token::LeftBracket) { + // Computed property access: obj[expr] or obj?.[expr] + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property")?; + (MemberProperty::Expression(Box::new(expr)), true) + } else { + // Static property access: obj.prop or obj?.prop + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierNode::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else if let Token::Default = parser.peek() { + // Special case for 'default' as property name + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + (MemberProperty::Identifier(Identifier { name }), false) + } else { + return Err(parser.error_at_current("Expected identifier after '.' or '?.'")); + } + }; + + // Create the member expression + let member_expr = MemberExpression { + object: Box::new(object), + property, + computed, + optional, + }; + + if parser.check(&Token::LeftBracket) || parser.check(&Token::Dot) { + // Continue parsing the chain of regular property accesses + return self.parse_with_object(parser, Expression::MemberExpression(member_expr), false); + } else if parser.check(&Token::QuestionDot) { + // Save position to check what follows + let pos = parser.save_position(); + parser.advance(); // Consume '?.' + + if parser.check(&Token::LeftParen) { + // This would be a function call, which we can't handle here + // Restore position and return the member expression we've parsed so far + parser.restore_position(pos); + return Ok(member_expr); + } else { + // Continue with optional property access + parser.restore_position(pos); + return self.parse_with_object(parser, Expression::MemberExpression(member_expr), true); + } + } + + return Ok(member_expr); + } + + // Helper method to check if an arrow function is ahead + fn is_arrow_function_ahead(&self, parser: &mut Parser) -> bool { + // Save position + let pos = parser.save_position(); + + // Skip the async keyword if present + let is_async = parser.check(&Token::Async); + if is_async { + parser.advance(); + + // For async arrow functions, we need at least one token after 'async' + if parser.is_at_end() { + parser.restore_position(pos); + return false; + } + + // If 'async' is followed by a line terminator, it's not an arrow function + if parser.previous_line_terminator() { + parser.restore_position(pos); + return false; + } + } + + // Check for single parameter without parentheses + if matches!(parser.peek(), Token::Identifier(_)) { + let id_pos = parser.save_position(); + parser.advance(); // Skip identifier + + if parser.check(&Token::Arrow) { + parser.restore_position(pos); + return true; + } + + parser.restore_position(id_pos); + } + + // Check for parameters in parentheses + if !parser.consume(&Token::LeftParen) { + parser.restore_position(pos); + return false; + } + + // Empty parameter list + if parser.consume(&Token::RightParen) { + let is_arrow = parser.check(&Token::Arrow); + parser.restore_position(pos); + return is_arrow; + } + + // Skip parameters and watch for trailing comma + let mut depth = 1; + let mut had_comma = false; + + while depth > 0 && !parser.is_at_end() { + match parser.peek() { + Token::LeftParen => { + depth += 1; + parser.advance(); + }, + Token::RightParen => { + depth -= 1; + if depth == 0 { + // Check if we just saw a comma before this right paren + if had_comma { + // This is a trailing comma in parameter list + parser.advance(); // Consume the right paren + let is_arrow = parser.check(&Token::Arrow); + parser.restore_position(pos); + return is_arrow; + } + } + parser.advance(); + }, + Token::Comma => { + had_comma = true; + parser.advance(); + }, + _ => { + had_comma = false; + parser.advance(); + } + } + } + + // Check if the next token is an arrow + let is_arrow = parser.check(&Token::Arrow); + + // Restore position + parser.restore_position(pos); + + is_arrow + } + + /// Get the precedence of the current token + fn get_precedence(&self, parser: &mut Parser) -> Precedence { + match parser.peek() { + Token::Comma => Precedence::Comma, + + Token::Equal | + Token::PlusEqual | + Token::MinusEqual | + Token::StarEqual | + Token::SlashEqual | + Token::PercentEqual | + Token::StarStarEqual | + Token::LessLessEqual | + Token::GreaterGreaterEqual | + Token::GreaterGreaterGreaterEqual | + Token::AmpersandEqual | + Token::PipeEqual | + Token::CaretEqual | + Token::AmpersandAmpersandEqual | + Token::PipePipeEqual | + Token::QuestionQuestionEqual => Precedence::Assignment, + + Token::Question => Precedence::Conditional, + + Token::QuestionQuestion => Precedence::NullishCoalescing, + Token::PipePipe => Precedence::LogicalOr, + Token::AmpersandAmpersand => Precedence::LogicalAnd, + + Token::Pipe => Precedence::BitwiseOr, + Token::Caret => Precedence::BitwiseXor, + Token::Ampersand => Precedence::BitwiseAnd, + + Token::EqualEqual | + Token::BangEqual | + Token::EqualEqualEqual | + Token::BangEqualEqual => Precedence::Equality, + + Token::Less | + Token::LessEqual | + Token::Greater | + Token::GreaterEqual | + Token::In | + Token::InstanceOf => Precedence::Relational, + + Token::LessLess | + Token::GreaterGreater | + Token::GreaterGreaterGreater => Precedence::Shift, + + Token::Plus | + Token::Minus => Precedence::Additive, + + Token::Star | + Token::Slash | + Token::Percent => Precedence::Multiplicative, + + Token::StarStar => Precedence::Exponentiation, + + Token::PlusPlus | + Token::MinusMinus => Precedence::Postfix, + + Token::Dot | + Token::QuestionDot | + Token::LeftBracket | + Token::LeftParen => Precedence::Call, + + _ => Precedence::Lowest, + } + } + + /// Parse a prefix update expression (++x, --x) + fn parse_prefix_update_expression(&self, parser: &mut Parser) -> ParseResult { + // Parse the operator + let operator = match parser.peek() { + Token::PlusPlus => { + parser.advance(); + UpdateOperator::Increment + }, + Token::MinusMinus => { + parser.advance(); + UpdateOperator::Decrement + }, + _ => return Err(parser.error_at_current("Expected '++' or '--'")), + }; + + // Parse the argument + let argument = self.parse_with_precedence(parser, Precedence::Prefix)?; + + // Check that the argument is a valid left-hand side expression + if !self.is_valid_lhs_expression(&argument) { + return Err(parser.error_at_current("Invalid left-hand side in prefix operation")); + } + + Ok(Expression::UpdateExpression(UpdateExpression { + operator, + argument: Box::new(argument), + prefix: true, + })) + } + + /// Parse a postfix update expression (x++, x--) + fn parse_postfix_update_expression(&self, parser: &mut Parser, left: Expression) -> ParseResult { + // Check that the left expression is a valid left-hand side expression + if !self.is_valid_lhs_expression(&left) { + return Err(parser.error_at_current("Invalid left-hand side in postfix operation")); + } + + // Parse the operator + let operator = match parser.peek() { + Token::PlusPlus => { + parser.advance(); + UpdateOperator::Increment + }, + Token::MinusMinus => { + parser.advance(); + UpdateOperator::Decrement + }, + _ => return Err(parser.error_at_current("Expected '++' or '--'")), + }; + + Ok(Expression::UpdateExpression(UpdateExpression { + operator, + argument: Box::new(left), + prefix: false, + })) + } + + /// Parse a unary expression + fn parse_unary_expression(&self, parser: &mut Parser) -> ParseResult { + // Parse the operator + let operator = match parser.peek() { + Token::Plus => { + parser.advance(); + UnaryOperator::Plus + }, + Token::Minus => { + parser.advance(); + UnaryOperator::Minus + }, + Token::Bang => { + parser.advance(); + UnaryOperator::Not + }, + Token::Tilde => { + parser.advance(); + UnaryOperator::BitwiseNot + }, + Token::Typeof => { + parser.advance(); + UnaryOperator::Typeof + }, + Token::Void => { + parser.advance(); + UnaryOperator::Void + }, + Token::Delete => { + parser.advance(); + UnaryOperator::Delete + }, + _ => return Err(parser.error_at_current("Expected a unary operator")), + }; + + // Parse the argument + let argument = self.parse_with_precedence(parser, Precedence::Prefix)?; + + Ok(Expression::UnaryExpression(UnaryExpression { + operator, + argument: Box::new(argument), + prefix: true, + })) + } + + /// Parse a binary expression + fn parse_binary_expression(&self, parser: &mut Parser, left: Expression) -> ParseResult { + let (operator, precedence) = self.get_binary_operator(parser)?; + + parser.advance(); + + let right = self.parse_with_precedence(parser, precedence.next())?; + + Ok(Expression::BinaryExpression(BinaryExpression { + operator, + left: Box::new(left), + right: Box::new(right), + })) + } + + /// Parse a logical expression + fn parse_logical_expression(&self, parser: &mut Parser, left: Expression) -> ParseResult { + let (operator, precedence) = self.get_logical_operator(parser)?; + + parser.advance(); + + let right = self.parse_with_precedence(parser, precedence)?; + + Ok(Expression::LogicalExpression(LogicalExpression { + operator, + left: Box::new(left), + right: Box::new(right), + })) + } + + /// Parse a conditional expression (ternary) + fn parse_conditional_expression(&self, parser: &mut Parser, test: Expression) -> ParseResult { + // Consume the question mark + parser.assert_consume(&Token::Question, "Expected '?' in conditional expression")?; + + // Parse the consequent + let consequent = self.parse_with_precedence(parser, Precedence::Lowest)?; + + // Consume the colon + parser.assert_consume(&Token::Colon, "Expected ':' in conditional expression")?; + + // Parse the alternate + let alternate = self.parse_with_precedence(parser, Precedence::Assignment)?; + + Ok(Expression::ConditionalExpression(ConditionalExpression { + test: Box::new(test), + consequent: Box::new(consequent), + alternate: Box::new(alternate), + })) + } + + /// Parse an assignment expression + fn parse_assignment_expression(&self, parser: &mut Parser, left: Expression) -> ParseResult { + // Get the operator + let operator = self.get_assignment_operator(parser)?; + + // Consume the operator token + parser.advance(); + + // Parse the right side + let right = self.parse_with_precedence(parser, Precedence::Assignment)?; + + // Convert the left expression to a valid assignment target + let left = match self.to_assignment_target(left) { + Ok(target) => target, + Err(_) => return Err(parser.error_at_current("Invalid left-hand side in assignment")), + }; + + Ok(Expression::AssignmentExpression(AssignmentExpression { + operator, + left, + right: Box::new(right), + })) + } + + /// Parse a sequence expression (comma-separated expressions) + fn parse_sequence_expression(&self, parser: &mut Parser, first: Expression) -> ParseResult { + let mut expressions = vec![first]; + + // Consume the comma + parser.assert_consume(&Token::Comma, "Expected ',' in sequence expression")?; + + // Parse the next expression + let next = self.parse_with_precedence(parser, Precedence::Lowest)?; + expressions.push(next); + + // Parse any additional expressions + while parser.consume(&Token::Comma) { + let expr = self.parse_with_precedence(parser, Precedence::Lowest)?; + expressions.push(expr); + } + + Ok(Expression::SequenceExpression(SequenceExpression { expressions })) + } + + /// Helper method to get a binary operator and its precedence + fn get_binary_operator(&self, parser: &mut Parser) -> ParseResult<(BinaryOperator, Precedence)> { + match parser.peek() { + Token::Plus => Ok((BinaryOperator::Addition, Precedence::Additive)), + Token::Minus => Ok((BinaryOperator::Subtraction, Precedence::Additive)), + Token::Star => Ok((BinaryOperator::Multiplication, Precedence::Multiplicative)), + Token::Slash => Ok((BinaryOperator::Division, Precedence::Multiplicative)), + Token::Percent => Ok((BinaryOperator::Remainder, Precedence::Multiplicative)), + Token::StarStar => Ok((BinaryOperator::Exponentiation, Precedence::Exponentiation)), + Token::LessLess => Ok((BinaryOperator::LeftShift, Precedence::Shift)), + Token::GreaterGreater => Ok((BinaryOperator::RightShift, Precedence::Shift)), + Token::GreaterGreaterGreater => Ok((BinaryOperator::UnsignedRightShift, Precedence::Shift)), + Token::Ampersand => Ok((BinaryOperator::BitwiseAnd, Precedence::BitwiseAnd)), + Token::Pipe => Ok((BinaryOperator::BitwiseOr, Precedence::BitwiseOr)), + Token::Caret => Ok((BinaryOperator::BitwiseXor, Precedence::BitwiseXor)), + Token::EqualEqual => Ok((BinaryOperator::Equal, Precedence::Equality)), + Token::BangEqual => Ok((BinaryOperator::NotEqual, Precedence::Equality)), + Token::EqualEqualEqual => Ok((BinaryOperator::StrictEqual, Precedence::Equality)), + Token::BangEqualEqual => Ok((BinaryOperator::StrictNotEqual, Precedence::Equality)), + Token::Less => Ok((BinaryOperator::LessThan, Precedence::Relational)), + Token::LessEqual => Ok((BinaryOperator::LessThanOrEqual, Precedence::Relational)), + Token::Greater => Ok((BinaryOperator::GreaterThan, Precedence::Relational)), + Token::GreaterEqual => Ok((BinaryOperator::GreaterThanOrEqual, Precedence::Relational)), + Token::In => Ok((BinaryOperator::In, Precedence::Relational)), + Token::InstanceOf => Ok((BinaryOperator::InstanceOf, Precedence::Relational)), + _ => Err(parser.error_at_current("Expected a binary operator")), + } + } + + /// Helper method to get a logical operator and its precedence + fn get_logical_operator(&self, parser: &mut Parser) -> ParseResult<(LogicalOperator, Precedence)> { + match parser.peek() { + Token::AmpersandAmpersand => Ok((LogicalOperator::And, Precedence::LogicalAnd)), + Token::PipePipe => Ok((LogicalOperator::Or, Precedence::LogicalOr)), + Token::QuestionQuestion => Ok((LogicalOperator::NullishCoalescing, Precedence::NullishCoalescing)), + _ => Err(parser.error_at_current("Expected a logical operator")), + } + } + + /// Helper method to get an assignment operator + fn get_assignment_operator(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + Token::Equal => Ok(AssignmentOperator::Assign), + Token::PlusEqual => Ok(AssignmentOperator::PlusAssign), + Token::MinusEqual => Ok(AssignmentOperator::MinusAssign), + Token::StarEqual => Ok(AssignmentOperator::MultiplyAssign), + Token::SlashEqual => Ok(AssignmentOperator::DivideAssign), + Token::PercentEqual => Ok(AssignmentOperator::RemainderAssign), + Token::StarStarEqual => Ok(AssignmentOperator::ExponentiationAssign), + Token::LessLessEqual => Ok(AssignmentOperator::LeftShiftAssign), + Token::GreaterGreaterEqual => Ok(AssignmentOperator::RightShiftAssign), + Token::GreaterGreaterGreaterEqual => Ok(AssignmentOperator::UnsignedRightShiftAssign), + Token::AmpersandEqual => Ok(AssignmentOperator::BitwiseAndAssign), + Token::PipeEqual => Ok(AssignmentOperator::BitwiseOrAssign), + Token::CaretEqual => Ok(AssignmentOperator::BitwiseXorAssign), + Token::AmpersandAmpersandEqual => Ok(AssignmentOperator::LogicalAndAssign), + Token::PipePipeEqual => Ok(AssignmentOperator::LogicalOrAssign), + Token::QuestionQuestionEqual => Ok(AssignmentOperator::NullishCoalescingAssign), + _ => Err(parser.error_at_current("Expected an assignment operator")), + } + } + + /// Helper method to convert an expression to an assignment target + fn to_assignment_target(&self, expr: Expression) -> Result { + match expr { + Expression::Identifier(_) => { + // Convert to pattern + Ok(AssignmentLeft::Pattern(Pattern::Identifier(match expr { + Expression::Identifier(ident) => ident, + _ => unreachable!(), + }))) + }, + Expression::MemberExpression(member) => { + // Member expressions are valid assignment targets + Ok(AssignmentLeft::Expression(Box::new(Expression::MemberExpression(member)))) + }, + _ => Err(()), + } + } + + /// Helper method to check if an expression is a valid left-hand side expression + fn is_valid_lhs_expression(&self, expr: &Expression) -> bool { + match expr { + Expression::Identifier(_) | + Expression::MemberExpression(_) => true, + _ => false, + } + } +} + +impl ParserCombinator for ExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + self.parse_with_precedence(parser, Precedence::Lowest) + } +} + +/// Precedence levels for expression parsing +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum Precedence { + Lowest, + Comma, // , + Assignment, // = += -= etc. + Conditional, // ?: + NullishCoalescing, // ?? + LogicalOr, // || + LogicalAnd, // && + BitwiseOr, // | + BitwiseXor, // ^ + BitwiseAnd, // & + Equality, // == != === !== + Relational, // < > <= >= in instanceof + Shift, // << >> >>> + Additive, // + - + Multiplicative, // * / % + Exponentiation, // ** + Prefix, // ! ~ + - typeof void delete ++x --x + Postfix, // x++ x-- + Call, // . [] () +} + +impl Precedence { + /// Get the next higher precedence level + pub fn next(&self) -> Self { + match self { + Precedence::Lowest => Precedence::Comma, + Precedence::Comma => Precedence::Assignment, + Precedence::Assignment => Precedence::Conditional, + Precedence::Conditional => Precedence::NullishCoalescing, + Precedence::NullishCoalescing => Precedence::LogicalOr, + Precedence::LogicalOr => Precedence::LogicalAnd, + Precedence::LogicalAnd => Precedence::BitwiseOr, + Precedence::BitwiseOr => Precedence::BitwiseXor, + Precedence::BitwiseXor => Precedence::BitwiseAnd, + Precedence::BitwiseAnd => Precedence::Equality, + Precedence::Equality => Precedence::Relational, + Precedence::Relational => Precedence::Shift, + Precedence::Shift => Precedence::Additive, + Precedence::Additive => Precedence::Multiplicative, + Precedence::Multiplicative => Precedence::Exponentiation, + Precedence::Exponentiation => Precedence::Prefix, + Precedence::Prefix => Precedence::Postfix, + Precedence::Postfix => Precedence::Call, + Precedence::Call => Precedence::Call, // Can't go higher + } + } +} + +// Main expression unparser +impl UnparserCombinator for ExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &Expression) { + match node { + Expression::Identifier(ident) => { + unparser.write_str(&ident.name); + }, + Expression::Literal(lit) => { + self.unparse_literal(unparser, lit); + }, + Expression::ArrayExpression(array) => { + ArrayExpressionNode::new().unparse(unparser, array); + }, + Expression::ObjectExpression(obj) => { + ObjectExpressionNode::new().unparse(unparser, obj); + }, + Expression::FunctionExpression(func) => { + FunctionExpressionNode::new().unparse(unparser, func); + }, + Expression::ArrowFunctionExpression(arrow) => { + ArrowFunctionExpressionNode::new().unparse(unparser, arrow); + }, + Expression::ClassExpression(class) => { + ClassExpressionNode::new().unparse(unparser, class); + }, + Expression::TaggedTemplateExpression(tagged) => { + self.unparse_tagged_template(unparser, tagged); + }, + Expression::MemberExpression(member) => { + self.unparse_member_expression(unparser, member); + }, + Expression::SuperExpression(super_expr) => { + SuperExpressionNode::new().unparse(unparser, super_expr); + }, + Expression::MetaProperty(meta) => { + self.unparse_meta_property(unparser, meta); + }, + Expression::NewExpression(new_expr) => { + self.unparse_new_expression(unparser, new_expr); + }, + Expression::CallExpression(call) => { + self.unparse_call_expression(unparser, call); + }, + Expression::UpdateExpression(update) => { + self.unparse_update_expression(unparser, update); + }, + Expression::AwaitExpression(await_expr) => { + AwaitExpressionNode::new().unparse(unparser, await_expr); + }, + Expression::UnaryExpression(unary) => { + self.unparse_unary_expression(unparser, unary); + }, + Expression::BinaryExpression(binary) => { + self.unparse_binary_expression(unparser, binary); + }, + Expression::LogicalExpression(logical) => { + self.unparse_logical_expression(unparser, logical); + }, + Expression::ConditionalExpression(cond) => { + self.unparse_conditional_expression(unparser, cond); + }, + Expression::YieldExpression(yield_expr) => { + YieldExpressionNode::new().unparse(unparser, yield_expr); + }, + Expression::AssignmentExpression(assign) => { + self.unparse_assignment_expression(unparser, assign); + }, + Expression::SequenceExpression(seq) => { + self.unparse_sequence_expression(unparser, seq); + }, + Expression::ThisExpression(this) => { + ThisExpressionNode::new().unparse(unparser, this); + }, + // TODO implement +// Expression::TemplateLiteral(template) => { +// self.unparse_template_literal(unparser, template); +// }, + //_ => { + // Fallback for any expression types not explicitly handled + // unparser.write_str("/* unsupported expression */"); + //} + } + } +} + +// Helper methods for ExpressionNode +impl ExpressionNode { + fn unparse_literal(&self, unparser: &mut Unparser, lit: &Literal) { + match lit { + Literal::StringLiteral(s) => { + unparser.write_char('"'); + unparser.write_str(&s.value); + unparser.write_char('"'); + }, + Literal::NumericLiteral(n) => { + unparser.write_str(&n.value.to_string()); + }, + Literal::BooleanLiteral(b) => { + unparser.write_str(if b.value { "true" } else { "false" }); + }, + Literal::NullLiteral(_) => { + unparser.write_str("null"); + }, + Literal::UndefinedLiteral(_) => { + unparser.undefined(); + }, + Literal::RegExpLiteral(r) => { + unparser.write_char('/'); + unparser.write_str(&r.pattern); + unparser.write_char('/'); + unparser.write_str(&r.flags); + }, + Literal::BigIntLiteral(b) => { + unparser.write_str(&b.value); + unparser.write_char('n'); + } + } + } + + fn unparse_tagged_template(&self, unparser: &mut Unparser, tagged: &TaggedTemplateExpression) { + // Unparse the tag + self.unparse(unparser, &tagged.tag); + + // Unparse the template literal + self.unparse_template_literal(unparser, &tagged.quasi); + } + + fn unparse_template_literal(&self, unparser: &mut Unparser, template: &TemplateLiteral) { + unparser.write_char('`'); + + for (i, elem) in template.quasis.iter().enumerate() { + // Write the template string part + unparser.write_str(&elem.value.raw); + + // If there's an expression after this quasi, write it + if i < template.expressions.len() { + unparser.write_str("${"); + self.unparse(unparser, &template.expressions[i]); + unparser.write_char('}'); + } + } + + unparser.write_char('`'); + } + + fn unparse_member_expression(&self, unparser: &mut Unparser, member: &MemberExpression) { + // Unparse the object + self.unparse(unparser, &member.object); + + // Handle optional chaining + if member.optional { + unparser.write_str("?."); + } + + // Unparse the property + match &member.property { + MemberProperty::Identifier(id) => { + if !member.optional { + unparser.write_char('.'); + } + unparser.write_str(&id.name); + }, + MemberProperty::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + MemberProperty::Expression(expr) => { + unparser.write_char('['); + self.unparse(unparser, expr); + unparser.write_char(']'); + } + } + } + + fn unparse_meta_property(&self, unparser: &mut Unparser, meta: &MetaProperty) { + unparser.write_str(&meta.meta.name); + unparser.write_char('.'); + unparser.write_str(&meta.property.name); + } + + fn unparse_new_expression(&self, unparser: &mut Unparser, new_expr: &NewExpression) { + unparser.write_str("new "); + + // Unparse the callee + self.unparse(unparser, &new_expr.callee); + + // Unparse the arguments + unparser.write_char('('); + + for (i, arg) in new_expr.arguments.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + self.unparse(unparser, arg); + } + + unparser.write_char(')'); + } + + fn unparse_call_expression(&self, unparser: &mut Unparser, call: &CallExpression) { + // Unparse the callee + self.unparse(unparser, &call.callee); + + // Handle optional chaining + if call.optional { + unparser.write_str("?."); + } + + // Unparse the arguments + unparser.write_char('('); + + for (i, arg) in call.arguments.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + self.unparse(unparser, arg); + } + + unparser.write_char(')'); + } + + fn unparse_update_expression(&self, unparser: &mut Unparser, update: &UpdateExpression) { + let operator_str = match update.operator { + UpdateOperator::Increment => "++", + UpdateOperator::Decrement => "--", + }; + + if update.prefix { + unparser.write_str(operator_str); + self.unparse(unparser, &update.argument); + } else { + self.unparse(unparser, &update.argument); + unparser.write_str(operator_str); + } + } + + fn unparse_unary_expression(&self, unparser: &mut Unparser, unary: &UnaryExpression) { + let operator_str = match unary.operator { + UnaryOperator::Plus => "+", + UnaryOperator::Minus => "-", + UnaryOperator::Not => "!", + UnaryOperator::BitwiseNot => "~", + UnaryOperator::Typeof => "typeof ", + UnaryOperator::Void => "void ", + UnaryOperator::Delete => "delete ", + }; + + unparser.write_str(operator_str); + + // Determine if we need parentheses + let needs_parens = matches!(&*unary.argument, + Expression::UnaryExpression(_) | + Expression::BinaryExpression(_) | + Expression::LogicalExpression(_) | + Expression::ConditionalExpression(_) | + Expression::AssignmentExpression(_) + ) && !matches!(unary.operator, UnaryOperator::Typeof | UnaryOperator::Void | UnaryOperator::Delete); + + // Unparse the argument + if needs_parens { + unparser.write_char('('); + self.unparse(unparser, &unary.argument); + unparser.write_char(')'); + } else { + self.unparse(unparser, &unary.argument); + } + } + + + fn unparse_binary_expression(&self, unparser: &mut Unparser, binary: &BinaryExpression) { + let operator_str = match binary.operator { + BinaryOperator::Addition => "+", + BinaryOperator::Subtraction => "-", + BinaryOperator::Multiplication => "*", + BinaryOperator::Division => "/", + BinaryOperator::Remainder => "%", + BinaryOperator::Exponentiation => "**", + BinaryOperator::LeftShift => "<<", + BinaryOperator::RightShift => ">>", + BinaryOperator::UnsignedRightShift => ">>>", + BinaryOperator::BitwiseAnd => "&", + BinaryOperator::BitwiseOr => "|", + BinaryOperator::BitwiseXor => "^", + BinaryOperator::Equal => "==", + BinaryOperator::NotEqual => "!=", + BinaryOperator::StrictEqual => "===", + BinaryOperator::StrictNotEqual => "!==", + BinaryOperator::LessThan => "<", + BinaryOperator::LessThanOrEqual => "<=", + BinaryOperator::GreaterThan => ">", + BinaryOperator::GreaterThanOrEqual => ">=", + BinaryOperator::In => " in ", + BinaryOperator::InstanceOf => " instanceof ", + }; + + // Determine if we need parentheses based on operator precedence + let left_needs_parens = self.needs_parentheses(&binary.left, &binary.operator, true); + let right_needs_parens = self.needs_parentheses(&binary.right, &binary.operator, false); + + // Unparse left operand + if left_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &binary.left); + unparser.write_char(')'); + } else { + self.unparse(unparser, &binary.left); + } + + // Add space before operator for readability + if !matches!(binary.operator, BinaryOperator::In | BinaryOperator::InstanceOf) { + unparser.space(); + } + + // Write the operator + unparser.write_str(operator_str); + + // Add space after operator for readability + if !matches!(binary.operator, BinaryOperator::In | BinaryOperator::InstanceOf) { + unparser.space(); + } + + // Unparse right operand + if right_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &binary.right); + unparser.write_char(')'); + } else { + self.unparse(unparser, &binary.right); + } + } + + fn unparse_logical_expression(&self, unparser: &mut Unparser, logical: &LogicalExpression) { + let operator_str = match logical.operator { + LogicalOperator::And => "&&", + LogicalOperator::Or => "||", + LogicalOperator::NullishCoalescing => "??", + }; + + // Determine if we need parentheses based on operator precedence + let left_needs_parens = self.needs_logical_parentheses(&logical.left, &logical.operator, true); + let right_needs_parens = self.needs_logical_parentheses(&logical.right, &logical.operator, false); + + // Unparse left operand + if left_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &logical.left); + unparser.write_char(')'); + } else { + self.unparse(unparser, &logical.left); + } + + // Add space before operator + unparser.space(); + + // Write the operator + unparser.write_str(operator_str); + + // Add space after operator + unparser.space(); + + // Unparse right operand + if right_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &logical.right); + unparser.write_char(')'); + } else { + self.unparse(unparser, &logical.right); + } + } + + fn unparse_conditional_expression(&self, unparser: &mut Unparser, cond: &ConditionalExpression) { + // Determine if test needs parentheses + let test_needs_parens = matches!(&*cond.test, + Expression::AssignmentExpression(_) | + Expression::ConditionalExpression(_) | + Expression::SequenceExpression(_) + ); + + // Unparse test expression + if test_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &cond.test); + unparser.write_char(')'); + } else { + self.unparse(unparser, &cond.test); + } + + // Write the question mark + unparser.space(); + unparser.write_char('?'); + unparser.space(); + + // Unparse consequent expression + self.unparse(unparser, &cond.consequent); + + // Write the colon + unparser.space(); + unparser.write_char(':'); + unparser.space(); + + // Unparse alternate expression + self.unparse(unparser, &cond.alternate); + } + + fn unparse_assignment_expression(&self, unparser: &mut Unparser, assign: &AssignmentExpression) { + // Unparse the left side + match &assign.left { + AssignmentLeft::Pattern(pattern) => { + PatternNode::new().unparse(unparser, pattern); + }, + AssignmentLeft::Expression(expr) => { + self.unparse(unparser, expr); + } + } + + // Write the operator + unparser.space(); + match assign.operator { + AssignmentOperator::Assign => unparser.write_char('='), + AssignmentOperator::PlusAssign => unparser.write_str("+="), + AssignmentOperator::MinusAssign => unparser.write_str("-="), + AssignmentOperator::MultiplyAssign => unparser.write_str("*="), + AssignmentOperator::DivideAssign => unparser.write_str("/="), + AssignmentOperator::RemainderAssign => unparser.write_str("%="), + AssignmentOperator::ExponentiationAssign => unparser.write_str("**="), + AssignmentOperator::LeftShiftAssign => unparser.write_str("<<="), + AssignmentOperator::RightShiftAssign => unparser.write_str(">>="), + AssignmentOperator::UnsignedRightShiftAssign => unparser.write_str(">>>="), + AssignmentOperator::BitwiseAndAssign => unparser.write_str("&="), + AssignmentOperator::BitwiseOrAssign => unparser.write_str("|="), + AssignmentOperator::BitwiseXorAssign => unparser.write_str("^="), + AssignmentOperator::LogicalAndAssign => unparser.write_str("&&="), + AssignmentOperator::LogicalOrAssign => unparser.write_str("||="), + AssignmentOperator::NullishCoalescingAssign => unparser.write_str("??="), + } + unparser.space(); + + // Unparse the right side + self.unparse(unparser, &assign.right); + } + + fn unparse_sequence_expression(&self, unparser: &mut Unparser, seq: &SequenceExpression) { + for (i, expr) in seq.expressions.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + self.unparse(unparser, expr); + } + } + + // Helper method to determine if parentheses are needed for binary expressions + fn needs_parentheses(&self, expr: &Expression, parent_op: &BinaryOperator, is_left: bool) -> bool { + match expr { + Expression::BinaryExpression(binary) => { + let child_precedence = self.get_binary_precedence(&binary.operator); + let parent_precedence = self.get_binary_precedence(parent_op); + + // If the child has lower precedence, we need parentheses + if child_precedence < parent_precedence { + return true; + } + + // If they have the same precedence, we need parentheses for right-associative operators + // or for the right operand of left-associative operators + if child_precedence == parent_precedence { + // Handle right-associative operators (currently only **) + if matches!(parent_op, BinaryOperator::Exponentiation) { + return is_left; + } + // For left-associative operators, need parentheses on right side when precedences are equal + return !is_left; + } + + false + }, + Expression::LogicalExpression(_) | + Expression::ConditionalExpression(_) | + Expression::AssignmentExpression(_) | + Expression::SequenceExpression(_) => true, + _ => false, + } + } + + // Helper method to determine if parentheses are needed for logical expressions + fn needs_logical_parentheses(&self, expr: &Expression, parent_op: &LogicalOperator, is_left: bool) -> bool { + match expr { + Expression::LogicalExpression(logical) => { + let child_precedence = self.get_logical_precedence(&logical.operator); + let parent_precedence = self.get_logical_precedence(parent_op); + + // If the child has lower precedence, we need parentheses + if child_precedence < parent_precedence { + return true; + } + + // If they have the same precedence, we need parentheses for the right operand + // of left-associative operators (all logical operators are left-associative) + if child_precedence == parent_precedence && !is_left { + return true; + } + + false + }, + Expression::ConditionalExpression(_) | + Expression::AssignmentExpression(_) | + Expression::SequenceExpression(_) => true, + _ => false, + } + } + + // Helper method to get binary operator precedence + fn get_binary_precedence(&self, op: &BinaryOperator) -> u8 { + match op { + BinaryOperator::Exponentiation => 14, + BinaryOperator::Multiplication | BinaryOperator::Division | BinaryOperator::Remainder => 13, + BinaryOperator::Addition | BinaryOperator::Subtraction => 12, + BinaryOperator::LeftShift | BinaryOperator::RightShift | BinaryOperator::UnsignedRightShift => 11, + BinaryOperator::LessThan | BinaryOperator::LessThanOrEqual | + BinaryOperator::GreaterThan | BinaryOperator::GreaterThanOrEqual | + BinaryOperator::In | BinaryOperator::InstanceOf => 10, + BinaryOperator::Equal | BinaryOperator::NotEqual | + BinaryOperator::StrictEqual | BinaryOperator::StrictNotEqual => 9, + BinaryOperator::BitwiseAnd => 8, + BinaryOperator::BitwiseXor => 7, + BinaryOperator::BitwiseOr => 6, + //_ => 0, // Should not happen + } + } + + // Helper method to get logical operator precedence + fn get_logical_precedence(&self, op: &LogicalOperator) -> u8 { + match op { + LogicalOperator::And => 5, + LogicalOperator::Or => 4, + LogicalOperator::NullishCoalescing => 3, + } + } +} diff --git a/src/grammar/function.rs b/src/grammar/function.rs new file mode 100644 index 0000000..383a734 --- /dev/null +++ b/src/grammar/function.rs @@ -0,0 +1,221 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::pattern::*; +use super::expression::*; +use super::statement::*; + +pub struct FunctionExpressionNode; + +impl FunctionExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for FunctionExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check if this is an async function + let async_function = parser.consume(&Token::Async); + + // Consume the 'function' keyword + parser.assert_consume(&Token::Function, "Expected 'function'")?; + + // Check if this is a generator function + let generator = parser.consume(&Token::Star); + + // Parse the function name if present + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + // Parse the parameter list + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut params = Vec::new(); + + if !parser.check(&Token::RightParen) { + // Parse the first parameter + params.push(PatternNode::new().parse(parser)?); + + // Parse additional parameters + while parser.consume(&Token::Comma) { + params.push(PatternNode::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; + + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: async_function, allow_yield: generator }, |p| { + BlockStatementNode::new().parse(p) + })?; + + Ok(FunctionExpression { + id, + params, + body, + generator, + async_function, + }) + } +} + + +impl UnparserCombinator for FunctionExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &FunctionExpression) { + // Write async if it's an async function + if node.async_function { + unparser.write_str("async"); + unparser.write_char(' '); + } + + // Write the function keyword + unparser.write_str("function"); + + // Write * if it's a generator function + if node.generator { + unparser.write_char('*'); + } + + // Write the function name if present + if let Some(id) = &node.id { + //unparser.space(); + unparser.write_char(' '); + unparser.write_str(&id.name); + } + + // Write the parameter list + unparser.write_char('('); + + // Write parameters + if !node.params.is_empty() { + PatternNode::new().unparse(unparser, &node.params[0]); + + for param in &node.params[1..] { + unparser.write_char(','); + unparser.space(); + PatternNode::new().unparse(unparser, param); + } + } + + unparser.write_char(')'); + unparser.space(); + + // Write the function body + BlockStatementNode::new().unparse(unparser, &node.body); + } +} + +/// Parser for arrow function expressions +pub struct ArrowFunctionExpressionNode; + +impl ArrowFunctionExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ArrowFunctionExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for async arrow function + let async_function = parser.consume(&Token::Async); + + // Parse the parameters + let mut params = Vec::new(); + + if parser.consume(&Token::LeftParen) { + // Multiple parameters in parentheses + if !parser.check(&Token::RightParen) { + // Parse the first parameter + params.push(PatternNode::new().parse(parser)?); + + // Parse additional parameters + while parser.consume(&Token::Comma) { + // Check for trailing comma (right paren immediately after comma) + if parser.check(&Token::RightParen) { + break; // This is a trailing comma, so stop parsing parameters + } + + // Parse the next parameter + params.push(PatternNode::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after arrow function parameters")?; + } else { + // Single parameter without parentheses + params.push(PatternNode::new().parse(parser)?); + } + + // Consume the arrow + parser.assert_consume(&Token::Arrow, "Expected '=>' after arrow function parameters")?; + + let block = parser.check(&Token::LeftBrace); + + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: async_function, allow_yield: false }, |p| { + if p.check(&Token::LeftBrace) { + let block = BlockStatementNode::new().parse(p)?; + Ok(ArrowFunctionBody::BlockStatement(block)) + } else { + let expr = ExpressionNode::new().parse(p)?; + Ok(ArrowFunctionBody::Expression(Box::new(expr))) + } + })?; + + Ok(ArrowFunctionExpression { + params, + body, + expression: !block, + async_function, + }) + } +} + +impl UnparserCombinator for ArrowFunctionExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ArrowFunctionExpression) { + // Write async if it's an async arrow function + if node.async_function { + unparser.write_str("async"); + unparser.space(); + } + + // Write the parameter list + if node.params.len() == 1 && node.expression { + // Single parameter without parentheses for expression body arrow functions + PatternNode::new().unparse(unparser, &node.params[0]); + } else { + // Multiple parameters or block body requires parentheses + unparser.write_char('('); + + if !node.params.is_empty() { + PatternNode::new().unparse(unparser, &node.params[0]); + + for param in &node.params[1..] { + unparser.write_char(','); + unparser.space(); + PatternNode::new().unparse(unparser, param); + } + } + + unparser.write_char(')'); + } + + // Write the arrow + unparser.space(); + unparser.write_str("=>"); + unparser.space(); + + // Write the function body + match &node.body { + ArrowFunctionBody::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + ArrowFunctionBody::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + } + } + } +} diff --git a/src/grammar/literal.rs b/src/grammar/literal.rs new file mode 100644 index 0000000..9ed2f2e --- /dev/null +++ b/src/grammar/literal.rs @@ -0,0 +1,118 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; + +pub struct LiteralNode; + +impl LiteralNode { + pub fn new() -> Self { + Self + } + + fn escape_string(&self, s: &str) -> String { + let mut result = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '\n' => result.push_str("\\n"), + '\r' => result.push_str("\\r"), + '\t' => result.push_str("\\t"), + '\\' => result.push_str("\\\\"), + '"' => result.push_str("\\\""), + '\'' => result.push_str("\\'"), + '\0' => result.push_str("\\0"), + '\u{08}' => result.push_str("\\b"), // backspace + '\u{0C}' => result.push_str("\\f"), // form feed + c if c.is_control() => { + // Use Unicode escape sequence for other control characters + let code = c as u32; + result.push_str(&format!("\\u{:04x}", code)); + }, + _ => result.push(c), + } + } + result + } +} + +impl ParserCombinator for LiteralNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + Token::StringLiteral(value) => { + let value_clone = value.clone(); + parser.advance(); + return Ok(Literal::StringLiteral(StringLiteral { value: value_clone.into_boxed_str() })); + }, + Token::NumberLiteral(value) => { + let value_copy = *value; + parser.advance(); + return Ok(Literal::NumericLiteral(NumericLiteral { value: value_copy })); + }, + Token::BigIntLiteral(value) => { + let value_clone = value.clone(); + parser.advance(); + return Ok(Literal::BigIntLiteral(BigIntLiteral { value: value_clone.into_boxed_str() })); + }, + Token::RegExpLiteral(pattern, flags) => { + let pattern_clone = pattern.clone(); + let flags_clone = flags.clone(); + parser.advance(); + return Ok(Literal::RegExpLiteral(RegExpLiteral { pattern: pattern_clone.into_boxed_str(), flags: flags_clone.into_boxed_str() })); + }, + Token::True => { + parser.advance(); + return Ok(Literal::BooleanLiteral(BooleanLiteral { value: true })); + }, + Token::False => { + parser.advance(); + return Ok(Literal::BooleanLiteral(BooleanLiteral { value: false })); + }, + Token::Undefined => { + parser.advance(); + return Ok(Literal::UndefinedLiteral(UndefinedLiteral {})); + }, + Token::Null => { + parser.advance(); + return Ok(Literal::NullLiteral(NullLiteral {})); + }, + _ => return Err(parser.error_at_current("Expected a literal")), + }; + } +} + +impl UnparserCombinator for LiteralNode { + fn unparse(&self, unparser: &mut Unparser, expr: &Expression) { + if let Expression::Literal(lit) = expr { + match lit { + Literal::StringLiteral(value) => { + let escaped = self.escape_string(&value.value); + unparser.write_char('"'); + unparser.write_str(&escaped); + unparser.write_char('"'); + }, + Literal::NumericLiteral(value) => { + unparser.write_str(&value.value.to_string()); + }, + Literal::BooleanLiteral(value) => { + unparser.write_str(if value.value { "true" } else { "false" }); + }, + Literal::NullLiteral(_) => { + unparser.write_str("null"); + }, + Literal::UndefinedLiteral(_) => { + unparser.undefined(); + }, + Literal::RegExpLiteral(value) => { + unparser.write_char('/'); + unparser.write_str(&value.pattern); + unparser.write_char('/'); + unparser.write_str(&value.flags); + }, + Literal::BigIntLiteral(value) => { + unparser.write_str(&value.value); + unparser.write_char('n'); + }, + } + } + } +} \ No newline at end of file diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs new file mode 100644 index 0000000..9203c7e --- /dev/null +++ b/src/grammar/mod.rs @@ -0,0 +1,33 @@ +mod expression; +mod pattern; +mod statement; +mod declaration; +mod literal; +mod function; +mod class; +mod array; +mod object; +mod property; +mod module; +mod script; +mod this; +mod new; +mod await_expression; +mod yield_expression; + +pub use self::expression::*; +pub use self::pattern::*; +pub use self::statement::*; +pub use self::declaration::*; +pub use self::literal::*; +pub use self::class::*; +pub use self::array::*; +pub use self::function::*; +pub use self::object::*; +pub use self::property::*; +pub use self::module::*; +pub use self::script::*; +pub use self::this::*; +pub use self::new::*; +pub use self::await_expression::*; +pub use self::yield_expression::*; diff --git a/src/grammar/module.rs b/src/grammar/module.rs new file mode 100644 index 0000000..9704851 --- /dev/null +++ b/src/grammar/module.rs @@ -0,0 +1,35 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::statement::*; + +pub struct ModuleNode; + +impl ModuleNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ModuleNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let body = parser.with_context(LexicalContext::ModuleBody { allow_await: true }, |p| { + let mut result = Vec::new(); + while !p.is_at_end() { + result.push(StatementNode::new().parse(p)?); + } + Ok(result) + })?; + Ok(Program { source_type: SourceType::Module, body }) + } +} + +impl UnparserCombinator for ModuleNode { + fn unparse(&self, unparser: &mut Unparser, program: &Program) { + for stmt in &program.body { + StatementNode::new().unparse(unparser, stmt); + unparser.newline(); + } + } +} \ No newline at end of file diff --git a/src/grammar/new.rs b/src/grammar/new.rs new file mode 100644 index 0000000..1c01fee --- /dev/null +++ b/src/grammar/new.rs @@ -0,0 +1,53 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; + +pub struct NewExpressionNode; + +impl NewExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for NewExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::New, "Expected 'new'")?; + let callee = Box::new(ExpressionNode::new().parse_with_precedence(parser, Precedence::Call)?); + let mut arguments = Vec::new(); + if parser.check(&Token::LeftParen) { + parser.assert_consume(&Token::LeftParen, "Expected '(' after new expression")?; + if !parser.check(&Token::RightParen) { + arguments.push(ExpressionNode::new().parse(parser)?); + while parser.consume(&Token::Comma) && !parser.check(&Token::RightParen) { + arguments.push(ExpressionNode::new().parse(parser)?); + } + } + parser.assert_consume(&Token::RightParen, "Expected ')' after new expression arguments")?; + } + Ok(NewExpression { callee, arguments }) + } +} + +impl UnparserCombinator for NewExpressionNode { + fn unparse(&self, unparser: &mut Unparser, expr: &Expression) { + if let Expression::NewExpression(new_expr) = expr { + unparser.write_str("new"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, &new_expr.callee); + unparser.write_char('('); + if !new_expr.arguments.is_empty() { + for (i, arg) in new_expr.arguments.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + ExpressionNode::new().unparse(unparser, arg); + } + } + unparser.write_char(')'); + } + } +} \ No newline at end of file diff --git a/src/grammar/object.rs b/src/grammar/object.rs new file mode 100644 index 0000000..4497c0a --- /dev/null +++ b/src/grammar/object.rs @@ -0,0 +1,59 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::property::*; + +pub struct ObjectExpressionNode; + +impl ObjectExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ObjectExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of object expression")?; + let mut properties = Vec::new(); + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + let property = PropertyNode::new().parse(parser)?; + properties.push(property.clone()); + if parser.consume(&Token::Comma) { + if parser.check(&Token::RightBrace) { + break; + } + } else { + break; + } + } + parser.assert_consume(&Token::RightBrace, "Expected '}' at the end of object expression")?; + Ok(ObjectExpression { properties }) + } +} + +impl UnparserCombinator for ObjectExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ObjectExpression) { + unparser.write_char('{'); + if !node.properties.is_empty() { + let multiline = node.properties.len() > 1; + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + PropertyNode::new().unparse(u, &node.properties[0]); + for property in &node.properties[1..] { + u.write_char(','); + u.newline(); + PropertyNode::new().unparse(u, property); + } + }); + unparser.newline(); + } else { + unparser.space(); + PropertyNode::new().unparse(unparser, &node.properties[0]); + unparser.space(); + } + } + unparser.write_char('}'); + } +} diff --git a/src/grammar/pattern.rs b/src/grammar/pattern.rs new file mode 100644 index 0000000..cdb5ef0 --- /dev/null +++ b/src/grammar/pattern.rs @@ -0,0 +1,602 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::literal::*; +use super::expression::*; + +pub struct PatternNode; + +impl PatternNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for PatternNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Try to parse as object pattern + if parser.check(&Token::LeftBrace) { + return ObjectPatternNode::new().parse(parser).map(Pattern::ObjectPattern); + } + + // Try to parse as array pattern + if parser.check(&Token::LeftBracket) { + return ArrayPatternNode::new().parse(parser).map(Pattern::ArrayPattern); + } + + // Try to parse as rest element + if parser.check(&Token::Ellipsis) { + return RestElementNode::new().parse(parser).map(Pattern::RestElement); + } + + // Try to parse as identifier or assignment pattern + // First parse an identifier + let pos = parser.save_position(); + + if let Ok(ident) = IdentifierNode::new().parse(parser) { + // Check if this is an assignment pattern + if parser.check(&Token::Equal) { + // Consume the equals sign + parser.assert_consume(&Token::Equal, "Expected '=' in assignment pattern")?; + + // Parse the right side (must be a valid expression) + let right = ExpressionNode::new().parse(parser)?; + + return Ok(Pattern::AssignmentPattern(AssignmentPattern { + left: Box::new(Pattern::Identifier(ident)), + right: Box::new(right), + })); + } + + // If not an assignment, return the identifier + return Ok(Pattern::Identifier(ident)); + } + + // Restore position after failed identifier attempt + parser.restore_position(pos); + + // Try to parse as member expression (only valid in some contexts) + let result = MemberPatternNode::new().parse(parser); + if result.is_ok() { + return result.map(Pattern::MemberExpression); + } + + // If all attempts failed, return an error + Err(parser.error_at_current("Expected a valid pattern")) + } +} + + +pub struct MemberPatternNode; + +impl MemberPatternNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for MemberPatternNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let object = ExpressionNode::new().parse(parser)?; + + // Parse the property access (without optional chaining) + let (property, computed) = if parser.consume(&Token::LeftBracket) { + // Computed property access: obj[expr] + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property")?; + (MemberProperty::Expression(Box::new(expr)), true) + } else if parser.consume(&Token::Dot) { + // Static property access: obj.prop + // At this point, we should be directly at the identifier + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierNode::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else if let Token::Default = parser.peek() { + // Special case for 'default' as property name + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + (MemberProperty::Identifier(Identifier { name }), false) + } else { + return Err(parser.error_at_current("Expected identifier after '.'")); + } + } else { + // If we're here, we're expecting a direct property access without a dot + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierNode::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else if let Token::Default = parser.peek() { + // Special case for 'default' as property name + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + (MemberProperty::Identifier(Identifier { name }), false) + } else { + return Err(parser.error_at_current("Expected '.' or '[' in member pattern")); + } + }; + + Ok(MemberExpression { + object: Box::new(object), + property, + computed, + optional: false, + }) + } +} + + +/// Parser for object patterns +pub struct ObjectPatternNode; + +impl ObjectPatternNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ObjectPatternNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the opening brace + parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of object pattern")?; + + let mut properties = Vec::new(); + + // Parse properties until we hit the closing brace + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + // Check for rest element + if parser.check(&Token::Ellipsis) { + let rest = RestElementNode::new().parse(parser)?; + properties.push(ObjectPatternProperty::RestElement(rest)); + + // After rest element, allow a trailing comma (ES2018+) + parser.consume(&Token::Comma); + break; + } else { + // Parse regular property + let property = ObjectPropertyNode::new().parse(parser)?; + properties.push(ObjectPatternProperty::Property(property)); + + // If there's no comma, we should be at the end + if !parser.consume(&Token::Comma) { + break; + } + } + } + + // Consume the closing brace + parser.assert_consume(&Token::RightBrace, "Expected '}' at the end of object pattern")?; + + Ok(ObjectPattern { + properties, + }) + } +} + +/// Parser for object pattern properties +pub struct ObjectPropertyNode; + +impl ObjectPropertyNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ObjectPropertyNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check if we have a computed property + let computed = parser.consume(&Token::LeftBracket); + + // Parse the key + let key = if computed { + // Parse expression inside brackets + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property key")?; + PropertyKey::Expression(Box::new(expr)) + } else { + // Parse identifier or literal + match parser.peek() { + Token::StringLiteral(_) | + Token::NumberLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + PropertyKey::Literal(literal) + }, + Token::Default => { + // Special case for 'default' as property key + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + PropertyKey::Identifier(Identifier { name }) + }, + _ => { + let ident = IdentifierNode::new().parse(parser)?; + PropertyKey::Identifier(ident) + } + } + }; + + // Check if this is a shorthand property + let shorthand = !computed && !parser.check(&Token::Colon); + + // Parse the value if not shorthand + let value = if shorthand { + // For shorthand, the value is the same as the key + match &key { + PropertyKey::Identifier(ident) => { + // Create a new identifier with the same name + let name = ident.name.clone(); + Pattern::Identifier(Identifier { name }) + }, + _ => return Err(parser.error_at_current("Invalid shorthand property in object pattern")), + } + } else { + // Consume the colon + parser.assert_consume(&Token::Colon, "Expected ':' after property key in object pattern")?; + + // Parse the pattern + PatternNode::new().parse(parser)? + }; + + Ok(ObjectProperty { + key, + value, + computed, + shorthand, + }) + } +} + +/// Parser for array patterns +pub struct ArrayPatternNode; + +impl ArrayPatternNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ArrayPatternNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBracket, "Expected '[' at start of array pattern")?; + + let mut elements = Vec::new(); + + while !parser.check(&Token::RightBracket) && !parser.is_at_end() { + if parser.consume(&Token::Comma) { + // Handle elision (hole in the pattern) + elements.push(None); + } else if parser.consume(&Token::Ellipsis) { + // Handle rest element + let argument = Box::new(PatternNode::new().parse(parser)?); + elements.push(Some(Pattern::RestElement(RestElement { argument }))); + + // Rest element must be the last one + if parser.consume(&Token::Comma) && !parser.check(&Token::RightBracket) { + return Err(parser.error_at_current("Rest element must be the last element in array pattern")); + } + break; + } else { + // Parse regular element + let element = PatternNode::new().parse(parser)?; + elements.push(Some(element)); + + // If there's no comma, we should be at the end + if !parser.consume(&Token::Comma) { + break; + } + } + } + + parser.assert_consume(&Token::RightBracket, "Expected ']' at end of array pattern")?; + + Ok(ArrayPattern { elements }) + } +} + + +/// Parser for rest elements +pub struct RestElementNode; + +impl RestElementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for RestElementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the ellipsis + parser.assert_consume(&Token::Ellipsis, "Expected '...' for rest element")?; + + // Parse the argument pattern + let argument = PatternNode::new().parse(parser)?; + + Ok(RestElement { + argument: Box::new(argument), + }) + } +} + +/// Parser for identifiers +pub struct IdentifierNode; + +impl IdentifierNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for IdentifierNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + + let name = match parser.peek() { + Token::Identifier(name) => { + let name = name.clone().into_boxed_str(); + parser.advance(); + Ok(name) + }, + _ => Err(parser.error_at_current("Expected an identifier")), + }?; + + Ok(Identifier { + name, + }) + } +} + +// Main pattern unparser +impl UnparserCombinator for PatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &Pattern) { + match node { + Pattern::Identifier(ident) => { + unparser.write_str(&ident.name); + }, + Pattern::ObjectPattern(pattern) => { + ObjectPatternNode::new().unparse(unparser, pattern); + }, + Pattern::ArrayPattern(pattern) => { + ArrayPatternNode::new().unparse(unparser, pattern); + }, + Pattern::RestElement(rest) => { + RestElementNode::new().unparse(unparser, rest); + }, + Pattern::AssignmentPattern(pattern) => { + // Left side (typically an identifier) + match &*pattern.left { + Pattern::Identifier(ident) => { + unparser.write_str(&ident.name); + }, + _ => { + PatternNode::new().unparse(unparser, &pattern.left); + } + } + + // Equals sign and default value + unparser.space(); + unparser.write_char('='); + unparser.space(); + ExpressionNode::new().unparse(unparser, &pattern.right); + }, + Pattern::MemberExpression(expr) => { + MemberPatternNode::new().unparse(unparser, expr); + } + } + } +} + +// Member expression pattern unparser +impl UnparserCombinator for MemberPatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &MemberExpression) { + // Unparse the object part + ExpressionNode::new().unparse(unparser, &node.object); + + // Unparse the property access + match &node.property { + MemberProperty::Identifier(ident) => { + // Static property access: obj.prop + unparser.write_char('.'); + unparser.write_str(&ident.name); + }, + MemberProperty::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + MemberProperty::Expression(expr) => { + // Computed property access: obj[expr] + unparser.write_char('['); + ExpressionNode::new().unparse(unparser, expr); + unparser.write_char(']'); + } + } + } +} + +// Object pattern unparser +impl UnparserCombinator for ObjectPatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &ObjectPattern) { + unparser.write_char('{'); + + if !node.properties.is_empty() { + let multiline = node.properties.len() > 3; + + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + // Process all properties + for (i, prop) in node.properties.iter().enumerate() { + if i > 0 { + u.write_char(','); + u.newline(); + } + + match prop { + ObjectPatternProperty::Property(property) => { + ObjectPropertyNode::new().unparse(u, property); + }, + ObjectPatternProperty::RestElement(rest) => { + RestElementNode::new().unparse(u, rest); + } + } + } + }); + unparser.newline(); + } else { + // Compact format for few properties + unparser.space(); + + for (i, prop) in node.properties.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + + match prop { + ObjectPatternProperty::Property(property) => { + ObjectPropertyNode::new().unparse(unparser, property); + }, + ObjectPatternProperty::RestElement(rest) => { + RestElementNode::new().unparse(unparser, rest); + } + } + } + + unparser.space(); + } + } + + unparser.write_char('}'); + } +} + +// Object property pattern unparser +impl UnparserCombinator for ObjectPropertyNode { + fn unparse(&self, unparser: &mut Unparser, node: &ObjectProperty) { + // Handle the property key + if node.computed { + unparser.write_char('['); + match &node.key { + PropertyKey::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys in computed properties + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + } + } + unparser.write_char(']'); + } else { + match &node.key { + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Expression(_) => { + // This shouldn't happen for non-computed properties + unparser.write_str("\"error\""); + } + } + } + + // Handle the property value if not shorthand + if !node.shorthand { + unparser.write_char(':'); + unparser.space(); + PatternNode::new().unparse(unparser, &node.value); + } + } +} + +// Array pattern unparser +impl UnparserCombinator for ArrayPatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &ArrayPattern) { + unparser.write_char('['); + + if !node.elements.is_empty() { + let multiline = node.elements.len() > 5; + + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + // Process all elements + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + u.write_char(','); + u.newline(); + } + + match elem { + Some(pattern) => { + PatternNode::new().unparse(u, pattern); + }, + None => { + // Empty slot (elision) + } + } + } + }); + unparser.newline(); + } else { + // Compact format for few elements + unparser.space(); + + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + + match elem { + Some(pattern) => { + PatternNode::new().unparse(unparser, pattern); + }, + None => { + // Empty slot (elision) + } + } + } + + unparser.space(); + } + } + + unparser.write_char(']'); + } +} + +// Rest element unparser +impl UnparserCombinator for RestElementNode { + fn unparse(&self, unparser: &mut Unparser, node: &RestElement) { + unparser.write_str("..."); + PatternNode::new().unparse(unparser, &node.argument); + } +} + +// Identifier unparser +impl UnparserCombinator for IdentifierNode { + fn unparse(&self, unparser: &mut Unparser, node: &Identifier) { + unparser.write_str(&node.name); + } +} diff --git a/src/grammar/property.rs b/src/grammar/property.rs new file mode 100644 index 0000000..74c93f6 --- /dev/null +++ b/src/grammar/property.rs @@ -0,0 +1,246 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; +use super::function::*; +use super::literal::*; + +pub struct PropertyNode; + +impl PropertyNode { + pub fn new() -> Self { + Self + } +} + +/// Parser for object properties +impl ParserCombinator for PropertyNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for special property types (getter, setter, async, generator) + let mut method = false; + let mut kind = PropertyKind::Init; + + // Parse property modifiers + if parser.consume(&Token::Get) { + kind = PropertyKind::Get; + } else if parser.consume(&Token::Set) { + kind = PropertyKind::Set; + } else if parser.consume(&Token::Async) { + method = true; + } else if parser.consume(&Token::Star) { + method = true; + } + + // Parse the property key (computed or not) + let computed = parser.consume(&Token::LeftBracket); + + let key = if computed { + // Computed property key: [expr] + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property key")?; + PropertyKey::Expression(Box::new(expr)) + } else { + // Regular property key: identifier, string, or number + match parser.peek() { + Token::StringLiteral(_) | + Token::NumberLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + PropertyKey::Literal(literal) + }, + Token::Identifier(_) => { + // Identifier as key + let name = match parser.peek() { + Token::Identifier(ident) => ident.clone(), + _ => unreachable!() + }; + parser.advance(); // Consume the identifier + PropertyKey::Identifier(Identifier { name: name.into() }) + }, + Token::Default => { + // Special case for 'default' as property key + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + PropertyKey::Identifier(Identifier { name }) + }, + _ => return Err(parser.error_at_current("Expected property name")) + } + }; + + // Check if this is a method (has parentheses after the key) + if parser.check(&Token::LeftParen) { + method = true; + } + + // Check if this is a shorthand property (no colon after key) + let shorthand = !computed && !method && !parser.check(&Token::Colon) && + matches!(kind, PropertyKind::Init); + + // Parse the property value + let value = if shorthand { + // Shorthand property: { x } is equivalent to { x: x } + match &key { + PropertyKey::Identifier(ident) => { + Box::new(Expression::Identifier(Identifier { name: ident.name.clone() })) + }, + _ => return Err(parser.error_at_current("Invalid shorthand property")) + } + } else if method { + // Method definition: { method() { ... } } + parser.assert_consume(&Token::LeftParen, "Expected '(' after method name")?; + let func_expr = FunctionExpressionNode::new().parse(parser)?; + Box::new(Expression::FunctionExpression(func_expr)) + } else { + // Regular property: { key: value } + parser.assert_consume(&Token::Colon, "Expected ':' after property key")?; + + // Check for arrow function + let pos = parser.save_position(); + if matches!(parser.peek(), Token::Identifier(_)) && + parser.peek_next(1) == &Token::Arrow { + // This might be an arrow function + if let Ok(arrow_func) = ArrowFunctionExpressionNode::new().parse(parser) { + return Ok(Property { + key, + value: Box::new(Expression::ArrowFunctionExpression(arrow_func)), + kind, + method, + shorthand, + computed, + }); + } + parser.restore_position(pos); + } + + // Parse the value as an expression + let expr = ExpressionNode::new().parse_with_precedence(parser, Precedence::Assignment)?; + + Box::new(expr) + }; + + Ok(Property { + key, + value, + kind, + method, + shorthand, + computed, + }) + } +} + + +impl UnparserCombinator for PropertyNode { + fn unparse(&self, unparser: &mut Unparser, node: &Property) { + // Handle property modifiers (get, set, async, generator) + match node.kind { + PropertyKind::Get => { + unparser.write_str("get"); + unparser.write_char(' '); + }, + PropertyKind::Set => { + unparser.write_str("set"); + unparser.write_char(' '); + }, + PropertyKind::Init => { + // For async methods + if node.method && !node.computed { + if let PropertyKey::Identifier(id) = &node.key { + if id.name.starts_with("async") && id.name.len() > 5 { + unparser.write_str("async"); + unparser.write_char(' '); + // Continue with the rest of the method name later + } + } + } + + // For generator methods + if node.method && !node.computed { + if let PropertyKey::Identifier(id) = &node.key { + if id.name.starts_with("*") { + unparser.write_char('*'); + // Continue with the rest of the method name later + } + } + } + } + } + + // Handle the property key + if node.computed { + unparser.write_char('['); + match &node.key { + PropertyKey::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys in computed properties + match lit { + crate::ast::Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + crate::ast::Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + crate::ast::Literal::BooleanLiteral(b) => unparser.write_str(if b.value { "true" } else { "false" }), + crate::ast::Literal::BigIntLiteral(b) => { + unparser.write_str(&b.value); + unparser.write_char('n'); + }, + crate::ast::Literal::NullLiteral(_) => unparser.write_str("null"), + crate::ast::Literal::UndefinedLiteral(_) => unparser.undefined(), + crate::ast::Literal::RegExpLiteral(r) => { + unparser.write_char('/'); + unparser.write_str(&r.pattern); + unparser.write_char('/'); + unparser.write_str(&r.flags); + }, + } + } + } + unparser.write_char(']'); + } else { + match &node.key { + PropertyKey::Identifier(id) => { + // For regular identifiers + unparser.write_str(&id.name); + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys + match lit { + crate::ast::Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + crate::ast::Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::Expression(_) => { + // This shouldn't happen for non-computed properties + unparser.write_str("\"error\""); + } + } + } + + // Handle the property value + if node.shorthand { + // Shorthand property: { x } instead of { x: x } + // No need to write anything else + } else if node.method { + // Method definition: { method() { ... } } + ExpressionNode::new().unparse(unparser, &node.value); + } else { + // Regular property: { key: value } + unparser.write_char(':'); + unparser.space(); + ExpressionNode::new().unparse(unparser, &node.value); + } + } +} diff --git a/src/grammar/script.rs b/src/grammar/script.rs new file mode 100644 index 0000000..a5b348a --- /dev/null +++ b/src/grammar/script.rs @@ -0,0 +1,31 @@ +use crate::ast::*; +use crate::parser::*; +use crate::unparser::*; +use super::statement::*; + +pub struct ScriptNode; + +impl ScriptNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ScriptNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let mut body = Vec::new(); + while !parser.is_at_end() { + body.push(StatementNode::new().parse(parser)?); + } + Ok(Program { source_type: SourceType::Script, body }) + } +} + +impl UnparserCombinator for ScriptNode { + fn unparse(&self, unparser: &mut Unparser, program: &Program) { + for stmt in &program.body { + StatementNode::new().unparse(unparser, stmt); + unparser.newline(); + } + } +} \ No newline at end of file diff --git a/src/grammar/statement.rs b/src/grammar/statement.rs new file mode 100644 index 0000000..c63d834 --- /dev/null +++ b/src/grammar/statement.rs @@ -0,0 +1,1510 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; +use super::declaration::*; +use super::pattern::*; +use super::class::*; +use super::object::*; + +pub struct StatementNode; + +impl StatementNode { + pub fn new() -> Self { + Self + } + + fn determine_for_loop_type(&self, parser: &mut Parser) -> ForLoopType { + let pos = parser.save_position(); + + // Skip the variable declaration or pattern + if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + parser.advance(); + + while !parser.check(&Token::Semicolon) && + !parser.check(&Token::In) && + !parser.check(&Token::Of) && + !parser.check(&Token::RightParen) && + !parser.is_at_end() { + parser.advance(); + } + } else { + while !parser.check(&Token::Semicolon) && + !parser.check(&Token::In) && + !parser.check(&Token::Of) && + !parser.check(&Token::RightParen) && + !parser.is_at_end() { + parser.advance(); + } + } + + let loop_type = match parser.peek() { + Token::In => ForLoopType::ForIn, + Token::Of => ForLoopType::ForOf, + _ => ForLoopType::Standard, + }; + + // Restore position + parser.restore_position(pos); + + loop_type + } + +} + +// Enum to represent the different types of for loops +enum ForLoopType { + Standard, // for (init; test; update) + ForIn, // for (left in right) + ForOf, // for (left of right) +} + +impl ParserCombinator for StatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + // Special case for object literals at the start of a statement + Token::LeftBrace => { + // Try to parse as object expression first + let pos = parser.save_position(); + + // Attempt to parse as an object literal + match ObjectExpressionNode::new().parse(parser) { + Ok(obj_expr) => { + + // Successfully parsed as object expression + // Consume the semicolon if present + parser.consume(&Token::Semicolon); + + return Ok(Statement::ExpressionStatement(ExpressionStatement { + expression: Box::new(Expression::ObjectExpression(obj_expr)), + })); + }, + Err(_) => { + // Failed to parse as object expression, restore position and try as block statement + parser.restore_position(pos); + return BlockStatementNode::new().parse(parser).map(Statement::BlockStatement); + } + } + }, + Token::Var | + Token::Let | + Token::Const => { + VariableDeclarationNode::new().parse(parser).map(|decl| + Statement::Declaration(Declaration::VariableDeclaration(decl)) + ) + }, + Token::Async => { + // Check if this is an async function declaration + let pos = parser.save_position(); + parser.advance(); // Skip 'async' + + if parser.check(&Token::Function) { + // This is an async function declaration or expression + parser.restore_position(pos); + + // Try to parse as function declaration first + let pos2 = parser.save_position(); + parser.advance(); // Skip 'async' + parser.advance(); // Skip 'function' + + // Check for generator function + let _is_generator = parser.consume(&Token::Star); + + // If the next token is an identifier, this is a function declaration + if let Token::Identifier(_) = parser.peek() { + parser.restore_position(pos); + FunctionDeclarationParser::new().parse(parser).map(|decl| + Statement::Declaration(Declaration::FunctionDeclaration(decl)) + ) + } else { + // Otherwise, it's a function expression statement + parser.restore_position(pos); + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + } else { + // Not a function, treat as regular expression statement + parser.restore_position(pos); + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + }, + Token::Function => { + // Check if this is a function declaration (has an identifier) + let pos = parser.save_position(); + parser.advance(); // Skip 'function' + + // Check for generator function + let _is_generator = parser.consume(&Token::Star); + + // If the next token is an identifier, this is a function declaration + if let Token::Identifier(_) = parser.peek() { + parser.restore_position(pos); + FunctionDeclarationParser::new().parse(parser).map(|decl| Statement::Declaration(Declaration::FunctionDeclaration(decl))) + } else { + // Otherwise, it's a function expression statement + parser.restore_position(pos); + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + }, + Token::Class => { + // Check if this is a class declaration (has an identifier) + let pos = parser.save_position(); + parser.advance(); // Skip 'class' + + // If the next token is an identifier, this is a class declaration + if let Token::Identifier(_) = parser.peek() { + parser.restore_position(pos); + ClassDeclarationNode::new().parse(parser).map(|decl| Statement::Declaration(Declaration::ClassDeclaration(decl))) + } else { + // Otherwise, it's a class expression statement + parser.restore_position(pos); + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + }, + Token::Import => { + ImportDeclarationParser::new().parse(parser).map(|decl| + Statement::Declaration(Declaration::ImportDeclaration(decl)) + ) + }, + Token::Export => { + ExportDeclarationParser::new().parse(parser).map(|decl| { + match decl { + ExportDeclaration::Named(named) => Statement::Declaration(Declaration::ExportNamedDeclaration(named)), + ExportDeclaration::Default(default) => Statement::Declaration(Declaration::ExportDefaultDeclaration(default)), + ExportDeclaration::All(all) => Statement::Declaration(Declaration::ExportAllDeclaration(all)), + } + }) + }, + Token::If => { + IfStatementNode::new().parse(parser).map(Statement::IfStatement) + }, + Token::Switch => { + SwitchStatementNode::new().parse(parser).map(Statement::SwitchStatement) + }, + Token::For => { + let pos = parser.save_position(); + + // Consume the 'for' token + parser.advance(); + + // Expect opening parenthesis + if !parser.consume(&Token::LeftParen) { + parser.restore_position(pos); + return Err(parser.error_at_current("Expected '(' after 'for'")); + } + + // Look ahead to determine the type of for loop + let loop_type = self.determine_for_loop_type(parser); + + // Restore position to start parsing the full statement + parser.restore_position(pos); + + match loop_type { + ForLoopType::Standard => { + ForStatementNode::new().parse(parser).map(Statement::ForStatement) + }, + ForLoopType::ForIn => { + ForInStatementNode::new().parse(parser).map(Statement::ForInStatement) + }, + ForLoopType::ForOf => { + ForOfStatementNode::new().parse(parser).map(Statement::ForOfStatement) + }, + } + }, + Token::While => { + WhileStatementNode::new().parse(parser).map(Statement::WhileStatement) + }, + Token::Do => { + DoWhileStatementNode::new().parse(parser).map(Statement::DoWhileStatement) + }, + Token::Try => { + TryStatementNode::new().parse(parser).map(Statement::TryStatement) + }, + Token::With => { + WithStatementNode::new().parse(parser).map(Statement::WithStatement) + }, + Token::Throw => { + ThrowStatementNode::new().parse(parser).map(Statement::ThrowStatement) + }, + Token::Return => { + ReturnStatementNode::new().parse(parser).map(Statement::ReturnStatement) + }, + Token::Break => { + BreakStatementNode::new().parse(parser).map(Statement::BreakStatement) + }, + Token::Continue => { + ContinueStatementNode::new().parse(parser).map(Statement::ContinueStatement) + }, + Token::Debugger => { + parser.advance(); // Consume 'debugger' + parser.consume(&Token::Semicolon); // Optional semicolon + Ok(Statement::DebuggerStatement) + }, + Token::Semicolon => { + parser.advance(); // Consume ';' + Ok(Statement::EmptyStatement) + }, + // Check for labeled statements (identifier followed by colon) + Token::Identifier(_) => { + let pos = parser.save_position(); + let ident = IdentifierNode::new().parse(parser)?; + + if parser.consume(&Token::Colon) { + // This is a labeled statement + let body = Box::new(self.parse(parser)?); + Ok(Statement::LabeledStatement(LabeledStatement { label: ident, body })) + } else { + // Not a labeled statement, restore position and parse as expression statement + parser.restore_position(pos); + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + }, + // Default to expression statement + _ => { + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + } + } +} + +/// Parser for block statements +pub struct BlockStatementNode; + +impl BlockStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for BlockStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of block statement")?; + + let mut body = Vec::new(); + + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + // Parse a statement + let statement = StatementNode::new().parse(parser)?; + body.push(statement); + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' at the end of block statement")?; + + Ok(BlockStatement { body }) + } +} + +impl UnparserCombinator for BlockStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &BlockStatement) { + unparser.write_char('{'); + unparser.newline(); + + if !node.body.is_empty() { + unparser.with_indent(|u| { + for stmt in &node.body { + StatementNode::new().unparse(u, stmt); + u.newline(); + } + }); + } + + unparser.write_char('}'); + } +} + +/// Parser for expression statements +pub struct ExpressionStatementNode; + +impl ExpressionStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ExpressionStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for directive prologue (string literals that might be "use strict") + if let Token::StringLiteral(_) = parser.peek() { + let pos = parser.save_position(); + let expr = ExpressionNode::new().parse(parser)?; + + // If this is followed by a semicolon or end of block, it's a directive + if parser.check(&Token::Semicolon) || parser.check(&Token::RightBrace) || parser.is_at_end() { + // Consume the semicolon if present + parser.consume(&Token::Semicolon); + + /* + // Check if this is "use strict" + if let Expression::Literal(Literal::StringLiteral(StringLiteral { value })) = &expr { + if value == "use strict" { + // Set strict mode + parser.set_strict_mode(true); + } + }*/ + + return Ok(ExpressionStatement { + expression: Box::new(expr), + }); + } + + // Not a directive, restore position and continue with normal parsing + parser.restore_position(pos); + } + + // Special case for object literals at the start of a statement + if parser.check(&Token::LeftBrace) { + let pos = parser.save_position(); + + // Try to parse as object expression + match ObjectExpressionNode::new().parse(parser) { + Ok(obj_expr) => { + // Successfully parsed as object expression + // Consume the semicolon if present + parser.consume(&Token::Semicolon); + + return Ok(ExpressionStatement { + expression: Box::new(Expression::ObjectExpression(obj_expr)), + }); + }, + Err(_) => { + // Failed to parse as object expression, restore position + parser.restore_position(pos); + // Will fall through to regular expression parsing + } + } + } + + // Regular expression statement parsing + let expr = ExpressionNode::new().parse(parser)?; + + // Consume the semicolon if present (ASI rules apply) + // In JavaScript, semicolons are optional in many cases due to Automatic Semicolon Insertion (ASI) + if !parser.previous_line_terminator() && + !parser.check(&Token::RightBrace) && + !parser.is_at_end() { + parser.assert_consume(&Token::Semicolon, "Expected ';' after expression statement")?; + } else { + // Semicolon is optional if: + // 1. There's a line terminator after the expression + // 2. The next token is a closing brace + // 3. We're at the end of the input + parser.consume(&Token::Semicolon); + } + + Ok(ExpressionStatement { + expression: Box::new(expr), + }) + } +} + +/// Parser for if statements +pub struct IfStatementNode; + +impl IfStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for IfStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::If, "Expected 'if'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'if'")?; + + let test = Box::new(ExpressionNode::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after if condition")?; + + // Check if the consequent starts with a left brace + let consequent = if parser.check(&Token::LeftBrace) { + // Force parsing as a block statement + Box::new(BlockStatementNode::new().parse(parser).map(Statement::BlockStatement)?) + } else { + // For other statement types, use the general statement parser + Box::new(StatementNode::new().parse(parser)?) + }; + + let alternate = if parser.consume(&Token::Else) { + Some(Box::new(StatementNode::new().parse(parser)?)) + } else { + None + }; + + Ok(IfStatement { + test, + consequent, + alternate, + }) + } +} + + +/// Parser for switch statements +pub struct SwitchStatementNode; + +impl SwitchStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for SwitchStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Switch, "Expected 'switch'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'switch'")?; + + let discriminant = Box::new(ExpressionNode::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after switch expression")?; + parser.assert_consume(&Token::LeftBrace, "Expected '{' to start switch body")?; + + let cases = parser.with_context(LexicalContext::SwitchBody, |p| { + + let mut result = Vec::new(); + + while !p.check(&Token::RightBrace) && !p.is_at_end() { + if p.consume(&Token::Case) { + // Case clause + let test = Some(Box::new(ExpressionNode::new().parse(p)?)); + p.assert_consume(&Token::Colon, "Expected ':' after case value")?; + + let mut consequent = Vec::new(); + while !p.check(&Token::Case) && + !p.check(&Token::Default) && + !p.check(&Token::RightBrace) && + !p.is_at_end() { + consequent.push(StatementNode::new().parse(p)?); + } + + result.push(SwitchCase { test, consequent }); + } else if p.consume(&Token::Default) { + // Default clause + p.assert_consume(&Token::Colon, "Expected ':' after 'default'")?; + + let mut consequent = Vec::new(); + while !p.check(&Token::Case) && + !p.check(&Token::Default) && + !p.check(&Token::RightBrace) && + !p.is_at_end() { + consequent.push(StatementNode::new().parse(p)?); + } + + result.push(SwitchCase { test: None, consequent }); + } else { + return Err(p.error_at_current("Expected 'case' or 'default' in switch statement")); + } + } + + p.assert_consume(&Token::RightBrace, "Expected '}' to end switch statement")?; + + Ok(result) + })?; + + + Ok(SwitchStatement { + discriminant, + cases, + }) + } +} + +/// Parser for while statements +pub struct WhileStatementNode; + +impl WhileStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for WhileStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::While, "Expected 'while'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'while'")?; + + let test = Box::new(ExpressionNode::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after while condition")?; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } + })?; + + Ok(WhileStatement { + test, + body: Box::new(body), + }) + } +} + +/// Parser for do-while statements +pub struct DoWhileStatementNode; + +impl DoWhileStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for DoWhileStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Do, "Expected 'do'")?; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } + })?; + + parser.assert_consume(&Token::While, "Expected 'while' after do block")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'while'")?; + + let test = Box::new(ExpressionNode::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after while condition")?; + parser.assert_consume(&Token::Semicolon, "Expected ';' after while condition")?; + + Ok(DoWhileStatement { + body: Box::new(body), + test, + }) + } +} + +/// Parser for for statements +pub struct ForStatementNode; + +impl ForStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ForStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::For, "Expected 'for'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; + + // Parse initialization + let init = if parser.consume(&Token::Semicolon) { + None + } else if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let decl = VariableDeclarationNode::new().parse(parser)?; + + // Check if this is a for-in or for-of loop + if parser.check(&Token::In) || parser.check(&Token::Of) { + return Err(parser.error_at_current("Expected ';' after for initialization")); + } + + parser.assert_consume(&Token::Semicolon, "Expected ';' after for initialization")?; + Some(ForInit::VariableDeclaration(decl)) + } else { + // Expression + let expr = ExpressionNode::new().parse(parser)?; + + // Check if this is a for-in or for-of loop + if parser.check(&Token::In) || parser.check(&Token::Of) { + return Err(parser.error_at_current("Expected ';' after for initialization")); + } + + parser.assert_consume(&Token::Semicolon, "Expected ';' after for initialization")?; + Some(ForInit::Expression(Box::new(expr))) + }; + + // Parse condition + let test = if parser.consume(&Token::Semicolon) { + None + } else { + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::Semicolon, "Expected ';' after for condition")?; + Some(Box::new(expr)) + }; + + // Parse update + let update = if parser.consume(&Token::RightParen) { + None + } else { + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightParen, "Expected ')' after for clauses")?; + Some(Box::new(expr)) + }; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } + })?; + + Ok(ForStatement { + init, + test, + update, + body: Box::new(body), + }) + } +} + + +/// Parser for for-in statements +pub struct ForInStatementNode; + +impl ForInStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ForInStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::For, "Expected 'for'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; + + // Parse left side (variable declaration or pattern) + let left = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let decl = VariableDeclarationNode::new().parse(parser)?; + ForInOf::VariableDeclaration(decl) + } else { + // Pattern + let pattern = PatternNode::new().parse(parser)?; + ForInOf::Pattern(pattern) + }; + + // Check for 'in' keyword - fail early if not found + if !parser.check(&Token::In) { + return Err(parser.error_at_current("Expected 'in' in for-in statement")); + } + + // Expect 'in' keyword + parser.assert_consume(&Token::In, "Expected 'in' in for-in statement")?; + + // Parse right side (expression) + let right = Box::new(ExpressionNode::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after for-in clauses")?; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } + })?; + + Ok(ForInStatement { + left, + right, + body: Box::new(body), + }) + } +} + +impl UnparserCombinator for ForInStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ForInStatement) { + unparser.write_str("for"); + unparser.space(); + unparser.write_char('('); + + match &node.left { + ForInOf::VariableDeclaration(decl) => { + // Special handling for variable declarations in for-in loops + // Write the variable kind (var, let, const) + match decl.kind { + VariableKind::Var => unparser.write_str("var"), + VariableKind::Let => unparser.write_str("let"), + VariableKind::Const => unparser.write_str("const"), + } + + unparser.write_char(' '); + + // Write the declarations without semicolon + if !decl.declarations.is_empty() { + // First declaration + VariableDeclaratorNode::new().unparse(unparser, &decl.declarations[0]); + + // Remaining declarations + for d in &decl.declarations[1..] { + unparser.write_char(','); + unparser.space(); + VariableDeclaratorNode::new().unparse(unparser, d); + } + } + // No semicolon here! + }, + ForInOf::Pattern(pattern) => { + PatternNode::new().unparse(unparser, pattern); + } + } + + unparser.write_char(' '); + unparser.write_str("in"); + unparser.write_char(' '); + + ExpressionNode::new().unparse(unparser, &node.right); + + unparser.write_char(')'); + + match &*node.body { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + + +/// Parser for for-of statements +pub struct ForOfStatementNode; + +impl ForOfStatementNode { + pub fn new() -> Self { + Self + } +} + +/// Parser for for-of statements +impl ParserCombinator for ForOfStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::For, "Expected 'for'")?; + + // Check for 'await' (for await of) + let await_token = parser.consume(&Token::Await); + + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; + + // Parse left side (variable declaration or pattern) + let left = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let decl = VariableDeclarationNode::new().parse(parser)?; + ForInOf::VariableDeclaration(decl) + } else { + // Pattern + let pattern = PatternNode::new().parse(parser)?; + ForInOf::Pattern(pattern) + }; + + // Check for 'of' keyword - fail early if not found + if !parser.check(&Token::Of) { + return Err(parser.error_at_current("Expected 'of' in for-of statement")); + } + + // Expect 'of' keyword + parser.assert_consume(&Token::Of, "Expected 'of' in for-of statement")?; + + // Parse right side (expression) + let right = Box::new(ExpressionNode::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after for-of clauses")?; + + // Parse the body with special handling for block statements + let body = parser.with_context(LexicalContext::LoopBody, |p| { + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } + })?; + + Ok(ForOfStatement { + left, + right, + body: Box::new(body), + await_token, + }) + } +} + + +impl UnparserCombinator for ForOfStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ForOfStatement) { + unparser.write_str("for"); + + if node.await_token { + unparser.write_char(' '); + unparser.write_str("await"); + } + + unparser.space(); + unparser.write_char('('); + + match &node.left { + ForInOf::VariableDeclaration(decl) => { + // Special handling for variable declarations in for-of loops + // Write the variable kind (var, let, const) + match decl.kind { + VariableKind::Var => unparser.write_str("var"), + VariableKind::Let => unparser.write_str("let"), + VariableKind::Const => unparser.write_str("const"), + } + + unparser.write_char(' '); + + // Write the declarations without semicolon + if !decl.declarations.is_empty() { + // First declaration + VariableDeclaratorNode::new().unparse(unparser, &decl.declarations[0]); + + // Remaining declarations + for d in &decl.declarations[1..] { + unparser.write_char(','); + unparser.space(); + VariableDeclaratorNode::new().unparse(unparser, d); + } + } + // No semicolon here! + }, + ForInOf::Pattern(pattern) => { + PatternNode::new().unparse(unparser, pattern); + } + } + + unparser.write_char(' '); + unparser.write_str("of"); + unparser.write_char(' '); + + ExpressionNode::new().unparse(unparser, &node.right); + + unparser.write_char(')'); + + match &*node.body { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + + +/// Parser for break statements +pub struct BreakStatementNode; + +impl BreakStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for BreakStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Break, "Expected 'break'")?; + + // Check if we're in a loop or switch + if !parser.is_in_loop_body() && !parser.is_in_switch() { + return Err(parser.error_at_current("'break' statement can only be used within a loop or switch statement")); + } + + // Check for label + let label = if !parser.previous_line_terminator() && matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + // Consume semicolon if present + parser.consume(&Token::Semicolon); + + Ok(BreakStatement { label }) + } +} + +/// Parser for continue statements +pub struct ContinueStatementNode; + +impl ContinueStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ContinueStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Continue, "Expected 'continue'")?; + + // Check if we're in a loop + if !parser.is_in_loop_body() { + return Err(parser.error_at_current("'continue' statement can only be used within a loop")); + } + + // Check for label + let label = if !parser.previous_line_terminator() && matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + // Consume semicolon if present + parser.consume(&Token::Semicolon); + + Ok(ContinueStatement { label }) + } +} + +impl UnparserCombinator for ContinueStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ContinueStatement) { + unparser.write_str("continue"); + + if let Some(label) = &node.label { + unparser.space(); + unparser.write_str(&label.name); + } + + unparser.write_char(';'); + } +} + +/// Parser for return statements +pub struct ReturnStatementNode; + +impl ReturnStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ReturnStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Return, "Expected 'return'")?; + + // Check if we're in a function + if !parser.is_in_function() { + return Err(parser.error_at_current("'return' statement can only be used within a function")); + } + + // Check for return value + let argument = if parser.previous_line_terminator() || + parser.check(&Token::Semicolon) || + parser.check(&Token::RightBrace) || + parser.is_at_end() { + None + } else { + Some(Box::new(ExpressionNode::new().parse(parser)?)) + }; + + // Consume semicolon if present + parser.consume(&Token::Semicolon); + + Ok(ReturnStatement { argument }) + } +} + +impl UnparserCombinator for ReturnStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ReturnStatement) { + unparser.write_str("return"); + + if let Some(argument) = &node.argument { + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, argument); + } + + unparser.write_char(';'); + } +} + +/// Parser for with statements +pub struct WithStatementNode; + +impl WithStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for WithStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::With, "Expected 'with'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'with'")?; + + let object = Box::new(ExpressionNode::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after with object")?; + + let body = Box::new(StatementNode::new().parse(parser)?); + + Ok(WithStatement { + object, + body, + }) + } +} + +impl UnparserCombinator for WithStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &WithStatement) { + unparser.write_str("with"); + unparser.write_char(' '); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.object); + unparser.write_char(')'); + + match &*node.body { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + +/// Parser for throw statements +pub struct ThrowStatementNode; + +impl ThrowStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ThrowStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Throw, "Expected 'throw'")?; + + if parser.previous_line_terminator() { + return Err(parser.error_at_current("Line terminator not allowed after 'throw'")); + } + + let argument = Box::new(ExpressionNode::new().parse(parser)?); + + parser.consume(&Token::Semicolon); + + Ok(ThrowStatement { argument }) + } +} + +impl UnparserCombinator for ThrowStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ThrowStatement) { + unparser.write_str("throw"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, &node.argument); + unparser.write_char(';'); + } +} + +/// Parser for try statements +pub struct TryStatementNode; + +impl TryStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for TryStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Try, "Expected 'try'")?; + + let block = BlockStatementNode::new().parse(parser)?; + + // Parse catch clause if present + let handler = if parser.consume(&Token::Catch) { + // Parse parameter if present + let param = if parser.consume(&Token::LeftParen) { + let pattern = PatternNode::new().parse(parser)?; + parser.assert_consume(&Token::RightParen, "Expected ')' after catch parameter")?; + Some(pattern) + } else { + None + }; + + let body = BlockStatementNode::new().parse(parser)?; + + Some(CatchClause { + param, + body, + }) + + } else { + None + }; + + // Parse finally clause if present + let finalizer = if parser.consume(&Token::Finally) { + Some(BlockStatementNode::new().parse(parser)?) + } else { + None + }; + + // Either catch or finally must be present + if handler.is_none() && finalizer.is_none() { + return Err(parser.error_at_current("Missing catch or finally after try")); + } + + Ok(TryStatement { + block, + handler, + finalizer, + }) + } +} + +impl UnparserCombinator for TryStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &TryStatement) { + unparser.write_str("try"); + unparser.space(); + BlockStatementNode::new().unparse(unparser, &node.block); + + // Handle catch clause if present + if let Some(handler) = &node.handler { + unparser.space(); + unparser.write_str("catch"); + + // Handle catch parameter if present + if let Some(param) = &handler.param { + unparser.space(); + unparser.write_char('('); + PatternNode::new().unparse(unparser, param); + unparser.write_char(')'); + } + + unparser.space(); + BlockStatementNode::new().unparse(unparser, &handler.body); + } + + // Handle finally clause if present + if let Some(finalizer) = &node.finalizer { + unparser.space(); + unparser.write_str("finally"); + unparser.space(); + BlockStatementNode::new().unparse(unparser, finalizer); + } + } +} + +/// Parser for labeled statements +pub struct LabeledStatementNode; + +impl LabeledStatementNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for LabeledStatementNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let label = IdentifierNode::new().parse(parser)?; + + parser.assert_consume(&Token::Colon, "Expected ':' after label")?; + + // Add label to context + //parser.add_label(label.name.clone()); + + let body = Box::new(StatementNode::new().parse(parser)?); + + // Remove label from context + //parser.remove_label(&label.name); + + Ok(LabeledStatement { + label, + body, + }) + } +} + +impl UnparserCombinator for LabeledStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &LabeledStatement) { + unparser.write_str(&node.label.name); + unparser.write_char(':'); + unparser.space(); + StatementNode::new().unparse(unparser, &node.body); + } +} + +// Main statement unparser +impl UnparserCombinator for StatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &Statement) { + match node { + Statement::BlockStatement(stmt) => BlockStatementNode::new().unparse(unparser, stmt), + Statement::BreakStatement(stmt) => BreakStatementNode::new().unparse(unparser, stmt), + Statement::ContinueStatement(stmt) => ContinueStatementNode::new().unparse(unparser, stmt), + Statement::DebuggerStatement => { + // TODO its own unparser + unparser.write_str("debugger"); + unparser.write_char(';'); + }, + Statement::DoWhileStatement(stmt) => DoWhileStatementNode::new().unparse(unparser, stmt), + //Statement::EmptyStatement => unparser.write_char(';'), + Statement::EmptyStatement => {}, + Statement::ExpressionStatement(stmt) => ExpressionStatementNode::new().unparse(unparser, stmt), + Statement::ForStatement(stmt) => ForStatementNode::new().unparse(unparser, stmt), + Statement::ForInStatement(stmt) => ForInStatementNode::new().unparse(unparser, stmt), + Statement::ForOfStatement(stmt) => ForOfStatementNode::new().unparse(unparser, stmt), + Statement::IfStatement(stmt) => IfStatementNode::new().unparse(unparser, stmt), + Statement::LabeledStatement(stmt) => LabeledStatementNode::new().unparse(unparser, stmt), + Statement::ReturnStatement(stmt) => ReturnStatementNode::new().unparse(unparser, stmt), + Statement::SwitchStatement(stmt) => SwitchStatementNode::new().unparse(unparser, stmt), + Statement::ThrowStatement(stmt) => ThrowStatementNode::new().unparse(unparser, stmt), + Statement::TryStatement(stmt) => TryStatementNode::new().unparse(unparser, stmt), + Statement::WhileStatement(stmt) => WhileStatementNode::new().unparse(unparser, stmt), + Statement::WithStatement(stmt) => WithStatementNode::new().unparse(unparser, stmt), + Statement::Declaration(decl) => { + match decl { + Declaration::ClassDeclaration(decl) => ClassDeclarationNode::new().unparse(unparser, decl), + Declaration::FunctionDeclaration(decl) => FunctionDeclarationParser::new().unparse(unparser, decl), + Declaration::VariableDeclaration(decl) => VariableDeclarationNode::new().unparse(unparser, decl), + Declaration::ImportDeclaration(decl) => ImportDeclarationParser::new().unparse(unparser, decl), + Declaration::ExportNamedDeclaration(decl) => ExportNamedDeclarationParser::new().unparse(unparser, decl), + Declaration::ExportDefaultDeclaration(decl) => ExportDefaultDeclarationParser::new().unparse(unparser, decl), + Declaration::ExportAllDeclaration(decl) => ExportAllDeclarationParser::new().unparse(unparser, decl), + } + } + } + } +} + +// Expression statement unparser +impl UnparserCombinator for ExpressionStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ExpressionStatement) { + ExpressionNode::new().unparse(unparser, &node.expression); + unparser.write_char(';'); + } +} + +// If statement unparser +impl UnparserCombinator for IfStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &IfStatement) { + unparser.write_str("if"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.test); + unparser.write_char(')'); + + // Handle consequent + match &*node.consequent { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.consequent); + }); + } + } + + // Handle alternate (else branch) + if let Some(alt) = &node.alternate { + unparser.space(); + unparser.write_str("else"); + + match &**alt { + Statement::IfStatement(_) => { + // For else if, keep on same line + unparser.space(); + StatementNode::new().unparse(unparser, alt); + }, + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, alt); + }); + } + } + } + } +} + +// Switch statement unparser +impl UnparserCombinator for SwitchStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &SwitchStatement) { + unparser.write_str("switch"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.discriminant); + unparser.write_char(')'); + unparser.space(); + unparser.write_char('{'); + unparser.newline(); + + for case in &node.cases { + if let Some(test) = &case.test { + unparser.write_str("case"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, test); + unparser.write_char(':'); + } else { + unparser.write_str("default:"); + } + + if !case.consequent.is_empty() { + unparser.newline(); + + unparser.with_indent(|u| { + for stmt in &case.consequent { + StatementNode::new().unparse(u, stmt); + u.newline(); + } + }); + } else { + unparser.newline(); + } + } + + unparser.write_char('}'); + } +} + +// While statement unparser +impl UnparserCombinator for WhileStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &WhileStatement) { + unparser.write_str("while"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.test); + unparser.write_char(')'); + unparser.space(); + + match &*node.body { + Statement::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + +// Do-while statement unparser +impl UnparserCombinator for DoWhileStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &DoWhileStatement) { + unparser.write_str("do"); + unparser.space(); + + match &*node.body { + Statement::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + unparser.newline(); + } + } + + unparser.space(); + unparser.write_str("while"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.test); + unparser.write_str(");"); + } +} + +// For statement unparser +impl UnparserCombinator for ForStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ForStatement) { + unparser.write_str("for"); + unparser.space(); + unparser.write_char('('); + + // Initialization + if let Some(init) = &node.init { + match init { + ForInit::VariableDeclaration(decl) => { + VariableDeclarationNode::new().unparse(unparser, decl); + }, + ForInit::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + unparser.write_char(';'); + } + } + } else { + unparser.write_char(';'); + } + + // Test condition + unparser.space(); + if let Some(test) = &node.test { + ExpressionNode::new().unparse(unparser, test); + } + unparser.write_char(';'); + + // Update expression + unparser.space(); + if let Some(update) = &node.update { + ExpressionNode::new().unparse(unparser, update); + } + + unparser.write_char(')'); + unparser.space(); + + match &*node.body { + Statement::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + + +// Break statement unparser +impl UnparserCombinator for BreakStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &BreakStatement) { + unparser.write_str("break"); + + if let Some(label) = &node.label { + unparser.space(); + unparser.write_str(&label.name); + } + + unparser.write_char(';'); + } +} diff --git a/src/grammar/this.rs b/src/grammar/this.rs new file mode 100644 index 0000000..e347eeb --- /dev/null +++ b/src/grammar/this.rs @@ -0,0 +1,25 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; + +pub struct ThisExpressionNode; + +impl ThisExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ThisExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::This, "Expected 'this'")?; + Ok(ThisExpression {}) + } +} + +impl UnparserCombinator for ThisExpressionNode { + fn unparse(&self, unparser: &mut Unparser, _node: &ThisExpression) { + unparser.write_str("this"); + } +} diff --git a/src/grammar/yield_expression.rs b/src/grammar/yield_expression.rs new file mode 100644 index 0000000..48b6a32 --- /dev/null +++ b/src/grammar/yield_expression.rs @@ -0,0 +1,52 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; + +pub struct YieldExpressionNode; + +impl YieldExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for YieldExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + if !parser.allows_yield() { + return Err(parser.error_at_current("'yield' expressions are only allowed within generator functions")); + } + + parser.assert_consume(&Token::Yield, "Expected 'yield'")?; + + let delegate = parser.consume(&Token::Star); + + let argument = if parser.check(&Token::Semicolon) || parser.is_at_end() || + parser.check(&Token::RightBrace) || parser.check(&Token::Comma) || + parser.check(&Token::RightParen) || parser.check(&Token::RightBracket) || + parser.check(&Token::Colon) || parser.previous_line_terminator() { + None + } else { + Some(Box::new(ExpressionNode::new().parse(parser)?)) + }; + + Ok(YieldExpression { + argument, + delegate, + }) + } +} + +impl UnparserCombinator for YieldExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &YieldExpression) { + unparser.write_str("yield"); + if node.delegate { + unparser.write_char('*'); + } + if let Some(argument) = &node.argument { + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, argument); + } + } +} \ No newline at end of file diff --git a/src/lexer/context.rs b/src/lexer/context.rs new file mode 100644 index 0000000..92de66f --- /dev/null +++ b/src/lexer/context.rs @@ -0,0 +1,159 @@ +use std::fmt; +use crate::lexer::Token; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum LexicalContext { + Default, + PropertyKey, + MemberAccess, + ImportExport, + ObjectPattern, + ParameterName { strict_mode: bool }, + FunctionBody { allow_yield: bool, allow_await: bool }, + LoopParameters, + LoopBody, + SwitchBody, + ModuleBody { allow_await: bool }, +} + +impl fmt::Display for LexicalContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Default => write!(f, "global"), + Self::ModuleBody { allow_await: _ } => write!(f, "module"), + Self::PropertyKey => write!(f, "property key"), + Self::MemberAccess => write!(f, "member access"), + Self::ImportExport => write!(f, "import export"), + Self::ObjectPattern => write!(f, "object pattern"), + Self::ParameterName { strict_mode: false } => write!(f, "param name"), + Self::ParameterName { strict_mode: true } => write!(f, "strict param name"), + Self::FunctionBody { allow_yield: false, allow_await: false } => write!(f, "function body"), + Self::FunctionBody { allow_yield: true, allow_await: false } => write!(f, "generator function body"), + Self::FunctionBody { allow_yield: false, allow_await: true } => write!(f, "async function body"), + Self::FunctionBody { allow_yield: true, allow_await: true } => write!(f, "async generator function body"), + Self::LoopParameters => write!(f, "loop init"), + Self::LoopBody => write!(f, "loop body"), + Self::SwitchBody => write!(f, "switch body"), + } + } +} + +impl LexicalContext { + + pub fn allows_token_as_identifier(&self, token: &Token) -> bool { + match self { + // In property contexts, all keywords can be identifiers except a few special ones + Self::MemberAccess => { + + //let result = matches!(keyword, "default"); + + //println!("Checking in MemberAccess with {:#?}", token); + + if token == &Token::Default { + true + } else if token == &Token::From { + true + } else if token == &Token::For { + true + } else if token == &Token::Get { + true + } else if token == &Token::Set { + true + } else if token == &Token::As { + true + } else { + false + } + + //result + //false + }, + Self::PropertyKey => { + + if token == &Token::Default { + true + } else if token == &Token::From { + true + } else if token == &Token::For { + true + } else if token == &Token::Get { + true + } else if token == &Token::Set { + true + } else if token == &Token::As { + true + } else { + false + } + + //println!("Currently in PropertyKey with {:#?}", keyword); + //false + }, + + // In import/export contexts, specific keywords are allowed as identifiers + Self::ImportExport => { + //println!("Currently in ImportExport with {:#?}", keyword); + false + }, + + // In object patterns, allow destructuring with keywords except special ones + Self::ObjectPattern => { + //println!("Currently in ObjectPattern with {:#?}", keyword); + false + }, + + // In parameter names, most keywords can be identifiers in non-strict mode + Self::ParameterName { strict_mode } => { + //println!("Currently in ParameterName strict={:#?} with {:#?}", strict_mode, keyword); + if *strict_mode { + false + } else { + false + } + }, + Self::LoopParameters => { + //println!("Currently in LoopParameters with {:#?}", token); + if token == &Token::Set { + true + } else { + false + } + }, + // In function bodies, yield and await have special handling + Self::FunctionBody { allow_yield, allow_await } => { + //println!("Currently in FunctionBody with {:#?}", keyword); + + if (*allow_yield && token == &Token::Yield) || (*allow_await && token == &Token::Await) { + false + } else if token == &Token::As { + true + } else { + // Default to not allowing keywords as identifiers in function bodies + false + } + }, + + // In loop bodies, break and continue are special + Self::LoopBody => { + //println!("Currently in LoopBody with {:#?}", keyword); + false + }, + + // In switch bodies, case and default are special + Self::SwitchBody => { + //println!("Currently in SwitchBody with {:#?}", keyword); + false + }, + + // In default context, keywords are not identifiers + Self::Default => { + //println!("Currently in Default with {:#?}", keyword); + false + }, + + Self::ModuleBody { allow_await: _ } => { + false + }, + } + } +} diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 5f8d827..10c8f6d 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -1,45 +1,212 @@ use std::collections::HashSet; -use crate::lexer::{Token, TokenType, TemplatePart, LexerError}; +use crate::lexer::{Token, TemplatePart, LexerError}; pub struct Lexer<'a> { source: &'a str, - chars: Vec, // TODO chars: Peekable>, - tokens: Vec, + bytes: &'a [u8], + source_len: usize, + tokens: Vec<(Token, [usize; 2])>, start: usize, current: usize, line: usize, column: usize, + current_char: char, + previous_char: char, } -macro_rules! add_token { - ($self:expr, $token_type:expr) => { - $self.tokens.push(Token::new($token_type, $self.line, $self.column - 1, 1)) - }; - ($self:expr, $token_type:expr, $length:expr) => { - $self.tokens.push(Token::new($token_type, $self.line, $self.column - $length, $length)) - }; +macro_rules! emit_token { + ($lexer:expr, $token:expr) => { + $lexer.tokens.push(($token, [$lexer.line, $lexer.column])) + } } impl<'a> Lexer<'a> { + + #[inline] pub fn new(source: &'a str) -> Self { Lexer { - chars: source.chars().collect(), source, + bytes: source.as_bytes(), + source_len: source.len(), tokens: Vec::with_capacity(source.len() / 4), start: 0, current: 0, line: 1, column: 0, + current_char: '\0', + previous_char: '\0', } } - pub fn scan_tokens(&mut self) -> Result, LexerError> { + #[inline(always)] + fn identifier(&mut self) { + // Track whether the identifier is all ASCII + let mut is_all_ascii = true; + + // Fast path for identifiers (most common case) + while !self.is_at_end() { + if self.current < self.source_len { + let b = self.bytes[self.current]; + + // Fast check for ASCII alphanumeric characters + if (b >= b'a' && b <= b'z') || + (b >= b'A' && b <= b'Z') || + (b >= b'0' && b <= b'9') || + b == b'_' || + b == b'$' { + // Advance without the overhead of UTF-8 decoding + self.previous_char = self.current_char; + self.current_char = b as char; + self.current += 1; + self.column += 1; + continue; + } else if b >= 128 { + // Found a non-ASCII byte + is_all_ascii = false; + + // Use advance() which properly handles UTF-8 characters + self.advance(); + continue; + } + } + + // If we reach here, either we're at the end or the next character + // is not an identifier character + if !self.is_at_end() { + let c = self.peek(); + if self.is_alphanumeric(c) { + self.advance(); + if !c.is_ascii() { + is_all_ascii = false; + } + continue; + } + } + + // Not a valid identifier character or end of source + break; + } + + // Calculate the length of the identifier + let length = self.current - self.start; + + // Only check for keywords if the identifier is within the length range of keywords + // and is all ASCII (since all keywords are ASCII) + let token_type = if is_all_ascii && length >= 2 && length <= 10 { + // For ASCII identifiers, we can do direct byte comparisons + let bytes = &self.bytes[self.start..self.current]; + + // First check by length for faster matching + match bytes.len() { + 2 => match bytes { + b"do" => Token::Do, + b"if" => Token::If, + b"in" => Token::In, + b"of" => Token::Of, + b"as" => Token::As, + _ => self.create_identifier_token(), + }, + 3 => match bytes { + b"for" => Token::For, + b"let" => Token::Let, + b"new" => Token::New, + b"try" => Token::Try, + b"var" => Token::Var, + b"get" => Token::Get, + b"set" => Token::Set, + _ => self.create_identifier_token(), + }, + 4 => match bytes { + b"case" => Token::Case, + b"else" => Token::Else, + b"enum" => Token::Enum, + b"from" => Token::From, + b"null" => Token::Null, + b"this" => Token::This, + b"true" => Token::True, + b"void" => Token::Void, + b"with" => Token::With, + b"eval" => Token::Eval, + _ => self.create_identifier_token(), + }, + 5 => match bytes { + b"async" => Token::Async, + b"await" => Token::Await, + b"break" => Token::Break, + b"catch" => Token::Catch, + b"class" => Token::Class, + b"const" => Token::Const, + b"false" => Token::False, + b"super" => Token::Super, + b"throw" => Token::Throw, + b"while" => Token::While, + b"yield" => Token::Yield, + _ => self.create_identifier_token(), + }, + 6 => match bytes { + b"delete" => Token::Delete, + b"export" => Token::Export, + b"import" => Token::Import, + b"public" => Token::Public, + b"return" => Token::Return, + b"static" => Token::Static, + b"switch" => Token::Switch, + b"target" => Token::Target, + b"typeof" => Token::Typeof, + _ => self.create_identifier_token(), + }, + 7 => match bytes { + b"default" => Token::Default, + b"extends" => Token::Extends, + b"finally" => Token::Finally, + b"package" => Token::Package, + b"private" => Token::Private, + _ => self.create_identifier_token(), + }, + 8 => match bytes { + b"continue" => Token::Continue, + b"debugger" => Token::Debugger, + b"function" => Token::Function, + _ => self.create_identifier_token(), + }, + 9 => match bytes { + b"arguments" => Token::Arguments, + b"interface" => Token::Interface, + b"protected" => Token::Protected, + b"undefined" => Token::Undefined, + _ => self.create_identifier_token(), + }, + 10 => match bytes { + b"instanceof" => Token::InstanceOf, + b"implements" => Token::Implements, + b"constructor" => Token::Constructor, + _ => self.create_identifier_token(), + }, + _ => self.create_identifier_token(), + } + } else { + // For non-ASCII identifiers or identifiers with lengths outside keyword range + self.create_identifier_token() + }; + + // Add the token + emit_token!(self, token_type); + } + + // Helper method to create an identifier token + #[inline] + fn create_identifier_token(&self) -> Token { + let text = &self.source[self.start..self.current]; + Token::Identifier(text.to_string()) + } + + pub fn scan_tokens(&mut self) -> Result, LexerError> { while !self.is_at_end() { self.start = self.current; self.scan_token()?; } - let eof_column = self.column; - add_token!(self, TokenType::EOF, 0); + let _eof_column = self.column; + emit_token!(self, Token::EOS); Ok(std::mem::take(&mut self.tokens)) } @@ -47,168 +214,168 @@ impl<'a> Lexer<'a> { let c = self.advance(); match c { - '(' => add_token!(self, TokenType::LeftParen), - ')' => add_token!(self, TokenType::RightParen), - '{' => add_token!(self, TokenType::LeftBrace), - '}' => add_token!(self, TokenType::RightBrace), - '[' => add_token!(self, TokenType::LeftBracket), - ']' => add_token!(self, TokenType::RightBracket), - ',' => add_token!(self, TokenType::Comma), - ';' => add_token!(self, TokenType::Semicolon), - ':' => add_token!(self, TokenType::Colon), - '#' => add_token!(self, TokenType::Hash), + '(' => emit_token!(self, Token::LeftParen), + ')' => emit_token!(self, Token::RightParen), + '{' => emit_token!(self, Token::LeftBrace), + '}' => emit_token!(self, Token::RightBrace), + '[' => emit_token!(self, Token::LeftBracket), + ']' => emit_token!(self, Token::RightBracket), + ',' => emit_token!(self, Token::Comma), + ';' => emit_token!(self, Token::Semicolon), + ':' => emit_token!(self, Token::Colon), + '#' => emit_token!(self, Token::Hash), '.' => { if self.match_char('.') && self.match_char('.') { - add_token!(self, TokenType::Ellipsis, 3); + emit_token!(self, Token::Ellipsis); } else { - add_token!(self, TokenType::Dot); + emit_token!(self, Token::Dot); } }, '+' => { if self.match_char('+') { - add_token!(self, TokenType::PlusPlus, 2); + emit_token!(self, Token::PlusPlus); } else if self.match_char('=') { - add_token!(self, TokenType::PlusEqual, 2); + emit_token!(self, Token::PlusEqual); } else { - add_token!(self, TokenType::Plus); + emit_token!(self, Token::Plus); } }, '-' => { if self.match_char('-') { - add_token!(self, TokenType::MinusMinus, 2); + emit_token!(self, Token::MinusMinus); } else if self.match_char('=') { - add_token!(self, TokenType::MinusEqual, 2); + emit_token!(self, Token::MinusEqual); } else { - add_token!(self, TokenType::Minus); + emit_token!(self, Token::Minus); } }, '%' => { if self.match_char('=') { - add_token!(self, TokenType::PercentEqual, 2); + emit_token!(self, Token::PercentEqual); } else { - add_token!(self, TokenType::Percent); + emit_token!(self, Token::Percent); } }, '^' => { if self.match_char('=') { - add_token!(self, TokenType::CaretEqual, 2); + emit_token!(self, Token::CaretEqual); } else { - add_token!(self, TokenType::Caret); + emit_token!(self, Token::Caret); } }, '*' => { if self.match_char('*') { if self.match_char('=') { - add_token!(self, TokenType::StarStarEqual, 3); + emit_token!(self, Token::StarStarEqual); } else { - add_token!(self, TokenType::StarStar, 2); + emit_token!(self, Token::StarStar); } } else if self.match_char('=') { - add_token!(self, TokenType::StarEqual, 2); + emit_token!(self, Token::StarEqual); } else { - add_token!(self, TokenType::Star); + emit_token!(self, Token::Star); } }, '/' => self.handle_slash()?, '!' => { if self.match_char('=') { if self.match_char('=') { - add_token!(self, TokenType::BangEqualEqual, 3); + emit_token!(self, Token::BangEqualEqual); } else { - add_token!(self, TokenType::BangEqual, 2); + emit_token!(self, Token::BangEqual); } } else { - add_token!(self, TokenType::Bang); + emit_token!(self, Token::Bang); } }, '=' => { if self.match_char('>') { - add_token!(self, TokenType::Arrow, 2); + emit_token!(self, Token::Arrow); } else if self.match_char('=') { if self.match_char('=') { - add_token!(self, TokenType::EqualEqualEqual, 3); + emit_token!(self, Token::EqualEqualEqual); } else { - add_token!(self, TokenType::EqualEqual, 2); + emit_token!(self, Token::EqualEqual); } } else { - add_token!(self, TokenType::Equal); + emit_token!(self, Token::Equal); } }, '<' => { if self.match_char('=') { - add_token!(self, TokenType::LessEqual, 2); + emit_token!(self, Token::LessEqual); } else if self.match_char('<') { if self.match_char('=') { - add_token!(self, TokenType::LessLessEqual, 3); + emit_token!(self, Token::LessLessEqual); } else { - add_token!(self, TokenType::LessLess, 2); + emit_token!(self, Token::LessLess); } } else { - add_token!(self, TokenType::Less); + emit_token!(self, Token::Less); } }, '>' => { if self.match_char('=') { - add_token!(self, TokenType::GreaterEqual, 2); + emit_token!(self, Token::GreaterEqual); } else if self.match_char('>') { if self.match_char('>') { if self.match_char('=') { - add_token!(self, TokenType::GreaterGreaterGreaterEqual, 4); + emit_token!(self, Token::GreaterGreaterGreaterEqual); } else { - add_token!(self, TokenType::GreaterGreaterGreater, 3); + emit_token!(self, Token::GreaterGreaterGreater); } } else if self.match_char('=') { - add_token!(self, TokenType::GreaterGreaterEqual, 3); + emit_token!(self, Token::GreaterGreaterEqual); } else { - add_token!(self, TokenType::GreaterGreater, 2); + emit_token!(self, Token::GreaterGreater); } } else { - add_token!(self, TokenType::Greater); + emit_token!(self, Token::Greater); } }, '&' => { if self.match_char('&') { if self.match_char('=') { - add_token!(self, TokenType::AmpersandAmpersandEqual, 3); + emit_token!(self, Token::AmpersandAmpersandEqual); } else { - add_token!(self, TokenType::AmpersandAmpersand, 2); + emit_token!(self, Token::AmpersandAmpersand); } } else if self.match_char('=') { - add_token!(self, TokenType::AmpersandEqual, 2); + emit_token!(self, Token::AmpersandEqual); } else { - add_token!(self, TokenType::Ampersand); + emit_token!(self, Token::Ampersand); } }, '|' => { if self.match_char('|') { if self.match_char('=') { - add_token!(self, TokenType::PipePipeEqual, 3); + emit_token!(self, Token::PipePipeEqual); } else { - add_token!(self, TokenType::PipePipe, 2); + emit_token!(self, Token::PipePipe); } } else if self.match_char('=') { - add_token!(self, TokenType::PipeEqual, 2); + emit_token!(self, Token::PipeEqual); } else { - add_token!(self, TokenType::Pipe); + emit_token!(self, Token::Pipe); } }, - '~' => add_token!(self, TokenType::Tilde), + '~' => emit_token!(self, Token::Tilde), '?' => { if self.match_char('?') { if self.match_char('=') { - add_token!(self, TokenType::QuestionQuestionEqual, 3); + emit_token!(self, Token::QuestionQuestionEqual); } else { - add_token!(self, TokenType::QuestionQuestion, 2); + emit_token!(self, Token::QuestionQuestion); } } else if self.match_char('.') { - add_token!(self, TokenType::QuestionDot, 2); + emit_token!(self, Token::QuestionDot); } else { - add_token!(self, TokenType::Question); + emit_token!(self, Token::Question); } }, @@ -240,6 +407,7 @@ impl<'a> Lexer<'a> { Ok(()) } + #[inline(always)] fn line_comment(&mut self) { while !self.is_at_end() && self.peek() != '\n' { self.advance(); @@ -274,52 +442,51 @@ impl<'a> Lexer<'a> { Ok(()) } - /// Handles a forward slash character, which could be division, regexp, or comment + #[inline] fn handle_slash(&mut self) -> Result<(), LexerError> { if self.match_char('/') { self.line_comment(); } else if self.match_char('*') { self.block_comment()?; } else if self.match_char('=') { - add_token!(self, TokenType::SlashEqual, 2); + emit_token!(self, Token::SlashEqual); } else if self.is_regexp_start() { self.regexp()?; } else { - add_token!(self, TokenType::Slash); + emit_token!(self, Token::Slash); } Ok(()) } - /// Determines if a forward slash should be interpreted as the start of a regular expression - /// rather than a division operator based on JavaScript syntax rules. + #[inline] fn is_regexp_start(&self) -> bool { if self.tokens.is_empty() { return true; } // Get the last token type - let last_token = &self.tokens.last().unwrap().token_type; + let (last_token, _) = &self.tokens.last().unwrap(); // A slash starts a regex if it follows a token that cannot be the end of an expression match last_token { // After these tokens, a slash is division (these can end an expression) - TokenType::Identifier(_) | - TokenType::NumberLiteral(_) | - TokenType::StringLiteral(_) | - TokenType::RegExpLiteral(_, _) | - TokenType::TemplateLiteral(_) | - TokenType::True | - TokenType::False | - TokenType::Null | - TokenType::This | - TokenType::RightParen | - TokenType::RightBracket | - TokenType::PlusPlus | - TokenType::MinusMinus => false, + Token::Identifier(_) | + Token::NumberLiteral(_) | + Token::StringLiteral(_) | + Token::RegExpLiteral(_, _) | + Token::TemplateLiteral(_) | + Token::True | + Token::False | + Token::Null | + Token::This | + Token::RightParen | + Token::RightBracket | + Token::PlusPlus | + Token::MinusMinus => false, // Special case: right brace - could be block or object literal - TokenType::RightBrace => { + Token::RightBrace => { // TODO implement properly // This is a complex case that depends on context @@ -334,82 +501,84 @@ impl<'a> Lexer<'a> { /// Parses a regular expression literal fn regexp(&mut self) -> Result<(), LexerError> { - let start_column = self.column - 1; - let mut pattern = String::with_capacity(16); + let start_column = self.column - 1; + let mut pattern = String::with_capacity(16); + let mut in_character_class = false; + + // Parse the pattern + while !self.is_at_end() && (in_character_class || self.peek() != '/') { + let c = self.peek(); - // Parse the pattern - while !self.is_at_end() && self.peek() != '/' { - if self.peek() == '\\' { - pattern.push(self.advance()); // Add the escape character - - if self.is_at_end() { - return Err(LexerError::new( - "Unterminated regular expression: escape sequence not completed", - self.line, - start_column - )); - } - - // Add the escaped character (whatever it is) - pattern.push(self.advance()); - } else if self.peek() == '\n' { + if c == '[' && !in_character_class { + // Start of character class + in_character_class = true; + pattern.push(self.advance()); + } else if c == ']' && in_character_class { + // End of character class + in_character_class = false; + pattern.push(self.advance()); + } else if c == '\\' { + // Handle escape sequences + pattern.push(self.advance()); // Add the backslash + + if self.is_at_end() { return Err(LexerError::new( - "Unterminated regular expression: newline in pattern", + "Unterminated regular expression: escape sequence not completed", self.line, start_column )); - } else { - pattern.push(self.advance()); } - } - - if self.is_at_end() { + + // Add the escaped character (whatever it is) + pattern.push(self.advance()); + } else if c == '\n' { return Err(LexerError::new( - "Unterminated regular expression", + "Unterminated regular expression: newline in pattern", self.line, start_column )); + } else { + pattern.push(self.advance()); } - - // Consume the closing slash - self.advance(); - - // Parse flags - let mut flags = String::with_capacity(4); - while !self.is_at_end() && self.is_regexp_flag(self.peek()) { - flags.push(self.advance()); - } - - // Validate flags (no duplicates, only valid flags) - let mut seen_flags = HashSet::with_capacity(flags.len()); - for flag in flags.chars() { - if !seen_flags.insert(flag) { - return Err(LexerError::new( - &format!("Duplicate flag '{}' in regular expression", flag), - self.line, - self.column - 1 - )); - } - - if !matches!(flag, 'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd') { - return Err(LexerError::new( - &format!("Invalid regular expression flag '{}'", flag), - self.line, - self.column - 1 - )); - } + } + + if self.is_at_end() { + return Err(LexerError::new( + "Unterminated regular expression", + self.line, + start_column + )); + } + + // Consume the closing slash + self.advance(); + + // Parse flags + let mut flags = String::new(); + while !self.is_at_end() && self.is_regexp_flag(self.peek()) { + flags.push(self.advance()); + } + + // Validate flags (no duplicates) + let mut seen_flags = HashSet::new(); + for flag in flags.chars() { + if !seen_flags.insert(flag) { + return Err(LexerError::new( + &format!("Duplicate flag '{}' in regular expression", flag), + self.line, + self.column - 1 + )); } - - let length = (self.current - self.start) as usize; - - add_token!(self, TokenType::RegExpLiteral(pattern, flags), length); - - Ok(()) } + + // Emit the token + emit_token!(self, Token::RegExpLiteral(pattern, flags)); + + Ok(()) +} - - #[inline] + #[inline(always)] fn is_regexp_flag(&self, c: char) -> bool { matches!(c, 'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd') } @@ -544,9 +713,7 @@ impl<'a> Lexer<'a> { // Consume the closing backtick self.advance(); - let length = (self.current - self.start) as usize; - - add_token!(self, TokenType::TemplateLiteral(parts), length); + emit_token!(self, Token::TemplateLiteral(parts)); Ok(()) } @@ -627,16 +794,14 @@ impl<'a> Lexer<'a> { // Consume the closing quote self.advance(); - - let length = (self.current - self.start) as usize; - - add_token!(self, TokenType::StringLiteral(value), length); + emit_token!(self, Token::StringLiteral(value)); Ok(()) } - fn parse_unicode_escape(&mut self, start_line: usize, start_column: usize) -> Result { - if self.peek() == '{' { + fn parse_unicode_escape(&mut self, start_line: usize, start_column: usize) -> Result { + if self.peek() == '{' { + // Unicode code point escape \u{XXXXXX} - this part is already correct // Unicode code point escape \u{XXXXXX} self.advance(); // Consume '{' @@ -687,41 +852,104 @@ impl<'a> Lexer<'a> { start_column )) } - } else { - // Fixed 4-digit Unicode escape \uXXXX - let mut hex_string = String::with_capacity(4); - - for _ in 0..4 { - if self.is_at_end() || !self.is_hex_digit(self.peek()) { - return Err(LexerError::new( - "Invalid Unicode escape sequence: expected 4 hex digits", - start_line, - start_column - )); - } - hex_string.push(self.advance()); - } - - match u16::from_str_radix(&hex_string, 16) { - Ok(code_unit) => { - match std::char::from_u32(code_unit as u32) { - Some(c) => Ok(c), - None => Err(LexerError::new( - &format!("Invalid Unicode code unit: {}", hex_string), - start_line, - start_column - )) - } - }, - Err(_) => Err(LexerError::new( - &format!("Invalid Unicode escape sequence: \\u{}", hex_string), - start_line, - start_column - )) - } - } + } else { + // Fixed 4-digit Unicode escape \uXXXX + let mut hex_string = String::with_capacity(4); + + for _ in 0..4 { + if self.is_at_end() || !self.is_hex_digit(self.peek()) { + return Err(LexerError::new( + "Invalid Unicode escape sequence: expected 4 hex digits", + start_line, + start_column + )); + } + hex_string.push(self.advance()); + } + + match u16::from_str_radix(&hex_string, 16) { + Ok(code_unit) => { + // Check if this is a high surrogate + if (0xD800..=0xDBFF).contains(&code_unit) { + // This is a high surrogate, we need to look for a low surrogate + if self.peek() == '\\' && self.peek_next() == 'u' { + // Save current position in case we need to revert + let save_current = self.current; + let save_line = self.line; + let save_column = self.column; + + // Consume the \u + self.advance(); // \ + self.advance(); // u + + // Parse the next 4 hex digits + let mut low_hex = String::with_capacity(4); + let mut valid_low_surrogate = true; + + for _ in 0..4 { + if self.is_at_end() || !self.is_hex_digit(self.peek()) { + valid_low_surrogate = false; + break; + } + low_hex.push(self.advance()); + } + + if valid_low_surrogate { + if let Ok(low_code_unit) = u16::from_str_radix(&low_hex, 16) { + if (0xDC00..=0xDFFF).contains(&low_code_unit) { + // Valid surrogate pair, calculate the Unicode code point + let code_point = 0x10000 + ((code_unit - 0xD800) as u32 * 0x400) + (low_code_unit - 0xDC00) as u32; + return match std::char::from_u32(code_point) { + Some(c) => Ok(c), + None => Err(LexerError::new( + &format!("Invalid Unicode surrogate pair: \\u{}\\u{}", hex_string, low_hex), + start_line, + start_column + )) + }; + } + } + } + + // If we get here, the sequence after the high surrogate wasn't a valid low surrogate + // Revert to the position after the high surrogate + self.current = save_current; + self.line = save_line; + self.column = save_column; + } + + // Lone high surrogate without a following low surrogate + // In strict mode, this should be an error, but JavaScript allows it + // and replaces it with a replacement character + return Ok('\u{FFFD}'); // Unicode replacement character + } + + // Check if this is a low surrogate without a preceding high surrogate + if (0xDC00..=0xDFFF).contains(&code_unit) { + // Lone low surrogate, also replace with replacement character + return Ok('\u{FFFD}'); + } + + // Regular BMP character + match std::char::from_u32(code_unit as u32) { + Some(c) => Ok(c), + None => Err(LexerError::new( + &format!("Invalid Unicode code unit: {}", hex_string), + start_line, + start_column + )) + } + }, + Err(_) => Err(LexerError::new( + &format!("Invalid Unicode escape sequence: \\u{}", hex_string), + start_line, + start_column + )) + } + } } + #[inline] fn parse_hex_escape(&mut self, start_line: usize, start_column: usize) -> Result { // Hexadecimal escape sequence \xXX let mut hex_string = String::with_capacity(2); @@ -814,10 +1042,8 @@ impl<'a> Lexer<'a> { start_column )); } - - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::BigIntLiteral(value_str), length); + emit_token!(self, Token::BigIntLiteral(value_str)); return Ok(()); } @@ -830,18 +1056,14 @@ impl<'a> Lexer<'a> { !value_str.contains('E') && value_str.len() < 10 { // For small integers, parse directly to avoid floating point conversion if let Ok(int_val) = value_str.parse::() { - let length = (self.current - self.start) as usize; - - add_token!(self, TokenType::NumberLiteral(int_val as f64), length); - + emit_token!(self, Token::NumberLiteral(int_val as f64)); return Ok(()); } } match value_str.parse::() { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value), length); + emit_token!(self, Token::NumberLiteral(value)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -852,6 +1074,7 @@ impl<'a> Lexer<'a> { } } + #[inline] fn binary_number(&mut self, start_column: usize) -> Result<(), LexerError> { let start = self.current; @@ -879,8 +1102,7 @@ impl<'a> Lexer<'a> { // Parse as binary match i64::from_str_radix(&value_str.replace('_', ""), 2) { Ok(_) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::BigIntLiteral(format!("0b{}", value_str)), length); + emit_token!(self, Token::BigIntLiteral(format!("0b{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -896,8 +1118,7 @@ impl<'a> Lexer<'a> { // Parse as binary and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 2) { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value as f64), length); + emit_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -909,6 +1130,7 @@ impl<'a> Lexer<'a> { } } + #[inline] fn octal_number(&mut self, start_column: usize) -> Result<(), LexerError> { let start = self.current; @@ -936,8 +1158,7 @@ impl<'a> Lexer<'a> { // Parse as octal match i64::from_str_radix(&value_str.replace('_', ""), 8) { Ok(_) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::BigIntLiteral(format!("0o{}", value_str)), length); + emit_token!(self, Token::BigIntLiteral(format!("0o{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -953,8 +1174,7 @@ impl<'a> Lexer<'a> { // Parse as octal and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 8) { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value as f64), length); + emit_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -966,7 +1186,7 @@ impl<'a> Lexer<'a> { } } - + #[inline] fn hex_number(&mut self, start_column: usize) -> Result<(), LexerError> { let start = self.current; @@ -994,8 +1214,7 @@ impl<'a> Lexer<'a> { // Parse as hex match i64::from_str_radix(&value_str.replace('_', ""), 16) { Ok(_) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::BigIntLiteral(format!("0x{}", value_str)), length); + emit_token!(self, Token::BigIntLiteral(format!("0x{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1011,8 +1230,7 @@ impl<'a> Lexer<'a> { // Parse as hex and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 16) { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value as f64), length); + emit_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1024,13 +1242,14 @@ impl<'a> Lexer<'a> { } } - #[inline] + #[inline(always)] fn consume_digits(&mut self) { while self.is_digit(self.peek()) || self.peek() == '_' { self.advance(); } } - + + #[inline] fn extract_number_value(&self, start: usize, end: usize) -> String { // Remove numeric separators (_) let mut value_str = String::with_capacity(end - start); @@ -1042,158 +1261,113 @@ impl<'a> Lexer<'a> { value_str } - fn identifier(&mut self) { - let start_column = self.column - 1; - - while self.is_alphanumeric(self.peek()) { - self.advance(); - } - - // Get the identifier text - let text = &self.source[self.start..self.current]; - - // Check if it's a keyword using a match statement for better performance - let token_type = match text { - "break" => TokenType::Break, - "case" => TokenType::Case, - "catch" => TokenType::Catch, - "class" => TokenType::Class, - "const" => TokenType::Const, - "continue" => TokenType::Continue, - "debugger" => TokenType::Debugger, - "default" => TokenType::Default, - "delete" => TokenType::Delete, - "do" => TokenType::Do, - "else" => TokenType::Else, - "enum" => TokenType::Enum, - "export" => TokenType::Export, - "extends" => TokenType::Extends, - "false" => TokenType::False, - "finally" => TokenType::Finally, - "for" => TokenType::For, - "function" => TokenType::Function, - "if" => TokenType::If, - "import" => TokenType::Import, - "in" => TokenType::In, - "instanceof" => TokenType::InstanceOf, - "new" => TokenType::New, - "null" => TokenType::Null, - "return" => TokenType::Return, - "super" => TokenType::Super, - "undefined" => TokenType::Undefined, - "constructor" => TokenType::Constructor, - "switch" => TokenType::Switch, - "this" => TokenType::This, - "throw" => TokenType::Throw, - "true" => TokenType::True, - "try" => TokenType::Try, - "typeof" => TokenType::Typeof, - "var" => TokenType::Var, - "void" => TokenType::Void, - "while" => TokenType::While, - "with" => TokenType::With, - "yield" => TokenType::Yield, - "async" => TokenType::Async, - "await" => TokenType::Await, - "let" => TokenType::Let, - "static" => TokenType::Static, - "get" => TokenType::Get, - "set" => TokenType::Set, - "of" => TokenType::Of, - "as" => TokenType::As, - "from" => TokenType::From, - "target" => TokenType::Target, - "implements" => TokenType::Implements, - "interface" => TokenType::Interface, - "package" => TokenType::Package, - "private" => TokenType::Private, - "protected" => TokenType::Protected, - "public" => TokenType::Public, - "arguments" => TokenType::Arguments, - "eval" => TokenType::Eval, - _ => TokenType::Identifier(text.to_string()), - }; - - let length = (self.current - self.start) as usize; - - add_token!(self, token_type, length); - } - - #[inline] + #[inline(always)] fn is_digit(&self, c: char) -> bool { - c.is_ascii_digit() + c >= '0' && c <= '9' // Direct comparison is faster than is_ascii_digit() } - #[inline] + #[inline(always)] fn is_alpha(&self, c: char) -> bool { - c.is_ascii_alphabetic() || c == '_' || c == '$' + // Include $ character which is valid in JavaScript identifiers + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '_' || + c == '$' || + // For non-ASCII characters, use a simple heuristic + // This covers most Unicode letters that would be valid in JS identifiers + (c > '\x7F' && !c.is_whitespace() && !c.is_control()) || + // Zero-width characters allowed in JS identifiers + c == '\u{200C}' || c == '\u{200D}' } - #[inline] + #[inline(always)] fn is_alphanumeric(&self, c: char) -> bool { self.is_alpha(c) || self.is_digit(c) } - #[inline] + #[inline(always)] fn is_hex_digit(&self, c: char) -> bool { - c.is_ascii_hexdigit() + (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') } - #[inline] + #[inline(always)] + fn is_at_end(&self) -> bool { + self.current >= self.source_len + } + + #[inline(always)] fn is_octal_digit(&self, c: char) -> bool { c >= '0' && c <= '7' } - #[inline] - fn is_at_end(&self) -> bool { - self.current >= self.chars.len() - } - - #[inline] + #[inline(always)] fn advance(&mut self) -> char { - let c = self.chars[self.current]; - self.current += 1; + if self.is_at_end() { + return '\0'; + } + + // Fast path for ASCII (most common case in JS) + if self.current < self.source_len && self.bytes[self.current] < 128 { + let c = self.bytes[self.current] as char; + self.previous_char = self.current_char; + self.current_char = c; + self.current += 1; + self.column += 1; + return c; + } + + // Fallback for non-ASCII (UTF-8) + let c = self.source[self.current..].chars().next().unwrap(); + self.previous_char = self.current_char; + self.current_char = c; + self.current += c.len_utf8(); self.column += 1; c } - #[inline] + #[inline(always)] fn peek(&self) -> char { if self.is_at_end() { - '\0' - } else { - self.chars[self.current] + return '\0'; + } + if self.bytes[self.current] < 128 { + return self.bytes[self.current] as char; } + self.source[self.current..].chars().next().unwrap() } - - #[inline] + + #[inline(always)] fn peek_next(&self) -> char { - if self.current + 1 >= self.chars.len() { - '\0' - } else { - self.chars[self.current + 1] + if self.current + 1 >= self.source_len { + return '\0'; + } + // Fast path for ASCII + if self.bytes[self.current] < 128 && self.bytes[self.current + 1] < 128 { + return self.bytes[self.current + 1] as char; + } + // If current is ASCII but next might not be + if self.bytes[self.current] < 128 { + let next_pos = self.current + 1; + return self.source[next_pos..].chars().next().unwrap_or('\0'); } + // Both current and next are non-ASCII + let mut iter = self.source[self.current..].chars(); + iter.next(); + iter.next().unwrap_or('\0') } - - #[inline] + + #[inline(always)] fn peek_previous(&self) -> char { - if self.current == 0 { - '\0' - } else { - self.chars[self.current - 1] - } + self.previous_char } - - #[inline] + + #[inline(always)] fn match_char(&mut self, expected: char) -> bool { if self.is_at_end() || self.peek() != expected { false } else { - self.current += 1; - self.column += 1; + self.advance(); true } } - } - diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 3e04d58..e1bebad 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,7 +1,9 @@ mod error; mod token; mod lexer; +mod context; pub use error::LexerError; -pub use token::{Token, TokenType, TemplatePart}; -pub use lexer::Lexer; \ No newline at end of file +pub use token::{Token, TemplatePart}; +pub use lexer::Lexer; +pub use context::LexicalContext; diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 5d9d88d..f0986db 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -1,5 +1,5 @@ #[derive(Debug, Clone, PartialEq)] -pub enum TokenType { +pub enum Token { // Characters LeftParen, RightParen, @@ -66,7 +66,10 @@ pub enum TokenType { NumberLiteral(f64), BigIntLiteral(String), RegExpLiteral(String, String), - True, False, Null, Undefined, + True, + False, + Null, + Undefined, // Keywords Var, Let, @@ -122,7 +125,242 @@ pub enum TokenType { From, As, // Sentinel - EOF, + EOS, +} + +impl Token { + + pub fn keyword_text(&self) -> Option<&str> { + match self { + // Characters/operators don't have keyword text + Token::LeftParen | Token::RightParen | Token::LeftBrace | + Token::RightBrace | Token::LeftBracket | Token::RightBracket | + Token::Comma | Token::Dot | Token::Semicolon | Token::Colon | + Token::Question | Token::Arrow | Token::Hash | Token::Plus | + Token::PlusPlus | Token::PlusEqual | Token::Minus | + Token::MinusMinus | Token::MinusEqual | Token::Star | + Token::StarStar | Token::StarEqual | Token::StarStarEqual | + Token::Slash | Token::SlashEqual | Token::Percent | + Token::PercentEqual | Token::Equal | Token::EqualEqual | + Token::EqualEqualEqual | Token::Bang | Token::BangEqual | + Token::BangEqualEqual | Token::Greater | Token::GreaterEqual | + Token::GreaterGreater | Token::GreaterGreaterEqual | + Token::GreaterGreaterGreater | Token::GreaterGreaterGreaterEqual | + Token::Less | Token::LessEqual | Token::LessLess | + Token::LessLessEqual | Token::Ampersand | Token::AmpersandEqual | + Token::AmpersandAmpersand | Token::AmpersandAmpersandEqual | + Token::Pipe | Token::PipeEqual | Token::PipePipe | + Token::PipePipeEqual | Token::Caret | Token::CaretEqual | + Token::Tilde | Token::Ellipsis | Token::QuestionQuestion | + Token::QuestionQuestionEqual | Token::QuestionDot => None, + + // Literals don't have keyword text + Token::Identifier(_) | Token::StringLiteral(_) | + Token::TemplateLiteral(_) | Token::NumberLiteral(_) | + Token::BigIntLiteral(_) | Token::RegExpLiteral(_, _) => None, + + // Boolean literals and null + Token::True => Some("true"), + Token::False => Some("false"), + Token::Null => Some("null"), + Token::Undefined => Some("undefined"), + + // Keywords + Token::Var => Some("var"), + Token::Let => Some("let"), + Token::With => Some("with"), + Token::Const => Some("const"), + Token::Function => Some("function"), + Token::Return => Some("return"), + Token::If => Some("if"), + Token::Else => Some("else"), + Token::While => Some("while"), + Token::For => Some("for"), + Token::Break => Some("break"), + Token::Continue => Some("continue"), + Token::This => Some("this"), + Token::Super => Some("super"), + Token::New => Some("new"), + Token::Delete => Some("delete"), + Token::Typeof => Some("typeof"), + Token::Void => Some("void"), + Token::In => Some("in"), + Token::InstanceOf => Some("instanceof"), + Token::Try => Some("try"), + Token::Catch => Some("catch"), + Token::Finally => Some("finally"), + Token::Throw => Some("throw"), + Token::Switch => Some("switch"), + Token::Case => Some("case"), + Token::Default => Some("default"), + Token::Await => Some("await"), + Token::Async => Some("async"), + Token::Do => Some("do"), + Token::Enum => Some("enum"), + Token::Of => Some("of"), + Token::Target => Some("target"), + Token::Implements => Some("implements"), + Token::Interface => Some("interface"), + Token::Package => Some("package"), + Token::Private => Some("private"), + Token::Protected => Some("protected"), + Token::Public => Some("public"), + Token::Arguments => Some("arguments"), + Token::Eval => Some("eval"), + Token::Debugger => Some("debugger"), + Token::Class => Some("class"), + Token::Extends => Some("extends"), + Token::Constructor => Some("constructor"), + Token::Static => Some("static"), + Token::Get => Some("get"), + Token::Set => Some("set"), + Token::Yield => Some("yield"), + Token::Import => Some("import"), + Token::Export => Some("export"), + Token::From => Some("from"), + Token::As => Some("as"), + + // Sentinel + Token::EOS => None, + } + } + + pub fn to_string(&self) -> String { + match self { + // Literals + Token::Identifier(name) => name.clone(), + Token::StringLiteral(s) => format!("\"{}\"", s), + Token::NumberLiteral(n) => n.to_string(), + Token::BigIntLiteral(b) => format!("{}n", b), + Token::RegExpLiteral(pattern, flags) => format!("/{}/{}", pattern, flags), + Token::TemplateLiteral(_) => "`...`".to_string(), + + // Boolean literals and null + Token::True => "true".to_string(), + Token::False => "false".to_string(), + Token::Null => "null".to_string(), + Token::Undefined => "undefined".to_string(), + + // Keywords + Token::Var => "var".to_string(), + Token::Let => "let".to_string(), + Token::With => "with".to_string(), + Token::Const => "const".to_string(), + Token::Function => "function".to_string(), + Token::Return => "return".to_string(), + Token::If => "if".to_string(), + Token::Else => "else".to_string(), + Token::While => "while".to_string(), + Token::For => "for".to_string(), + Token::Break => "break".to_string(), + Token::Continue => "continue".to_string(), + Token::This => "this".to_string(), + Token::Super => "super".to_string(), + Token::New => "new".to_string(), + Token::Delete => "delete".to_string(), + Token::Typeof => "typeof".to_string(), + Token::Void => "void".to_string(), + Token::In => "in".to_string(), + Token::InstanceOf => "instanceof".to_string(), + Token::Try => "try".to_string(), + Token::Catch => "catch".to_string(), + Token::Finally => "finally".to_string(), + Token::Throw => "throw".to_string(), + Token::Switch => "switch".to_string(), + Token::Case => "case".to_string(), + Token::Default => "default".to_string(), + Token::Await => "await".to_string(), + Token::Async => "async".to_string(), + Token::Do => "do".to_string(), + Token::Enum => "enum".to_string(), + Token::Of => "of".to_string(), + Token::Target => "target".to_string(), + Token::Implements => "implements".to_string(), + Token::Interface => "interface".to_string(), + Token::Package => "package".to_string(), + Token::Private => "private".to_string(), + Token::Protected => "protected".to_string(), + Token::Public => "public".to_string(), + Token::Arguments => "arguments".to_string(), + Token::Eval => "eval".to_string(), + Token::Debugger => "debugger".to_string(), + Token::Class => "class".to_string(), + Token::Extends => "extends".to_string(), + Token::Constructor => "constructor".to_string(), + Token::Static => "static".to_string(), + Token::Get => "get".to_string(), + Token::Set => "set".to_string(), + Token::Yield => "yield".to_string(), + Token::Import => "import".to_string(), + Token::Export => "export".to_string(), + Token::From => "from".to_string(), + Token::As => "as".to_string(), + + // Characters and operators + Token::LeftParen => "(".to_string(), + Token::RightParen => ")".to_string(), + Token::LeftBrace => "{".to_string(), + Token::RightBrace => "}".to_string(), + Token::LeftBracket => "[".to_string(), + Token::RightBracket => "]".to_string(), + Token::Comma => ",".to_string(), + Token::Dot => ".".to_string(), + Token::Semicolon => ";".to_string(), + Token::Colon => ":".to_string(), + Token::Question => "?".to_string(), + Token::Arrow => "=>".to_string(), + Token::Hash => "#".to_string(), + Token::Plus => "+".to_string(), + Token::PlusPlus => "++".to_string(), + Token::PlusEqual => "+=".to_string(), + Token::Minus => "-".to_string(), + Token::MinusMinus => "--".to_string(), + Token::MinusEqual => "-=".to_string(), + Token::Star => "*".to_string(), + Token::StarStar => "**".to_string(), + Token::StarEqual => "*=".to_string(), + Token::StarStarEqual => "**=".to_string(), + Token::Slash => "/".to_string(), + Token::SlashEqual => "/=".to_string(), + Token::Percent => "%".to_string(), + Token::PercentEqual => "%=".to_string(), + Token::Equal => "=".to_string(), + Token::EqualEqual => "==".to_string(), + Token::EqualEqualEqual => "===".to_string(), + Token::Bang => "!".to_string(), + Token::BangEqual => "!=".to_string(), + Token::BangEqualEqual => "!==".to_string(), + Token::Greater => ">".to_string(), + Token::GreaterEqual => ">=".to_string(), + Token::GreaterGreater => ">>".to_string(), + Token::GreaterGreaterEqual => ">>=".to_string(), + Token::GreaterGreaterGreater => ">>>".to_string(), + Token::GreaterGreaterGreaterEqual => ">>>=".to_string(), + Token::Less => "<".to_string(), + Token::LessEqual => "<=".to_string(), + Token::LessLess => "<<".to_string(), + Token::LessLessEqual => "<<=".to_string(), + Token::Ampersand => "&".to_string(), + Token::AmpersandEqual => "&=".to_string(), + Token::AmpersandAmpersand => "&&".to_string(), + Token::AmpersandAmpersandEqual => "&&=".to_string(), + Token::Pipe => "|".to_string(), + Token::PipeEqual => "|=".to_string(), + Token::PipePipe => "||".to_string(), + Token::PipePipeEqual => "||=".to_string(), + Token::Caret => "^".to_string(), + Token::CaretEqual => "^=".to_string(), + Token::Tilde => "~".to_string(), + Token::Ellipsis => "...".to_string(), + Token::QuestionQuestion => "??".to_string(), + Token::QuestionQuestionEqual => "??=".to_string(), + Token::QuestionDot => "?.".to_string(), + + // Sentinel + Token::EOS => "".to_string(), + } + } + } #[derive(Debug, Clone, PartialEq)] @@ -130,19 +368,3 @@ pub enum TemplatePart { String(String), Expression(String), } - -#[derive(Debug, Clone)] -pub struct Token { - pub token_type: TokenType, - pub column: usize, - pub line: usize, - pub length: usize, -} - -impl Token { - - #[inline] - pub fn new(token_type: TokenType, line: usize, column: usize, length: usize) -> Self { - Token { token_type, line, column, length } - } -} diff --git a/src/main.rs b/src/main.rs index 6609b39..34cb7c5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,8 @@ mod ast; mod lexer; mod parser; +mod unparser; +mod grammar; use std::path::PathBuf; use std::fs; @@ -9,7 +11,7 @@ use std::process; use lexer::Lexer; use parser::Parser; - +use unparser::{Unparser, FormatStyle}; fn main() { let args: Vec = env::args().collect(); @@ -23,7 +25,7 @@ fn main() { match run::(entry_file) { Ok(_) => { - println!("Successfully parsed {}", entry_file); + println!("Successfully parsed and unparsed {}", entry_file); }, Err(error) => { eprintln!("Error: {}", error); @@ -33,18 +35,35 @@ fn main() { } fn run(file: &str) -> Result<(), Box> where str: AsRef { - let path = PathBuf::from(file); - - let source = fs::read_to_string(path)?; + let source = fs::read_to_string(path)?; let mut lexer = Lexer::new(&source); let tokens = lexer.scan_tokens()?; + + println!("Tokens: {:#?}", tokens); + + let mut parser = Parser::new(&tokens); + parser.attach_source(&source); + let ast = parser.parse_script()?; + + println!("AST: {:#?}", ast); + + let mut pretty_unparser = Unparser::new(FormatStyle::Pretty { indent_size: 2 }); + + let pretty_code = pretty_unparser.unparse_script(&ast); + + let mut compact_unparser = Unparser::new(FormatStyle::Compact); + + let compact_code = compact_unparser.unparse_script(&ast); - let mut parser = Parser::new(tokens); - let ast = parser.parse()?; + println!("\nPretty JavaScript code:"); + println!("{}", pretty_code); - println!("AST: {:#?}", ast); + println!("\nCompact JavaScript code:"); + println!("{}", compact_code); + + println!("\n\n"); Ok(()) } diff --git a/src/parser/asi.rs b/src/parser/asi.rs new file mode 100644 index 0000000..a513cc1 --- /dev/null +++ b/src/parser/asi.rs @@ -0,0 +1,76 @@ +use crate::lexer::Token; +use super::error::ParseResult; +use super::parser::Parser; + +// TODO remove? +impl<'a> Parser<'a> { + pub fn consume_semicolon(&mut self, message: &str) -> ParseResult<&Token> { + if self.consume(&Token::Semicolon) { + return Ok(self.peek_previous()); + } + + // Automatic Semicolon Insertion (ASI) rules + if self.check(&Token::RightBrace) { + return Ok(self.peek_previous()); + } + + if self.is_at_end() { + return Ok(self.peek_previous()); + } + + if self.previous_line_terminator() { + // Special case: restricted productions + // These statements cannot be followed by a line terminator without a semicolon + let prev = self.peek_previous(); + + match prev { + // Rule: No LineTerminator here after return/throw/yield/break/continue + Token::Return | + Token::Throw | + Token::Yield | + Token::Break | + Token::Continue => { + // Check if there's an expression after these keywords + // If not, ASI applies + if !self.is_expression_start() { + return Err(self.error_at_current(message)); + } + }, + _ => { + return Ok(prev) + }, + } + } + + // Otherwise, it's an error + Err(self.error_at_current(message)) + } + + // Helper method to check if the current token would start an expression + fn is_expression_start(&self) -> bool { + match self.peek() { + Token::Identifier(_) | + Token::NumberLiteral(_) | + Token::StringLiteral(_) | + Token::TemplateLiteral(_) | + Token::RegExpLiteral(_, _) | + Token::True | + Token::False | + Token::Null | + Token::This | + Token::LeftParen | + Token::LeftBracket | + Token::LeftBrace | + Token::Function | + Token::New | + Token::Delete | + Token::Typeof | + Token::Void | + Token::Plus | + Token::Minus | + Token::Bang | + Token::Tilde => true, + _ => false + } + } +} diff --git a/src/parser/classes.rs b/src/parser/classes.rs deleted file mode 100644 index 2a2446c..0000000 --- a/src/parser/classes.rs +++ /dev/null @@ -1,145 +0,0 @@ -use crate::ast::*; -use crate::lexer::TokenType; -use super::error::ParseResult; -use super::core::Parser; -use super::expressions::Precedence; - -impl Parser { - - pub fn parse_class_declaration(&mut self) -> ParseResult { - self.advance(); // consume 'class' - - let id = self.expect_identifier("Expected class name")?; - let super_class = self.match_token(&TokenType::Extends) - .then(|| self.parse_expression_with_precedence(Precedence::Call)) - .transpose()?; - - let body = self.parse_class_body()?; - - Ok(ClassDeclaration { id, super_class, body }) - } - - pub fn parse_class_expression(&mut self) -> ParseResult { - self.advance(); // consume 'class' - - // Optional class name for expressions - let id = matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) - .then(|| self.expect_identifier("Expected class name")) - .transpose()?; - - // Optional extends clause - let super_class = self.match_token(&TokenType::Extends) - .then(|| self.parse_expression_with_precedence(Precedence::Call).map(Box::new)) - .transpose()?; - - let body = self.parse_class_body()?; - - Ok(Expression::Class { id, super_class, body }) - } - - pub fn parse_class_body(&mut self) -> ParseResult> { - self.consume(&TokenType::LeftBrace, "Expected '{' before class body")?; - - // Classes are always in strict mode - let prev_strict = self.state.in_strict_mode; - self.state.in_strict_mode = true; - - let mut body = Vec::new(); - - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - // Skip empty class elements (semicolons) - if self.match_token(&TokenType::Semicolon) { - continue; - } - - body.push(self.parse_class_member()?); - } - - self.consume(&TokenType::RightBrace, "Expected '}' after class body")?; - self.state.in_strict_mode = prev_strict; - - Ok(body) - } - - pub fn parse_class_member(&mut self) -> ParseResult { - let is_static = self.match_token(&TokenType::Static); - - // Handle static blocks (ES2022) - if is_static && self.check(&TokenType::LeftBrace) { - return self.parse_static_block(); - } - - // Parse method modifiers - let is_async = self.match_token(&TokenType::Async); - let is_generator = self.match_token(&TokenType::Star); - - // Check for getter/setter - let mut kind = MethodKind::Method; - if !is_async && !is_generator { - if self.match_token(&TokenType::Get) { - kind = MethodKind::Getter; - } else if self.match_token(&TokenType::Set) { - kind = MethodKind::Setter; - } - } - - // Parse property key - let key = self.parse_property_key()?; - - // Check for constructor method - if !is_static && !is_async && !is_generator && kind == MethodKind::Method { - if let PropertyKey::Identifier(name) = &key { - if name.as_ref() == "constructor" { - let params = self.parse_function_params()?; - let body = self.parse_function_body(false, false)?; - return Ok(ClassMember::Constructor { params, body }); - } - } - } - - // Method definition - if self.check(&TokenType::LeftParen) || is_generator || is_async { - let params = self.parse_function_params()?; - let body = self.parse_function_body(is_async, is_generator)?; - - return Ok(ClassMember::Method { - key, - value: MethodDefinition { - params, - body, - is_async, - is_generator, - }, - kind, - is_static, - }); - } - - // Class field - let value = self.match_token(&TokenType::Equal) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume_semicolon("Expected ';' after class field")?; - - Ok(ClassMember::Property { - key, - value, - is_static, - }) - } - - pub fn parse_static_block(&mut self) -> ParseResult { - self.consume(&TokenType::LeftBrace, "Expected '{' after 'static'")?; - - let mut body = Vec::new(); - - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - body.push(self.parse_statement()?); - } - - self.consume(&TokenType::RightBrace, "Expected '}' after static block")?; - - Ok(ClassMember::StaticBlock { body }) - } -} diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs new file mode 100644 index 0000000..c0ce981 --- /dev/null +++ b/src/parser/combinator.rs @@ -0,0 +1,6 @@ +use super::error::ParseResult; +use super::parser::Parser; + +pub trait ParserCombinator { + fn parse(&self, parser: &mut Parser) -> ParseResult; +} diff --git a/src/parser/context.rs b/src/parser/context.rs new file mode 100644 index 0000000..fd1d432 --- /dev/null +++ b/src/parser/context.rs @@ -0,0 +1,65 @@ +use std::collections::HashSet; +use crate::lexer::LexicalContext; + +pub struct ParserContext { + // TODO to lexical context + pub in_strict_mode: bool, + pub labels: HashSet>, + pub context_stack: Vec, +} + +impl ParserContext { + pub fn new() -> Self { + Self { + in_strict_mode: false, + labels: HashSet::new(), + context_stack: vec![LexicalContext::Default], + } + } + + pub fn push_context(&mut self, context: LexicalContext) { + self.context_stack.push(context); + } + + pub fn pop_context(&mut self) { + if self.context_stack.len() > 1 { + self.context_stack.pop(); + } + } + + fn current_context(&self) -> &LexicalContext { + self.context_stack.last().unwrap_or(&LexicalContext::Default) + } + + pub fn is_in_loop_body(&self) -> bool { + matches!(self.current_context(), LexicalContext::LoopBody) + } + + pub fn is_in_switch(&self) -> bool { + matches!(self.current_context(), LexicalContext::SwitchBody) + } + + pub fn is_in_function(&self) -> bool { + self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) + } + + pub fn allows_yield(&self) -> bool { + matches!(self.current_context(), LexicalContext::FunctionBody { allow_yield: true, .. }) + } + + pub fn allows_await(&self) -> bool { + matches!(self.current_context(), LexicalContext::FunctionBody { allow_await: true, .. }) + } + + pub fn get_context_stack_info(&self) -> Vec { + let depth = 10; + let stack_len = self.context_stack.len(); + let start_idx = if stack_len > depth { stack_len - depth } else { 0 }; + + self.context_stack[start_idx..] + .iter() + .rev() + .map(|ctx| format!("{}", ctx)) + .collect() + } +} diff --git a/src/parser/core.rs b/src/parser/core.rs deleted file mode 100644 index 89e4466..0000000 --- a/src/parser/core.rs +++ /dev/null @@ -1,402 +0,0 @@ -use crate::ast::*; -use crate::lexer::{Token, TokenType}; -use super::error::{ParserError, ParseResult}; -use super::state::ParserState; -use std::collections::HashSet; - -pub struct Parser { - pub tokens: Vec, - pub current: usize, - pub comments: Vec, - pub state: ParserState, -} - -impl Parser { - pub fn new(tokens: Vec) -> Self { - Parser { - tokens, - current: 0, - comments: Vec::new(), - state: ParserState::new(), - } - } - - // Token navigation methods - pub fn is_at_end(&self) -> bool { - self.current >= self.tokens.len() || matches!(self.peek_token_type(), Some(TokenType::EOF)) - } - - pub fn peek_token(&self) -> Option<&Token> { - self.tokens.get(self.current) - } - - pub fn peek_token_type(&self) -> Option<&TokenType> { - self.peek_token().map(|t| &t.token_type) - } - - pub fn previous(&self) -> Option<&Token> { - if self.current > 0 { - self.tokens.get(self.current - 1) - } else { - None - } - } - - pub fn advance(&mut self) -> Option<&Token> { - if !self.is_at_end() { - self.current += 1; - } - self.previous() - } - - pub fn check(&self, token_type: &TokenType) -> bool { - match self.peek_token_type() { - Some(t) => t == token_type, - None => false, - } - } - - pub fn match_token(&mut self, token_type: &TokenType) -> bool { - if self.check(token_type) { - self.advance(); - true - } else { - false - } - } - - pub fn match_any(&mut self, token_types: &[TokenType]) -> bool { - for token_type in token_types { - if self.check(token_type) { - self.advance(); - return true; - } - } - false - } - - pub fn consume(&mut self, token_type: &TokenType, message: &str) -> ParseResult<&Token> { - if self.check(token_type) { - Ok(self.advance().unwrap()) - } else { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); - Err(ParserError::new(message, token.line, token.column)) - } - } - - pub fn previous_line_terminator(&self) -> bool { - if let Some(prev) = self.previous() { - if let Some(curr) = self.peek_token() { - return prev.line < curr.line; - } - } - false - } - - pub fn consume_semicolon(&mut self, message: &str) -> ParseResult<()> { - // Handle automatic semicolon insertion (ASI) - if self.match_token(&TokenType::Semicolon) { - return Ok(()); - } - - // ASI rules: insert semicolon if - // 1. The current token is on a new line from the previous token - // 2. The current token is a closing brace - // 3. We've reached the end of input - if self.previous_line_terminator() || - self.check(&TokenType::RightBrace) || - self.is_at_end() { - return Ok(()); - } - - // Otherwise, it's an error - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); - Err(ParserError::new(message, token.line, token.column)) - } - - // TODO delete - pub fn identifier_name(&self, token: &Token) -> ParseResult> { - match &token.token_type { - TokenType::Identifier(name) => Ok(name.clone().into_boxed_str()), - _ => Err(ParserError::new("Expected identifier", token.line, token.column)), - } - } - - pub fn expect_identifier(&mut self, message: &str) -> ParseResult> { - // Create a binding for the error case - let binding = Token::new(TokenType::EOF, 0, 0, 0); - - // Get the token, handling the case where there might not be one - let token = match self.advance() { - Some(t) => t, - None => { - let last = self.previous().unwrap_or(&binding); - return Err(ParserError::new(message, last.line, last.column)); - } - }; - - match &token.token_type { - TokenType::Identifier(name) => Ok(name.clone().into_boxed_str()), - TokenType::Default => Ok("default".into()), - TokenType::As => Ok("as".into()), - TokenType::For => Ok("for".into()), - TokenType::Target => Ok("target".into()), - TokenType::From => Ok("from".into()), - TokenType::Class => Ok("class".into()), - TokenType::Get => Ok("get".into()), - TokenType::Set => Ok("set".into()), - _ => Err(ParserError::new( - &format!("Expected identifier, found {:?}", token.token_type), - token.line, - token.column - )), - } - } - - // Error helper - pub fn error_unexpected(&self, message: &str) -> ParserError { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = match self.peek_token() { - Some(t) => t, - None => match self.previous() { - Some(t) => t, - None => &binding - } - }; - ParserError::new(message, token.line, token.column) - } - - // Main parse methods - pub fn parse(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Ok(Program { - source_type: SourceType::Script, - body: Vec::new(), - comments: Vec::new(), - }); - } - - self.parse_program() - } - - pub fn parse_as_module(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Ok(Program { - source_type: SourceType::Module, - body: Vec::new(), - comments: Vec::new(), - }); - } - - self.parse_module() - } - - pub fn parse_single_statement(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Ok(Statement::Empty); - } - - let stmt = self.parse_statement()?; - - // Ensure we've consumed all tokens - if !self.is_at_end() && !matches!(self.peek_token_type(), Some(TokenType::EOF)) { - let token = self.peek_token().unwrap(); - return Err(ParserError::new("Unexpected token after statement", token.line, token.column)); - } - - Ok(stmt) - } - - pub fn parse_single_expression(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Err(ParserError::new("Empty input", 0, 0)); - } - - let expr = self.parse_expression()?; - - // Ensure we've consumed all tokens - if !self.is_at_end() && !matches!(self.peek_token_type(), Some(TokenType::EOF)) { - let token = self.peek_token().unwrap(); - return Err(ParserError::new("Unexpected token after expression", token.line, token.column)); - } - - Ok(expr) - } - - pub fn parse_comment(&mut self, text: String, is_block: bool, start: usize, end: usize) { - let comment = Comment { - text: text.into_boxed_str(), - is_block, - span: (start as u32, end as u32), - }; - self.comments.push(comment); - } - - // Helper method to handle parsing of "enum" keyword which is reserved in strict mode - pub fn handle_reserved_word(&self, word: &str) -> ParseResult<()> { - if self.state.in_strict_mode { - let reserved_words = ["implements", "interface", "package", "private", "protected", "public", "enum", "eval", "arguments"]; - - if reserved_words.contains(&word) { - let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( - &format!("'{}' is a reserved word in strict mode", word), - token.line, - token.column, - )); - } - } - - Ok(()) - } - - // Helper method to validate variable names - pub fn validate_variable_name(&self, name: &str) -> ParseResult<()> { - if self.state.in_strict_mode { - if name == "eval" || name == "arguments" { - let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( - &format!("'{}' cannot be used as a variable name in strict mode", name), - token.line, - token.column, - )); - } - } - - Ok(()) - } - - // Helper method to validate function parameters - pub fn validate_function_params(&self, params: &[Pattern]) -> ParseResult<()> { - let mut seen_params = HashSet::new(); - - for param in params { - if let Pattern::Identifier(name) = param { - if self.state.in_strict_mode && (name.as_ref() == "eval" || name.as_ref() == "arguments") { - return Err(super::error::ParserError::new( - &format!("'{}' cannot be used as a parameter name in strict mode", name), - self.previous().unwrap().line, - self.previous().unwrap().column, - )); - } - - if !seen_params.insert(name.clone()) { - return Err(super::error::ParserError::new( - &format!("Duplicate parameter name '{}'", name), - self.previous().unwrap().line, - self.previous().unwrap().column, - )); - } - } - } - - Ok(()) - } - - // Helper method to handle octal literals in strict mode - pub fn validate_octal_literal(&self, value: &str) -> ParseResult<()> { - if self.state.in_strict_mode && value.starts_with('0') && !value.starts_with("0x") && !value.starts_with("0b") && !value.starts_with("0o") { - return Err(super::error::ParserError::new( - "Octal literals are not allowed in strict mode", - self.previous().unwrap().line, - self.previous().unwrap().column, - )); - } - - Ok(()) - } - - // Helper method to parse a list of elements separated by commas - pub fn parse_comma_separated_list(&mut self, terminator: &TokenType, parser_fn: F) -> ParseResult> - where - F: Fn(&mut Self) -> ParseResult, - { - let mut elements = Vec::new(); - - if !self.check(terminator) { - loop { - elements.push(parser_fn(self)?); - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(terminator) { - break; - } - } - } - - self.consume(terminator, &format!("Expected '{:?}'", terminator))?; - Ok(elements) - } - - // Helper method to parse arguments for function calls - pub fn parse_arguments(&mut self) -> ParseResult> { - let mut args = Vec::new(); - - if !self.check(&TokenType::RightParen) { - loop { - if self.match_token(&TokenType::Ellipsis) { - // Spread argument - let expr = self.parse_expression()?; - args.push(Argument::Spread(expr)); - } else { - // Regular argument - let expr = self.parse_expression()?; - args.push(Argument::Expression(expr)); - } - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightParen) { - break; - } - } - } - - self.consume(&TokenType::RightParen, "Expected ')' after arguments")?; - - Ok(args) - } - - // Property key parsing for object literals, class members, and destructuring patterns - pub fn parse_property_key(&mut self) -> ParseResult { - if self.match_token(&TokenType::LeftBracket) { - let expr = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property key")?; - Ok(PropertyKey::Computed(expr)) - } else if self.match_token(&TokenType::Hash) { - let name = self.expect_identifier("Expected private identifier name")?; - Ok(PropertyKey::PrivateIdentifier(name)) - } else if let Some(TokenType::StringLiteral(_)) = self.peek_token_type() { - if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { - Ok(PropertyKey::StringLiteral(s.into_boxed_str())) - } else { - unreachable!() - } - } else if let Some(TokenType::NumberLiteral(_)) = self.peek_token_type() { - if let TokenType::NumberLiteral(n) = self.advance().unwrap().token_type { - Ok(PropertyKey::NumericLiteral(n)) - } else { - unreachable!() - } - } else if self.check(&TokenType::Default) { - Ok(PropertyKey::Identifier("default".into())) - } else if self.check(&TokenType::Get) { - Ok(PropertyKey::Identifier("get".into())) - } - else if self.check(&TokenType::Set) { - Ok(PropertyKey::Identifier("set".into())) - } else { - let name = self.expect_identifier("Expected property name 999")?; - Ok(PropertyKey::Identifier(name)) - } - } -} diff --git a/src/parser/declarations.rs b/src/parser/declarations.rs deleted file mode 100644 index 88cb8ac..0000000 --- a/src/parser/declarations.rs +++ /dev/null @@ -1,45 +0,0 @@ -use crate::ast::*; -use crate::lexer::{Token, TokenType}; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - // Variable declarations - pub fn parse_variable_declaration(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap_or_else(|| Token::new(TokenType::EOF, 0, 0, 0)); - - let kind = match token.token_type { - TokenType::Var => VariableKind::Var, - TokenType::Let => VariableKind::Let, - TokenType::Const => VariableKind::Const, - _ => unreachable!(), - }; - - // Parse first declarator (required) - let mut declarations = vec![self.parse_variable_declarator()?]; - - // Parse additional declarators separated by commas - while self.match_token(&TokenType::Comma) { - declarations.push(self.parse_variable_declarator()?); - } - - // Consume semicolon unless we're in a for-in/of loop - if !self.state.in_loop { - self.consume_semicolon("Expected ';' after variable declaration")?; - } - - Ok(VariableDeclaration { declarations, kind }) - } - - pub fn parse_variable_declarator(&mut self) -> ParseResult { - let id = self.parse_pattern()?; - - // Parse optional initializer - let init = self.match_token(&TokenType::Equal) - .then(|| self.parse_expression()) - .transpose()?; - - Ok(VariableDeclarator { id, init }) - } -} diff --git a/src/parser/error.rs b/src/parser/error.rs index dc301e2..8e7504f 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -1,30 +1,283 @@ -use crate::lexer::LexerError; +use crate::lexer::{LexerError, TemplatePart, Token}; +use super::parser::Parser; use std::fmt; -/// Represents an error that occurred during parsing #[derive(Debug, Clone)] pub struct ParserError { pub message: String, pub line: usize, pub column: usize, + pub source_line: Option, + pub source_span: Option<(usize, usize)>, + pub context_stack: Vec, + pub token_stack: Vec, } impl ParserError { - pub fn new(message: &str, line: usize, column: usize) -> Self { + + pub fn new(parser: &Parser, message: &str) -> Self { + + let context_stack = parser.get_context_stack_info(); + + let token_stack = parser.get_token_stack_info(); + + let token = parser.peek(); + + // Infer token length based on its type + let token_length = match token { + + Token::EOS => 0, + + Token::LeftParen + | Token::RightParen + | Token::LeftBrace + | Token::RightBrace + | Token::LeftBracket + | Token::RightBracket + | Token::Comma + | Token::Dot + | Token::Semicolon + | Token::Colon + | Token::Question + | Token::Hash + | Token::Plus + | Token::Minus + | Token::Star + | Token::Slash + | Token::Percent + | Token::Equal + | Token::Bang + | Token::Greater + | Token::Caret + | Token::Less + | Token::Pipe + | Token::Ampersand + | Token::Tilde => 1, + + Token::PlusPlus + | Token::PlusEqual + | Token::MinusMinus + | Token::MinusEqual + | Token::StarEqual + | Token::SlashEqual + | Token::PercentEqual + | Token::EqualEqual + | Token::BangEqual + | Token::GreaterEqual + | Token::GreaterGreater + | Token::LessEqual + | Token::LessLess + | Token::Arrow + | Token::StarStar + | Token::AmpersandEqual + | Token::AmpersandAmpersand + | Token::PipeEqual + | Token::PipePipe + | Token::CaretEqual + | Token::QuestionQuestion + | Token::If + | Token::In + | Token::Of + | Token::Do + | Token::As + | Token::QuestionDot => 2, + + Token::EqualEqualEqual + | Token::BangEqualEqual + | Token::GreaterGreaterEqual + | Token::GreaterGreaterGreater + | Token::LessLessEqual + | Token::AmpersandAmpersandEqual + | Token::PipePipeEqual + | Token::Ellipsis + | Token::StarStarEqual + | Token::Var + | Token::Let + | Token::For + | Token::New + | Token::Try + | Token::Get + | Token::Set + | Token::QuestionQuestionEqual => 3, + + Token::Null + | Token::GreaterGreaterGreaterEqual + | Token::With + | Token::Else + | Token::Void + | Token::This + | Token::Case + | Token::Eval + | Token::Enum + | Token::From + | Token::True => 4, + + Token::Const + | Token::While + | Token::Break + | Token::Super + | Token::Await + | Token::Class + | Token::Throw + | Token::Catch + | Token::Yield + | Token::Async + | Token::False => 5, + + Token::Return + | Token::Export + | Token::Import + | Token::Switch + | Token::Typeof + | Token::Target + | Token::Public + | Token::Delete + | Token::Static => 6, + + Token::Extends + | Token::Default + | Token::Finally + | Token::Package + | Token::Private => 7, + + Token::Debugger + | Token::Continue + | Token::Function => 8, + + Token::Undefined + | Token::Interface + | Token::Protected + | Token::Arguments => 9, + + Token::Implements + | Token::InstanceOf => 10, + + Token::Constructor => 11, + + // Literals + Token::Identifier(ref name) => name.len(), + Token::StringLiteral(ref value) => value.len() + 2, // Account for quotation marks + Token::NumberLiteral(ref value) => value.to_string().len(), + Token::BigIntLiteral(ref value) => value.len() + 1, // Account for the trailing 'n' + Token::RegExpLiteral(ref pattern, ref flags) => pattern.len() + flags.len() + 2, // Account for the slashes + Token::TemplateLiteral(ref parts) => parts.iter().fold(2, |acc, part| { + acc + match part { + TemplatePart::String(s) => s.len(), + TemplatePart::Expression(e) => e.len(), + } + }), + + }; + + let [line, column] = parser.get_current_position(); + + let col = column - token_length; + + let source = parser.get_source_text(); + + let source_line = extract_source_line_with_context(&source, line, col, 60); + let span_end = column; + + let (adjusted_column, adjusted_span_end) = if source_line.starts_with("...") { + let adjusted_col = col.min(60) + 3; + let adjusted_end = adjusted_col + token_length; + (adjusted_col, adjusted_end) + } else { + (col, span_end) + }; + ParserError { message: message.to_string(), line, - column, + column: col, + source_line: Some(source_line), + source_span: Some((adjusted_column, adjusted_span_end)), + context_stack, + token_stack, } } + + /// Create a parser error from the current token with an immutable reference + pub fn at_current(parser: &Parser, message: &str) -> Self { + Self::new(parser, message) + } + } impl fmt::Display for ParserError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "ParserError at line {}, column {}: {}", self.line, self.column, self.message) + writeln!(f, "error: {}", self.message)?; + + if let Some(source_line) = &self.source_line { + // Calculate width needed for line numbers + let line_width = num_digits(self.line); + + // Show location information + writeln!(f, " --> line {}, column {}", self.line, self.column)?; + writeln!(f, "{:width$}|", "", width = line_width + 1)?; + + // Show the error line with context + writeln!(f, "{:>width$} | {}", self.line, source_line, width = line_width)?; + + // Print the error indicator + let (start, end) = self.source_span.unwrap_or((self.column, self.column + 1)); + write!(f, "{:width$} | ", "", width = line_width)?; + + // Print spaces up to the start position + for _ in 0..start { + write!(f, " ")?; + } + + // Calculate how many carets to print (limited by the actual line length) + let visible_end = if let Some(line) = &self.source_line { + end.min(start + line.len() - start.min(line.len())) + } else { + end + }; + + // Print carets for the span length + for _ in start..visible_end.max(start+1) { + write!(f, "^")?; + } + + writeln!(f)?; + } else { + writeln!(f, "at line {}, column {}", self.line, self.column)?; + } + + if !self.token_stack.is_empty() { + writeln!(f, "\nToken stack:")?; + for (i, token) in self.token_stack.iter().enumerate() { + writeln!(f, " {}: {}", i, token)?; + } + } + + if !self.context_stack.is_empty() { + writeln!(f, "\nLexical context stack:")?; + for (i, context) in self.context_stack.iter().enumerate() { + writeln!(f, " {}: {}", i, context)?; + } + } + + Ok(()) } } +/// Helper function to calculate the number of digits in a number +#[inline] +fn num_digits(n: usize) -> usize { + if n == 0 { + return 1; + } + let mut count = 0; + let mut num = n; + while num > 0 { + count += 1; + num /= 10; + } + count +} + impl std::error::Error for ParserError {} impl From for ParserError { @@ -33,9 +286,58 @@ impl From for ParserError { message: error.message, line: error.line, column: error.column, + source_line: None, + source_span: None, + context_stack: Vec::new(), + token_stack: Vec::new(), } } } -/// Type alias for parser results -pub type ParseResult = Result; \ No newline at end of file +/// Extract a specific line from source code with limited context around the error position +#[inline] +fn extract_source_line_with_context(source: &str, line_number: usize, column: usize, context_size: usize) -> String { + let line = source.lines() + .nth(line_number - 1) + .unwrap_or(""); + + if line.len() <= context_size * 2 { + // Line is short enough to show in full + return line.to_string(); + } + + // Calculate start and end positions with context + let start = if column > context_size { + column - context_size + } else { + 0 + }; + + let end = if column + context_size < line.len() { + column + context_size + } else { + line.len() + }; + + // Create the context string with ellipses as needed + let mut result = String::with_capacity(context_size * 2 + 6); // +6 for possible ellipses + + if start > 0 { + result.push_str("..."); + } + + // Get the substring with proper UTF-8 character boundaries + let context_str = line.chars() + .skip(start) + .take(end - start) + .collect::(); + result.push_str(&context_str); + + if end < line.len() { + result.push_str("..."); + } + + result +} + +pub type ParseResult = Result; diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs deleted file mode 100644 index f402c2f..0000000 --- a/src/parser/expressions.rs +++ /dev/null @@ -1,983 +0,0 @@ -use crate::ast::*; -use crate::lexer::{Token, TokenType, TemplatePart}; -use super::error::ParseResult; -use super::core::Parser; - -// Define operator precedence levels and associativity -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] -pub enum Precedence { - None, - Comma, // , - Assignment, // = += -= etc. - Conditional, // ?: - LogicalOr, // || ?? - LogicalAnd, // && - BitwiseOr, // | - BitwiseXor, // ^ - BitwiseAnd, // & - Equality, // == != === !== - Relational, // < > <= >= in instanceof - Shift, // << >> >>> - Additive, // + - - Multiplicative, // * / % - Exponentiation, // ** - Prefix, // ! ~ + - ++ -- typeof void delete - Postfix, // ++ -- - Call, // . [] () - Primary -} - -impl Parser { - - pub fn parse_expression(&mut self) -> ParseResult { - self.parse_expression_with_precedence(Precedence::Comma) - } - - pub fn parse_expression_with_precedence(&mut self, precedence: Precedence) -> ParseResult { - // Parse prefix expressions - let mut expr = match self.peek_token_type() { - // Unary prefix operators - Some(TokenType::Bang) | - Some(TokenType::Tilde) | - Some(TokenType::Plus) | - Some(TokenType::Minus) | - Some(TokenType::PlusPlus) | - Some(TokenType::MinusMinus) | - Some(TokenType::Typeof) | - Some(TokenType::Void) | - Some(TokenType::Delete) => { - self.advance(); - let operator = match self.previous().unwrap().token_type { - TokenType::Bang => UnaryOperator::Not, - TokenType::Tilde => UnaryOperator::BitwiseNot, - TokenType::Plus => UnaryOperator::Plus, - TokenType::Minus => UnaryOperator::Minus, - TokenType::PlusPlus => UnaryOperator::Increment, - TokenType::MinusMinus => UnaryOperator::Decrement, - TokenType::Typeof => UnaryOperator::Typeof, - TokenType::Void => UnaryOperator::Void, - TokenType::Delete => UnaryOperator::Delete, - _ => unreachable!(), - }; - - let argument = self.parse_expression_with_precedence(Precedence::Prefix)?; - - Expression::Unary { - operator, - argument: Box::new(argument), - prefix: true, - } - }, - // Await expression - Some(TokenType::Await) if self.state.allow_await => { - self.advance(); - let argument = self.parse_expression_with_precedence(Precedence::Prefix)?; - Expression::Await(Box::new(argument)) - }, - // Yield expression - Some(TokenType::Yield) if self.state.allow_yield => { - self.advance(); - let delegate = self.match_token(&TokenType::Star); - - // Yield can be used without an argument - let argument = if self.check(&TokenType::Semicolon) || - self.check(&TokenType::RightBrace) || - self.check(&TokenType::Comma) || - self.check(&TokenType::RightParen) || - self.check(&TokenType::Colon) || - self.is_at_end() { - None - } else { - Some(Box::new(self.parse_expression_with_precedence(Precedence::Assignment)?)) - }; - - Expression::Yield { - argument, - delegate, - } - }, - // Primary expressions - Some(TokenType::This) => { - self.advance(); - Expression::This - }, - Some(TokenType::Arguments) => { - self.advance(); - Expression::Identifier("arguments".into()) - }, - Some(TokenType::Super) => { - self.advance(); - Expression::Super - }, - Some(TokenType::Null) => { - self.advance(); - Expression::Literal(Literal::Null) - }, - Some(TokenType::Undefined) => { - self.advance(); - Expression::Literal(Literal::Undefined) - }, - Some(TokenType::True) => { - self.advance(); - Expression::Literal(Literal::Boolean(true)) - }, - Some(TokenType::False) => { - self.advance(); - Expression::Literal(Literal::Boolean(false)) - }, - Some(TokenType::NumberLiteral(n)) => { - let value = *n; - self.advance(); - Expression::Literal(Literal::Number(value)) - }, - Some(TokenType::StringLiteral(_)) => { - if let TokenType::StringLiteral(s) = &self.advance().unwrap().token_type { - Expression::Literal(Literal::String(s.clone().into_boxed_str())) - } else { - unreachable!() - } - }, - Some(TokenType::RegExpLiteral(_, _)) => { - if let TokenType::RegExpLiteral(pattern, flags) = self.advance().unwrap().token_type.clone() { - Expression::Literal(Literal::RegExp { - pattern: pattern.into_boxed_str(), - flags: flags.into_boxed_str(), - }) - } else { - unreachable!() - } - }, - Some(TokenType::BigIntLiteral(_)) => { - if let TokenType::BigIntLiteral(s) = self.advance().unwrap().token_type.clone() { - Expression::Literal(Literal::BigInt(s.into_boxed_str())) - } else { - unreachable!() - } - }, - Some(TokenType::TemplateLiteral(_)) => { - if let TokenType::TemplateLiteral(parts) = self.advance().unwrap().token_type.clone() { - let token_line = self.previous().unwrap().line; - let token_column = self.previous().unwrap().column; - - let mut quasis = Vec::new(); - let mut expressions = Vec::new(); - - for (i, part) in parts.iter().enumerate() { - match part { - TemplatePart::String(s) => { - // Add the string part to quasis - quasis.push(s.clone().into_boxed_str()); - - // If this is the last part and it's a string, we need to ensure - // we have one more expression than quasis (as per JS spec) - if i == parts.len() - 1 && !expressions.is_empty() { - quasis.push("".into()); - } - }, - TemplatePart::Expression(expr_str) => { - // Create a temporary parser to parse the expression - let expr_str_clone = expr_str.clone(); - let mut temp_lexer = crate::lexer::Lexer::new(&expr_str_clone); - match temp_lexer.scan_tokens() { - Ok(tokens) => { - let mut temp_parser = Parser::new(tokens); - match temp_parser.parse_expression() { - Ok(expr) => expressions.push(expr), - Err(e) => { - return Err(super::error::ParserError::new( - &format!("Invalid expression in template literal: {}", e.message), - token_line, - token_column - )); - } - } - }, - Err(e) => { - return Err(super::error::ParserError::new( - &format!("Error tokenizing expression in template literal: {}", e.message), - token_line, - token_column - )); - } - } - - // If this is the last part and it's an expression, we need to add an empty string - if i == parts.len() - 1 { - quasis.push("".into()); - } - } - } - } - - // Validate that we have one more quasi than expressions (as per JS spec) - if quasis.len() != expressions.len() + 1 { - // Add an empty string at the end if needed - if quasis.len() == expressions.len() { - quasis.push("".into()); - } else { - return Err(super::error::ParserError::new( - &format!( - "Invalid template literal: expected {} quasis but got {}", - expressions.len() + 1, - quasis.len() - ), - token_line, - token_column - )); - } - } - - Expression::TemplateLiteral { quasis, expressions } - } else { - unreachable!("Expected TemplateLiteral token") - } - }, - // TODO everything but Identifier hoists matches below, need a better approach to var as = e.class; scenarios - Some(TokenType::Identifier(_)) | - Some(TokenType::As) | - Some(TokenType::Target) | - Some(TokenType::Class) | - Some(TokenType::Get) | - Some(TokenType::Set) | - Some(TokenType::From) => { - let name = self.expect_identifier("Expected identifier in expression")?; - if self.check(&TokenType::Arrow) { - let param = Pattern::Identifier(name); - self.advance(); - return self.parse_arrow_function_body(vec![param], false); - } - Expression::Identifier(name) - }, - Some(TokenType::LeftParen) => { - self.advance(); // consume '(' - - let start_pos = self.current; - let is_arrow = self.is_arrow_function_parameters(); - - if is_arrow { - self.current = start_pos; - let params = if self.check(&TokenType::RightParen) { - self.advance(); - vec![] - } else { - let mut params = vec![]; - loop { - if self.match_token(&TokenType::Ellipsis) { - let arg = self.parse_pattern()?; - params.push(Pattern::RestElement(Box::new(arg))); - break; - } else { - params.push(self.parse_pattern()?); - } - if !self.match_token(&TokenType::Comma) { - break; - } - if self.match_token(&TokenType::RightParen) { - break; - } - } - self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; - params - }; - - self.consume(&TokenType::Arrow, "Expected '=>' after parameters")?; - let body = self.parse_arrow_function_body(params, false)?; - return Ok(body); - } else { - let expr = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - expr - } - }, - Some(TokenType::LeftBracket) => { - self.advance(); // consume '[' - - let mut elements = Vec::new(); - - while !self.check(&TokenType::RightBracket) && !self.is_at_end() { - if self.match_token(&TokenType::Comma) { - // Elision (hole) - elements.push(None); - } else { - if self.match_token(&TokenType::Ellipsis) { - // Spread element - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - elements.push(Some(ArrayElement::Spread(expr))); - } else { - // Regular element - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - elements.push(Some(ArrayElement::Expression(expr))); - } - - if !self.check(&TokenType::RightBracket) { - self.consume(&TokenType::Comma, "Expected ',' after array element")?; - } - } - } - - self.consume(&TokenType::RightBracket, "Expected ']' after array elements")?; - - Expression::Array(elements) - }, - Some(TokenType::LeftBrace) => { - self.advance(); // consume '{' - - let mut properties = Vec::new(); - - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - if self.match_token(&TokenType::Ellipsis) { - // Spread property - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - properties.push(ObjectProperty::Spread(expr)); - } else { - // Method or property - let start_pos = self.current; - let is_async = self.match_token(&TokenType::Async); - let is_generator = self.match_token(&TokenType::Star); - - // Check for getter/setter - let mut kind = PropertyKind::Init; - if !is_async && !is_generator { - // Check if the next token is 'get' or 'set' - if self.check(&TokenType::Get) || self.check(&TokenType::Set) { - // Look ahead to see if it's followed by a colon - let is_property_name = if let Some(next_token) = self.tokens.get(self.current + 1) { - matches!(next_token.token_type, TokenType::Colon) - } else { - false - }; - - // Only treat as getter/setter if not followed by a colon - if !is_property_name { - if self.match_token(&TokenType::Get) { - kind = PropertyKind::Get; - } else if self.match_token(&TokenType::Set) { - kind = PropertyKind::Set; - } - } - } - } - - // Parse property key - let key = if self.match_token(&TokenType::LeftBracket) { - // Computed property key - let expr = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property key")?; - PropertyKey::Computed(expr) - } else if self.match_token(&TokenType::Hash) { - // Private identifier (class fields/methods) - let name = self.expect_identifier("Expected private identifier name")?; - PropertyKey::PrivateIdentifier(name) - } else if let Some(TokenType::StringLiteral(_)) = self.peek_token_type() { - if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { - PropertyKey::StringLiteral(s.into_boxed_str()) - } else { - unreachable!() - } - } else if let Some(TokenType::NumberLiteral(_)) = self.peek_token_type() { - if let TokenType::NumberLiteral(n) = self.advance().unwrap().token_type { - PropertyKey::NumericLiteral(n) - } else { - unreachable!() - } - } else if self.check(&TokenType::Default) { - self.advance(); - PropertyKey::Identifier("default".into()) - } else if self.check(&TokenType::Get) { - self.advance(); - PropertyKey::Identifier("get".into()) - } else if self.check(&TokenType::Set) { - self.advance(); - PropertyKey::Identifier("set".into()) - } else if self.check(&TokenType::From) { - self.advance(); - PropertyKey::Identifier("from".into()) - } else if self.check(&TokenType::As) { - self.advance(); - PropertyKey::Identifier("as".into()) - } else if self.check(&TokenType::For) { - self.advance(); - PropertyKey::Identifier("for".into()) - } else { - // Identifier - let name = self.expect_identifier("Expected property name 1")?; - PropertyKey::Identifier(name) - }; - - let computed = matches!(key, PropertyKey::Computed(_)); - - // Method definition - if self.check(&TokenType::LeftParen) || is_generator || is_async { - let method_kind = match kind { - PropertyKind::Get => MethodKind::Getter, - PropertyKind::Set => MethodKind::Setter, - _ => MethodKind::Method, - }; - - let params = self.parse_function_params()?; - let body = self.parse_function_body(is_async, is_generator)?; - - properties.push(ObjectProperty::Method { - key, - value: MethodDefinition { - params, - body, - is_async, - is_generator, - }, - kind: method_kind, - computed, - }); - } else { - // Regular property - let shorthand = !computed && - !self.check(&TokenType::Colon) && - matches!(key, PropertyKey::Identifier(_)); - - let value = if shorthand { - if let PropertyKey::Identifier(name) = &key { - Expression::Identifier(name.clone()) - } else { - unreachable!() - } - } else { - self.consume(&TokenType::Colon, "Expected ':' after property name")?; - self.parse_expression_with_precedence(Precedence::Assignment)? - }; - - properties.push(ObjectProperty::Property { - key, - value, - kind, - computed, - shorthand, - }); - } - } - - if !self.check(&TokenType::RightBrace) { - self.consume(&TokenType::Comma, "Expected ',' after property")?; - - // Allow trailing comma - if self.check(&TokenType::RightBrace) { - break; - } - } else { - break; - } - } - - self.consume(&TokenType::RightBrace, "Expected '}' after object literal")?; - - Expression::Object(properties) - }, - Some(TokenType::Function) => self.parse_function_expression()?, - Some(TokenType::Class) => self.parse_class_expression()?, - Some(TokenType::New) => { - self.advance(); // consume 'new' - - // Handle new.target meta property - if self.match_token(&TokenType::Dot) { - if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { - if name == "target" { - self.advance(); // consume 'target' - Expression::MetaProperty { - meta: "new".into(), - property: "target".into(), - } - } else { - let token = self.peek_token().unwrap(); - return Err(super::error::ParserError::new("Expected 'target' after 'new.'", token.line, token.column)); - } - } else { - let token = self.peek_token().unwrap(); - return Err(super::error::ParserError::new("Expected 'target' after 'new.'", token.line, token.column)); - } - } else { - // Regular new expression - let callee = self.parse_expression_with_precedence(Precedence::Call)?; - - // Optional arguments - let arguments = if self.match_token(&TokenType::LeftParen) { - let mut args = Vec::new(); - - if !self.check(&TokenType::RightParen) { - loop { - if self.match_token(&TokenType::Ellipsis) { - // Spread argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Spread(expr)); - } else { - // Regular argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Expression(expr)); - } - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightParen) { - break; - } - } - } - - self.consume(&TokenType::RightParen, "Expected ')' after arguments")?; - args - } else { - Vec::new() - }; - - Expression::New { - callee: Box::new(callee), - arguments, - } - } - }, - Some(TokenType::Import) => { - self.advance(); // consume 'import' - self.consume(&TokenType::LeftParen, "Expected '(' after 'import'")?; - let source = self.parse_expression_with_precedence(Precedence::Assignment)?; - self.consume(&TokenType::RightParen, "Expected ')' after import source")?; - - Expression::Import(Box::new(source)) - }, - Some(TokenType::Hash) => { - self.advance(); // consume '#' - let name = self.expect_identifier("Expected private identifier name")?; - Expression::PrivateName(name) - }, - Some(TokenType::Async) if self.is_async_function() => self.parse_async_function_expression()?, - _ => { - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap()); - return Err(super::error::ParserError::new( - &format!("Unexpected token in expression: {:?}", token.token_type), - token.line, - token.column - )); - } - }; - - // Parse infix and postfix expressions based on precedence - while !self.is_at_end() { - let current_precedence = match self.peek_token_type() { - Some(TokenType::Comma) => Precedence::Comma, - Some(TokenType::Question) => { - if self.tokens.get(self.current + 1).map_or(false, |t| matches!(t.token_type, TokenType::Dot)) { - Precedence::Call - } else { - Precedence::Conditional - } - }, - Some(TokenType::Equal) | - Some(TokenType::PlusEqual) | - Some(TokenType::MinusEqual) | - Some(TokenType::StarEqual) | - Some(TokenType::SlashEqual) | - Some(TokenType::PercentEqual) | - Some(TokenType::StarStarEqual) | - Some(TokenType::AmpersandEqual) | - Some(TokenType::PipeEqual) | - Some(TokenType::CaretEqual) | - Some(TokenType::LessLessEqual) | - Some(TokenType::GreaterGreaterEqual) | - Some(TokenType::GreaterGreaterGreaterEqual) | - Some(TokenType::AmpersandAmpersandEqual) | - Some(TokenType::PipePipeEqual) | - Some(TokenType::QuestionQuestionEqual) => Precedence::Assignment, - Some(TokenType::PipePipe) | - Some(TokenType::QuestionQuestion) => Precedence::LogicalOr, - Some(TokenType::AmpersandAmpersand) => Precedence::LogicalAnd, - Some(TokenType::Pipe) => Precedence::BitwiseOr, - Some(TokenType::Caret) => Precedence::BitwiseXor, - Some(TokenType::Ampersand) => Precedence::BitwiseAnd, - Some(TokenType::EqualEqual) | - Some(TokenType::BangEqual) | - Some(TokenType::EqualEqualEqual) | - Some(TokenType::BangEqualEqual) => Precedence::Equality, - Some(TokenType::Less) | - Some(TokenType::LessEqual) | - Some(TokenType::Greater) | - Some(TokenType::GreaterEqual) | - Some(TokenType::In) | - Some(TokenType::InstanceOf) => Precedence::Relational, - Some(TokenType::LessLess) | - Some(TokenType::GreaterGreater) | - Some(TokenType::GreaterGreaterGreater) => Precedence::Shift, - Some(TokenType::Plus) | - Some(TokenType::Minus) => Precedence::Additive, - Some(TokenType::Star) | - Some(TokenType::Slash) | - Some(TokenType::Percent) => Precedence::Multiplicative, - Some(TokenType::StarStar) => Precedence::Exponentiation, - Some(TokenType::PlusPlus) | - Some(TokenType::MinusMinus) if !self.previous_line_terminator() => Precedence::Postfix, - Some(TokenType::Dot) | - Some(TokenType::LeftBracket) | - Some(TokenType::LeftParen) | - Some(TokenType::QuestionDot) => Precedence::Call, - _ => Precedence::None, - }; - - if precedence > current_precedence { - break; - } - - // Handle postfix operators - if current_precedence == Precedence::Postfix { - if self.match_any(&[TokenType::PlusPlus, TokenType::MinusMinus]) { - if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. }) { - let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( - "Invalid left-hand side in postfix operation", - token.line, - token.column - )); - } - - let operator = match self.previous().unwrap().token_type { - TokenType::PlusPlus => UnaryOperator::Increment, - TokenType::MinusMinus => UnaryOperator::Decrement, - _ => unreachable!(), - }; - - expr = Expression::Unary { - operator, - argument: Box::new(expr), - prefix: false, - }; - continue; - } - } - - // Handle infix operators - match current_precedence { - Precedence::Comma => { - self.advance(); // consume comma - let right = self.parse_expression_with_precedence(Precedence::Assignment)?; - expr = Expression::Sequence(vec![expr, right]); - }, - Precedence::Assignment => { - // Match assignment operator - let op = if self.match_token(&TokenType::Equal) { - AssignmentOperator::Assign - } else if self.match_token(&TokenType::PlusEqual) { - AssignmentOperator::AddAssign - } else if self.match_token(&TokenType::MinusEqual) { - AssignmentOperator::SubtractAssign - } else if self.match_token(&TokenType::StarEqual) { - AssignmentOperator::MultiplyAssign - } else if self.match_token(&TokenType::SlashEqual) { - AssignmentOperator::DivideAssign - } else if self.match_token(&TokenType::PercentEqual) { - AssignmentOperator::ModuloAssign - } else if self.match_token(&TokenType::StarStarEqual) { - AssignmentOperator::ExponentAssign - } else if self.match_token(&TokenType::AmpersandEqual) { - AssignmentOperator::BitwiseAndAssign - } else if self.match_token(&TokenType::PipeEqual) { - AssignmentOperator::BitwiseOrAssign - } else if self.match_token(&TokenType::CaretEqual) { - AssignmentOperator::BitwiseXorAssign - } else if self.match_token(&TokenType::LessLessEqual) { - AssignmentOperator::LeftShiftAssign - } else if self.match_token(&TokenType::GreaterGreaterEqual) { - AssignmentOperator::RightShiftAssign - } else if self.match_token(&TokenType::GreaterGreaterGreaterEqual) { - AssignmentOperator::UnsignedRightShiftAssign - } else if self.match_token(&TokenType::AmpersandAmpersandEqual) { - AssignmentOperator::LogicalAndAssign - } else if self.match_token(&TokenType::PipePipeEqual) { - AssignmentOperator::LogicalOrAssign - } else if self.match_token(&TokenType::QuestionQuestionEqual) { - AssignmentOperator::NullishAssign - } else { - break; // No assignment operator found - }; - - // Validate left-hand side - if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. } | Expression::Array(_) | Expression::Object(_)) { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.previous().unwrap_or(&binding); - return Err(super::error::ParserError::new( - "Invalid left-hand side in assignment", - token.line, - token.column - )); - } - - let right = self.parse_expression_with_precedence(Precedence::Assignment)?; - - expr = Expression::Assignment { - operator: op, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::Conditional => { - self.advance(); // consume ? - - // Check if this is part of optional chaining - if self.check(&TokenType::Dot) { - // This is optional chaining - self.advance(); // consume . - - // Now handle the optional chaining - if self.match_token(&TokenType::LeftBracket) { - let property = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(property), - computed: true, - optional: true, - }; - } else if self.match_token(&TokenType::LeftParen) { - let arguments = self.parse_arguments()?; - expr = Expression::Call { - callee: Box::new(expr), - arguments, - optional: true, - }; - } else { - let property = self.expect_identifier("Expected property name 2")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(Expression::Identifier(property)), - computed: false, - optional: true, - }; - } - } else { - // This is a ternary operator - let consequent = self.parse_expression_with_precedence(Precedence::Assignment)?; - self.consume(&TokenType::Colon, "Expected ':' in conditional expression")?; - let alternate = self.parse_expression_with_precedence(Precedence::Assignment)?; - - expr = Expression::Conditional { - test: Box::new(expr), - consequent: Box::new(consequent), - alternate: Box::new(alternate), - }; - } - }, - Precedence::LogicalOr => { - let operator = if self.match_token(&TokenType::PipePipe) { - LogicalOperator::Or - } else if self.match_token(&TokenType::QuestionQuestion) { - LogicalOperator::NullishCoalescing - } else { - break; - }; - - let right = self.parse_expression_with_precedence(Precedence::LogicalAnd)?; - - expr = Expression::Logical { - operator, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::LogicalAnd => { - self.advance(); // consume && - let right = self.parse_expression_with_precedence(Precedence::BitwiseOr)?; - - expr = Expression::Logical { - operator: LogicalOperator::And, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::BitwiseOr | - Precedence::BitwiseXor | - Precedence::BitwiseAnd | - Precedence::Equality | - Precedence::Relational | - Precedence::Shift | - Precedence::Additive | - Precedence::Multiplicative => { - self.advance(); - let token_type = self.previous().unwrap().token_type.clone(); - - let operator = match token_type { - TokenType::Plus => BinaryOperator::Add, - TokenType::Minus => BinaryOperator::Subtract, - TokenType::Star => BinaryOperator::Multiply, - TokenType::Slash => BinaryOperator::Divide, - TokenType::Percent => BinaryOperator::Modulo, - TokenType::StarStar => BinaryOperator::Exponent, - TokenType::Pipe => BinaryOperator::BitwiseOr, - TokenType::Ampersand => BinaryOperator::BitwiseAnd, - TokenType::Caret => BinaryOperator::BitwiseXor, - TokenType::LessLess => BinaryOperator::LeftShift, - TokenType::GreaterGreater => BinaryOperator::RightShift, - TokenType::GreaterGreaterGreater => BinaryOperator::UnsignedRightShift, - TokenType::EqualEqual => BinaryOperator::Equal, - TokenType::BangEqual => BinaryOperator::NotEqual, - TokenType::EqualEqualEqual => BinaryOperator::StrictEqual, - TokenType::BangEqualEqual => BinaryOperator::StrictNotEqual, - TokenType::Less => BinaryOperator::LessThan, - TokenType::LessEqual => BinaryOperator::LessThanEqual, - TokenType::Greater => BinaryOperator::GreaterThan, - TokenType::GreaterEqual => BinaryOperator::GreaterThanEqual, - TokenType::In => BinaryOperator::In, - TokenType::InstanceOf => BinaryOperator::InstanceOf, - _ => { - let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( - &format!("Unexpected token: {:?}", token_type), - token.line, - token.column - )); - } - }; - - // Determine next precedence level - let next_precedence = match current_precedence { - Precedence::BitwiseOr => Precedence::BitwiseXor, - Precedence::BitwiseXor => Precedence::BitwiseAnd, - Precedence::BitwiseAnd => Precedence::Equality, - Precedence::Equality => Precedence::Relational, - Precedence::Relational => Precedence::Shift, - Precedence::Shift => Precedence::Additive, - Precedence::Additive => Precedence::Multiplicative, - Precedence::Multiplicative => Precedence::Exponentiation, - _ => unreachable!(), - }; - - let right = self.parse_expression_with_precedence(next_precedence)?; - - expr = Expression::Binary { - operator, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::Exponentiation => { - self.advance(); // consume ** - // Exponentiation is right-associative - let right = self.parse_expression_with_precedence(Precedence::Exponentiation)?; - - expr = Expression::Binary { - operator: BinaryOperator::Exponent, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::Call => { - if self.match_token(&TokenType::Dot) { - let property = if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { - self.advance(); - name.into_boxed_str() - } - - else if self.check(&TokenType::Default) { - self.advance(); - "default".into() - } else if self.check(&TokenType::Get) { - self.advance(); - "get".into() - } else if self.check(&TokenType::Set) { - self.advance(); - "set".into() - } else if self.check(&TokenType::From) { - self.advance(); - "from".into() - } else if self.check(&TokenType::As) { - self.advance(); - "as".into() - } else if self.check(&TokenType::For) { - self.advance(); - "for".into() - } else { - return Err(super::error::ParserError::new( - "Expected property name 3", - self.peek_token().unwrap().line, - self.peek_token().unwrap().column - )); - }; - - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(Expression::Identifier(property)), - computed: false, - optional: false, - }; - } else if self.match_token(&TokenType::LeftBracket) { - // Member access with bracket notation - let property = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(property), - computed: true, - optional: false, - }; - } else if self.match_token(&TokenType::QuestionDot) { - // Optional chaining - if self.match_token(&TokenType::LeftBracket) { - let property = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(property), - computed: true, - optional: true, - }; - } else { - let property = self.expect_identifier("Expected property name 4")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(Expression::Identifier(property)), - computed: false, - optional: true, - }; - } - } else if self.match_token(&TokenType::LeftParen) { - // Function call - let mut args = Vec::new(); - - if !self.check(&TokenType::RightParen) { - loop { - if self.match_token(&TokenType::Ellipsis) { - // Spread argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Spread(expr)); - } else { - // Regular argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Expression(expr)); - } - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightParen) { - break; - } - } - } - - self.consume(&TokenType::RightParen, "Expected ')' after arguments")?; - - expr = Expression::Call { - callee: Box::new(expr), - arguments: args, - optional: false, - }; - } else { - break; - } - }, - _ => break, - } - } - - Ok(expr) - } - -} diff --git a/src/parser/functions.rs b/src/parser/functions.rs deleted file mode 100644 index a2ea818..0000000 --- a/src/parser/functions.rs +++ /dev/null @@ -1,307 +0,0 @@ -use crate::ast::*; -use crate::lexer::TokenType; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - pub fn parse_function_declaration(&mut self) -> ParseResult { - self.advance(); // consume 'function' - - let is_generator = self.match_token(&TokenType::Star); - let id = self.expect_identifier("Expected function name")?; - - // Save and update parser state - let (prev_in_function, prev_allow_yield) = (self.state.in_function, self.state.allow_yield); - self.state.in_function = true; - self.state.allow_yield = is_generator; - - let params = self.parse_function_params()?; - let body = self.parse_function_body(false, is_generator)?; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; - - Ok(FunctionDeclaration { - id, - params, - body, - is_async: false, - is_generator, - }) - } - - pub fn parse_function_expression(&mut self) -> ParseResult { - self.advance(); // consume 'function' - - let is_generator = self.match_token(&TokenType::Star); - - // Optional function name for function expressions - let id = matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) - .then(|| self.expect_identifier("Expected function name")) - .transpose()?; - - // Save and update parser state - let (prev_in_function, prev_allow_yield) = (self.state.in_function, self.state.allow_yield); - self.state.in_function = true; - self.state.allow_yield = is_generator; - - let params = self.parse_function_params()?; - let body = self.parse_function_body(false, is_generator)?; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; - - Ok(Expression::Function { - id, - params, - body, - is_async: false, - is_generator, - }) - } - - pub fn parse_async_function_expression(&mut self) -> ParseResult { - self.advance(); // consume 'async' - self.advance(); // consume 'function' - - let is_generator = self.match_token(&TokenType::Star); - - // Optional function name for function expressions - let id = matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) - .then(|| self.expect_identifier("Expected function name")) - .transpose()?; - - // Save and update parser state - let (prev_in_function, prev_allow_yield, prev_allow_await) = - (self.state.in_function, self.state.allow_yield, self.state.allow_await); - self.state.in_function = true; - self.state.allow_yield = is_generator; - self.state.allow_await = true; - - let params = self.parse_function_params()?; - let body = self.parse_function_body(true, is_generator)?; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; - self.state.allow_await = prev_allow_await; - - Ok(Expression::Function { - id, - params, - body, - is_async: true, - is_generator, - }) - } - - pub fn parse_function_params(&mut self) -> ParseResult> { - self.consume(&TokenType::LeftParen, "Expected '(' after function name")?; - - let mut params = Vec::new(); - - if !self.check(&TokenType::RightParen) { - loop { - if self.match_token(&TokenType::Ellipsis) { - // Rest parameter - let arg = self.parse_pattern()?; - params.push(Pattern::RestElement(Box::new(arg))); - break; // Rest parameter must be the last one - } else { - params.push(self.parse_pattern()?); - } - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightParen) { - break; - } - } - } - - self.consume(&TokenType::RightParen, "Expected ')' after function parameters")?; - - Ok(params) - } - - pub fn parse_function_body(&mut self, _is_async: bool, _is_generator: bool) -> ParseResult> { - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; - - let mut body = Vec::new(); - - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - body.push(self.parse_statement()?); - } - - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; - - Ok(body) - } - - pub fn parse_arrow_function_body(&mut self, params: Vec, is_async: bool) -> ParseResult { - // Save and update parser state - let (prev_in_function, prev_allow_await) = (self.state.in_function, self.state.allow_await); - self.state.in_function = true; - self.state.allow_await = is_async; - - let body = if self.check(&TokenType::LeftBrace) { - // Block body - let statements = self.parse_function_body(is_async, false)?; - ArrowFunctionBody::Block(statements) - } else { - // Expression body - let expr = self.parse_expression()?; - ArrowFunctionBody::Expression(Box::new(expr)) - }; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_await = prev_allow_await; - - Ok(Expression::ArrowFunction { - params, - body, - is_async, - }) - } - - pub fn parse_async_function_declaration(&mut self) -> ParseResult { - self.advance(); // consume 'async' - self.consume(&TokenType::Function, "Expected 'function' after 'async'")?; - - let is_generator = self.match_token(&TokenType::Star); - let id = self.expect_identifier("Expected function name")?; - - // Save and update parser state - let (prev_in_function, prev_allow_yield, prev_allow_await) = - (self.state.in_function, self.state.allow_yield, self.state.allow_await); - self.state.in_function = true; - self.state.allow_yield = is_generator; - self.state.allow_await = true; - - let params = self.parse_function_params()?; - let body = self.parse_function_body(true, is_generator)?; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; - self.state.allow_await = prev_allow_await; - - Ok(FunctionDeclaration { - id, - params, - body, - is_async: true, - is_generator, - }) - } - - // Helper method to check if we're looking at an async function - pub fn is_async_function(&self) -> bool { - if let Some(TokenType::Async) = self.peek_token_type() { - if let Some(next_token) = self.tokens.get(self.current + 1) { - return matches!(next_token.token_type, TokenType::Function); - } - } - false - } - - pub fn is_arrow_function_parameters(&mut self) -> bool { - // Save current position - let start_pos = self.current; - - // Check for a single parameter without parentheses (like y => 2) - if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - self.advance(); // consume identifier - let is_arrow = self.check(&TokenType::Arrow); - self.current = start_pos; - if is_arrow { - return true; - } - } - - // Check for spread operator at the beginning (like (...e) => {}) - if self.check(&TokenType::Ellipsis) { - self.advance(); // consume '...' - - // We need an identifier after the spread - if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - self.advance(); // consume identifier - - // Check for right parenthesis and then arrow - if self.match_token(&TokenType::RightParen) && self.check(&TokenType::Arrow) { - self.current = start_pos; - return true; - } - } - - // Reset position if not an arrow function - self.current = start_pos; - } - - // Empty parameter list - no need to check for left parenthesis, it's already consumed - if self.match_token(&TokenType::RightParen) { - // Check for arrow - let is_arrow = self.check(&TokenType::Arrow); - self.current = start_pos; - return is_arrow; - } - - // Try to parse a parameter list - let mut has_rest = false; - - loop { - if has_rest { - // Rest parameter must be the last one - self.current = start_pos; - return false; - } - - if self.match_token(&TokenType::Ellipsis) { - has_rest = true; - - // We need an identifier after the spread - if !matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) { - self.current = start_pos; - return false; - } - } - - // Skip the parameter - if let Some(token_type) = self.peek_token_type() { - if matches!(token_type, TokenType::Identifier(_)) || - token_type == &TokenType::LeftBrace || - token_type == &TokenType::LeftBracket { - self.advance(); - } else { - self.current = start_pos; - return false; - } - } else { - self.current = start_pos; - return false; - } - - if self.match_token(&TokenType::RightParen) { - break; - } - - if !self.match_token(&TokenType::Comma) { - self.current = start_pos; - return false; - } - } - - // Check for arrow - let is_arrow = self.check(&TokenType::Arrow); - self.current = start_pos; - is_arrow - } - -} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e588743..0e8814a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,18 +1,10 @@ -//! JavaScript parser module -//! -//! This module contains the parser for JavaScript code. -//! It has been split into multiple files for better maintainability. - +mod asi; mod error; -mod state; -mod core; -mod expressions; -mod statements; -mod declarations; -mod patterns; -mod functions; -mod classes; -mod modules; - +mod stream; +mod context; +mod parser; +mod combinator; -pub use self::core::Parser; +pub use self::parser::Parser; +pub use self::combinator::ParserCombinator; +pub use self::error::{ParserError, ParseResult}; \ No newline at end of file diff --git a/src/parser/modules.rs b/src/parser/modules.rs deleted file mode 100644 index 54537be..0000000 --- a/src/parser/modules.rs +++ /dev/null @@ -1,385 +0,0 @@ -use crate::ast::*; -use crate::lexer::{Token, TokenType}; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - pub fn parse_program(&mut self) -> ParseResult { - let mut body = Vec::new(); - - while !self.is_at_end() { - body.push(self.parse_statement()?); - } - - // Extract comments - let comments = std::mem::take(&mut self.comments); - - Ok(Program { - source_type: SourceType::Script, - body, - comments, - }) - } - - pub fn parse_module(&mut self) -> ParseResult { - // Set strict mode for modules - self.state.in_strict_mode = true; - - let mut body = Vec::new(); - - while !self.is_at_end() { - body.push(self.parse_statement()?); - } - - // Extract comments - let comments = std::mem::take(&mut self.comments); - - Ok(Program { - source_type: SourceType::Module, - body, - comments, - }) - } - - pub fn parse_import_statement(&mut self) -> ParseResult { - let start_token = self.advance().unwrap(); // consume 'import' - - // Handle import() expression vs import statement - if self.check(&TokenType::LeftParen) { - // This is an import() expression, not an import statement - // Rewind and parse as expression statement - self.current -= 1; - return self.parse_expression_statement(); - } - - let mut specifiers = Vec::new(); - let mut source: Option> = None; - - // Handle different import forms - if matches!(self.peek_token_type(), Some(TokenType::StringLiteral(_))) { - // import "module-name"; (side-effect import) - source = self.parse_module_source()?; - } else if self.match_token(&TokenType::Star) { - // import * as name from "module-name"; (namespace import) - specifiers.push(self.parse_namespace_import()?); - source = self.parse_from_clause()?; - } else { - // import defaultExport, { named1, named2 } from "module-name"; - // or just { named1, named2 } from "module-name"; - - // Check for default import - if !self.check(&TokenType::LeftBrace) && !self.check(&TokenType::From) { - specifiers.push(self.parse_default_import()?); - - // Optional comma before named imports - if self.match_token(&TokenType::Comma) && !self.check(&TokenType::From) { - // Continue to named imports - } else if !self.check(&TokenType::From) { - // If no comma and not 'from', it's an error - return Err(self.error_unexpected("Expected ',' or 'from' after default import")); - } - } - - // Named imports - if self.match_token(&TokenType::LeftBrace) { - let named_imports = self.parse_named_imports()?; - specifiers.extend(named_imports); - } - - // Module source - if !specifiers.is_empty() { - source = self.parse_from_clause()?; - } else { - return Err(self.error_unexpected("Expected import specifiers")); - } - } - - // Parse import assertions if present - let assertions = if self.match_token(&TokenType::With) { - self.parse_import_assertions()? - } else { - Vec::new() - }; - - self.consume_semicolon("Expected ';' after import statement")?; - - if let Some(src) = source { - Ok(Statement::Import { - specifiers, - source: src, - assertions, - }) - } else { - Err(self.error_unexpected("Missing module source in import statement")) - } - } - - // Helper method to parse a module source string - pub fn parse_module_source(&mut self) -> ParseResult>> { - if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { - Ok(Some(s.into_boxed_str())) - } else { - Err(self.error_unexpected("Expected string literal for module source")) - } - } - - // Helper method to parse the 'from "module-name"' part - pub fn parse_from_clause(&mut self) -> ParseResult>> { - self.consume(&TokenType::From, "Expected 'from' after import specifiers")?; - self.parse_module_source() - } - - // Helper method to parse namespace import: * as name - pub fn parse_namespace_import(&mut self) -> ParseResult { - self.consume(&TokenType::As, "Expected 'as' after '*'")?; - // First advance to get the token - let token = self.advance().unwrap().clone(); - // Then use the cloned token for identifier_name - let local = self.identifier_name(&token)?; - Ok(ImportSpecifier::Namespace(local)) - } - - // Helper method to parse default import: defaultExport - pub fn parse_default_import(&mut self) -> ParseResult { - let local = self.expect_identifier("Expected default import name")?; - Ok(ImportSpecifier::Default(local)) - } - - // Helper method to parse named imports: { name1, name2 as alias2 } - pub fn parse_named_imports(&mut self) -> ParseResult> { - let mut specifiers = Vec::new(); - - if !self.check(&TokenType::RightBrace) { - loop { - let imported = self.expect_identifier("Expected imported name")?; - - let local = if self.match_token(&TokenType::As) { - self.expect_identifier("Expected local name after 'as'")? - } else { - imported.clone() - }; - - specifiers.push(ImportSpecifier::Named { - imported, - local, - }); - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightBrace) { - break; - } - } - } - - self.consume(&TokenType::RightBrace, "Expected '}' after named imports")?; - Ok(specifiers) - } - - pub fn parse_import_assertions(&mut self) -> ParseResult> { - self.consume(&TokenType::LeftBrace, "Expected '{' after 'with'")?; - - let mut assertions = Vec::new(); - - if !self.check(&TokenType::RightBrace) { - loop { - let key = self.expect_identifier("Expected assertion key")?; - self.consume(&TokenType::Colon, "Expected ':' after assertion key")?; - - let value = if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { - s.into_boxed_str() - } else { - return Err(self.error_unexpected("Expected string literal for assertion value")); - }; - - assertions.push(ImportAssertion { key, value }); - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightBrace) { - break; - } - } - } - - self.consume(&TokenType::RightBrace, "Expected '}' after import assertions")?; - - Ok(assertions) - } - - pub fn parse_export_statement(&mut self) -> ParseResult { - let start_token = self.advance().unwrap().clone(); // consume 'export' - - // Handle export * from "module" or export * as name from "module" - if self.match_token(&TokenType::Star) { - return self.parse_export_all(&start_token); - } - - // Handle export default ... - if self.match_token(&TokenType::Default) { - return self.parse_export_default(&start_token); - } - - // Handle export declaration (var, let, const, function, class) - if self.is_declaration_start() { - return self.parse_export_declaration(&start_token); - } - - // Handle export { ... } [from "..."] - if self.match_token(&TokenType::LeftBrace) { - return self.parse_export_named_specifiers(&start_token); - } - - // If we get here, it's an invalid export statement - Err(super::error::ParserError::new( - "Invalid export statement. Expected '*', default, declaration, or named exports", - start_token.line, - start_token.column - )) - } - - // Helper method for export * from "module" or export * as name from "module" - pub fn parse_export_all(&mut self, start_token: &Token) -> ParseResult { - let exported = if self.match_token(&TokenType::As) { - Some(self.expect_identifier("Expected exported name after 'as'")?) - } else { - None - }; - - if !self.match_token(&TokenType::From) { - return Err(super::error::ParserError::new( - "Expected 'from' after export *", - self.peek_token().unwrap().line, - self.peek_token().unwrap().column - )); - } - - let source = self.parse_module_source()? - .ok_or_else(|| super::error::ParserError::new( - "Expected string literal for module source", - self.previous().unwrap().line, - self.previous().unwrap().column - ))?; - - self.consume_semicolon("Expected ';' after export statement")?; - - Ok(Statement::Export(ExportDeclaration::All { source, exported })) - } - - // Helper method for export default ... - pub fn parse_export_default(&mut self, start_token: &Token) -> ParseResult { - let declaration = if self.check(&TokenType::Function) { - let func_decl = self.parse_function_declaration()?; - ExportDefaultDeclaration::Function(func_decl) - } else if self.check(&TokenType::Class) { - let class_decl = self.parse_class_declaration()?; - ExportDefaultDeclaration::Class(class_decl) - } else if self.check(&TokenType::Async) && self.is_async_function() { - // Handle async function - let func_decl = self.parse_async_function_declaration()?; - ExportDefaultDeclaration::Function(func_decl) - } else { - // export default expression; - let expr = self.parse_expression()?; - self.consume_semicolon("Expected ';' after export default expression")?; - ExportDefaultDeclaration::Expression(expr) - }; - - Ok(Statement::Export(ExportDeclaration::Default(Box::new(declaration)))) - } - - // Helper method for export declaration - pub fn parse_export_declaration(&mut self, start_token: &Token) -> ParseResult { - let declaration = if self.check(&TokenType::Function) { - Declaration::Function(self.parse_function_declaration()?) - } else if self.check(&TokenType::Class) { - Declaration::Class(self.parse_class_declaration()?) - } else if self.check(&TokenType::Async) && self.is_async_function() { - Declaration::Function(self.parse_async_function_declaration()?) - } else if self.check(&TokenType::Var) || self.check(&TokenType::Let) || self.check(&TokenType::Const) { - Declaration::Variable(self.parse_variable_declaration()?) - } else { - return Err(super::error::ParserError::new( - "Expected declaration in export statement", - self.peek_token().unwrap().line, - self.peek_token().unwrap().column - )); - }; - - Ok(Statement::Export(ExportDeclaration::Named { - declaration: Some(Box::new(declaration)), - specifiers: Vec::new(), - source: None, - })) - } - - // Helper method for export { ... } [from "..."] - pub fn parse_export_named_specifiers(&mut self, start_token: &Token) -> ParseResult { - let specifiers = self.parse_export_specifiers()?; - - // Optional from clause - let source = if self.match_token(&TokenType::From) { - Some(self.parse_module_source()?.ok_or_else(|| super::error::ParserError::new( - "Expected string literal for module source", - self.previous().unwrap().line, - self.previous().unwrap().column - ))?) - } else { - None - }; - - self.consume_semicolon("Expected ';' after export statement")?; - - Ok(Statement::Export(ExportDeclaration::Named { - declaration: None, - specifiers, - source, - })) - } - - // Helper method to parse export specifiers: { name1, name2 as alias2 } - pub fn parse_export_specifiers(&mut self) -> ParseResult> { - let mut specifiers = Vec::new(); - - if !self.check(&TokenType::RightBrace) { - loop { - let local = self.expect_identifier("Expected exported identifier")?; - let exported = if self.match_token(&TokenType::As) { - self.expect_identifier("Expected exported name after 'as'")? - } else { - local.clone() - }; - - specifiers.push(ExportSpecifier { local, exported }); - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightBrace) { - break; - } - } - } - - self.consume(&TokenType::RightBrace, "Expected '}' after export specifiers")?; - Ok(specifiers) - } - - // Helper method to check if the current token starts a declaration - pub fn is_declaration_start(&self) -> bool { - self.check(&TokenType::Var) || - self.check(&TokenType::Let) || - self.check(&TokenType::Const) || - self.check(&TokenType::Function) || - self.check(&TokenType::Class) || - (self.check(&TokenType::Async) && self.is_async_function()) - } -} diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..a0f7342 --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,237 @@ +use crate::ast::*; +use crate::lexer::{Token, LexicalContext}; +use super::error::{ParserError, ParseResult}; +use super::stream::TokenStream; +use super::combinator::ParserCombinator; +use super::context::ParserContext; +use crate::grammar::*; + +/* +use std::borrow::Cow; + +pub struct TokenAccess<'a> { + token: Cow<'a, Token>, +} + +impl<'a> std::ops::Deref for TokenAccess<'a> { + type Target = Token; + + fn deref(&self) -> &Self::Target { + self.token.as_ref() + } +}*/ + +pub struct Parser<'a> { + stream: TokenStream<'a>, + context: ParserContext, +} + +impl<'a> Parser<'a> { + pub fn new(tokens: &'a [(Token, [usize; 2])]) -> Self { + Self { + stream: TokenStream::new(tokens), + context: ParserContext::new(), + } + } + + // Main parsing methods + pub fn parse_module(&mut self) -> ParseResult { + ModuleNode::new().parse(self) + } + + pub fn parse_script(&mut self) -> ParseResult { + ScriptNode::new().parse(self) + } + + pub fn parse_expression(&mut self) -> ParseResult { + ExpressionNode::new().parse(self) + } + + pub fn parse_statement(&mut self) -> ParseResult { + StatementNode::new().parse(self) + } + + // Source handling + pub fn attach_source(&mut self, source: &'a str) { + self.stream.attach_source(source); + } + + pub fn get_source_text(&self) -> &str { + self.stream.get_source_text() + } + + // Error handling + pub fn error_at_current(&self, message: &str) -> ParserError { + ParserError::at_current(self, message) + } + + // TokenStream delegations + pub fn is_at_end(&self) -> bool { + self.stream.is_at_end() + } + + /* + #[inline(always)] + fn peek_internal(&self) -> Option> { + self.stream.peek().map(|token| { + let cow = if !matches!(token, Token::Identifier(_)) && self.current_context().allows_token_as_identifier(token) { + if let Some(text) = token.keyword_text() { + Cow::Owned(Token::Identifier(text.to_string())) + } else { + Cow::Borrowed(token) + } + } else { + Cow::Borrowed(token) + }; + + TokenAccess { token: cow } + }) + + } + */ + + pub fn peek(&self) -> &Token { + self.stream.peek() + } + + pub fn peek_previous(&self) -> &Token { + self.stream.peek_previous() + } + + pub fn peek_next(&self, offset: usize) -> &Token { + self.stream.peek_next(offset) + } + + pub fn advance(&mut self) -> bool { + self.stream.advance() + } + + pub fn check(&self, token_type: &Token) -> bool { + self.stream.check(token_type) + } + + pub fn consume(&mut self, token_type: &Token) -> bool { + self.stream.consume(token_type) + } + + pub fn previous_line_terminator(&self) -> bool { + self.stream.previous_line_terminator() + } + + pub fn save_position(&self) -> usize { + self.stream.save_position() + } + + pub fn restore_position(&mut self, position: usize) { + self.stream.restore_position(position) + } + + pub fn get_current_position(&self) -> [usize; 2] { + self.stream.peek_position() + } + + pub fn assert_consume(&mut self, token_type: &Token, message: &str) -> ParseResult<&Token> { + if self.consume(token_type) { + Ok(self.peek_previous()) + } else { + Err(self.error_at_current(message)) + } + } + + pub fn get_token_stack_info(&self) -> Vec { + self.stream.get_token_stack_info() + } + + // ParserContext delegations + pub fn get_context_stack_info(&self) -> Vec { + self.context.get_context_stack_info() + } + + pub fn is_in_function(&self) -> bool { + self.context.is_in_function() + } + + pub fn is_in_loop_body(&self) -> bool { + self.context.is_in_loop_body() + } + + pub fn is_in_switch(&self) -> bool { + self.context.is_in_switch() + } + + pub fn allows_yield(&self) -> bool { + self.context.allows_yield() + } + + pub fn allows_await(&self) -> bool { + self.context.allows_await() + } + + // TODO maybe + /* + + pub struct ContextGuard<'a, 'b> { + parser: &'a mut Parser<'b>, + } + + impl<'a, 'b> Drop for ContextGuard<'a, 'b> { + fn drop(&mut self) { + self.parser.pop_context(); + } + } + + impl<'a> Parser<'a> { + pub fn with_context_guard(&mut self, context: LexicalContext) -> ContextGuard<'_, 'a> { + self.push_context(context); + ContextGuard { parser: self } + } + } + */ + + pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult + where + F: FnOnce(&mut Self) -> ParseResult, + { + self.context.push_context(context); + let result = f(self); + self.context.pop_context(); + result + } + + // Label management + pub fn add_label(&mut self, label: Box) { + self.context.labels.insert(label); + } + + pub fn remove_label(&mut self, label: &str) { + self.context.labels.remove(label); + } + + pub fn has_label(&self, label: &str) -> bool { + self.context.labels.contains(label) + } + + // Strict mode handling + pub fn set_strict_mode(&mut self, strict: bool) { + self.context.in_strict_mode = strict; + } + + pub fn is_strict_mode(&self) -> bool { + self.context.in_strict_mode + } + +} + + +/* + #[inline(always)] + fn coalesce_identifier<'t>(&self, token: &'t mut Token) { + // Only transform if not already an identifier and context allows it + if !matches!(token, Token::Identifier(_)) && self.current_context().allows_token_as_identifier(token) { + if let Some(text) = token.keyword_text() { + // Transform the token in place to an identifier + *token = Token::Identifier(text.to_string()); + } + } + } + */ \ No newline at end of file diff --git a/src/parser/patterns.rs b/src/parser/patterns.rs deleted file mode 100644 index 6b27415..0000000 --- a/src/parser/patterns.rs +++ /dev/null @@ -1,233 +0,0 @@ -use crate::ast::*; -use crate::lexer::TokenType; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - pub fn parse_pattern(&mut self) -> ParseResult { - match self.peek_token_type() { - // Identifier pattern - Some(TokenType::Identifier(_)) | - Some(TokenType::Default) | - Some(TokenType::As) | - Some(TokenType::From) => { - let name = self.expect_identifier("Expected identifier in pattern")?; - Ok(Pattern::Identifier(name)) - }, - // Object pattern: { x, y } - Some(TokenType::LeftBrace) => { - self.advance(); // consume '{' - - let mut properties = Vec::new(); - - if !self.check(&TokenType::RightBrace) { - loop { - if self.match_token(&TokenType::Ellipsis) { - // Rest element - let argument = self.parse_pattern()?; - properties.push(ObjectPatternProperty::Rest(Box::new(argument))); - - // Rest element must be the last one - if !self.check(&TokenType::RightBrace) { - return Err(self.error_unexpected("Rest element must be the last element in object pattern")); - } - break; - } else { - // Regular property - let key = self.parse_property_key()?; - - // Handle shorthand: { x } - let (value, computed, shorthand) = if !self.check(&TokenType::Colon) { - if let PropertyKey::Identifier(name) = &key { - // Shorthand property: { x } - let pattern = Pattern::Identifier(name.clone()); - - // Check for default value: { x = 1 } - if self.match_token(&TokenType::Equal) { - let default = self.parse_expression()?; - (Pattern::AssignmentPattern { - left: Box::new(pattern), - right: default, - }, false, true) - } else { - (pattern, false, true) - } - } else { - return Err(self.error_unexpected("Invalid shorthand property in object pattern")); - } - } else { - // Full syntax: { key: value } - self.advance(); // consume ':' - let pattern = self.parse_pattern()?; - - // Check for default value: { key: value = 1 } - if self.match_token(&TokenType::Equal) { - let default = self.parse_expression()?; - (Pattern::AssignmentPattern { - left: Box::new(pattern), - right: default, - }, matches!(key, PropertyKey::Computed(_)), false) - } else { - (pattern, matches!(key, PropertyKey::Computed(_)), false) - } - }; - - properties.push(ObjectPatternProperty::Property { - key, - value, - computed, - shorthand, - }); - } - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightBrace) { - break; - } - } - } - - self.consume(&TokenType::RightBrace, "Expected '}' after object pattern")?; - - Ok(Pattern::ObjectPattern(properties)) - }, - - // Array pattern: [x, y, z = 1] - Some(TokenType::LeftBracket) => { - self.advance(); // consume '[' - - let mut elements = Vec::new(); - - while !self.check(&TokenType::RightBracket) && !self.is_at_end() { - if self.match_token(&TokenType::Comma) { - // Elision (hole) - elements.push(None); - } else { - if self.match_token(&TokenType::Ellipsis) { - // Rest element - let argument = self.parse_pattern()?; - elements.push(Some(Pattern::RestElement(Box::new(argument)))); - - // Rest element must be the last one - if !self.check(&TokenType::RightBracket) { - if self.match_token(&TokenType::Comma) { - if !self.check(&TokenType::RightBracket) { - return Err(self.error_unexpected("Rest element must be the last element in array pattern")); - } - } else { - return Err(self.error_unexpected("Expected ',' or ']' after rest element in array pattern")); - } - } - break; - } else { - // Regular element - let pattern = self.parse_pattern()?; - - // Check for default value: [x = 1] - if self.match_token(&TokenType::Equal) { - let default = self.parse_expression()?; - elements.push(Some(Pattern::AssignmentPattern { - left: Box::new(pattern), - right: default, - })); - } else { - elements.push(Some(pattern)); - } - } - - if !self.check(&TokenType::RightBracket) { - self.consume(&TokenType::Comma, "Expected ',' after array pattern element")?; - } - } - } - - self.consume(&TokenType::RightBracket, "Expected ']' after array pattern")?; - - Ok(Pattern::ArrayPattern(elements)) - }, - - // Assignment pattern: x = 1 (handled by the caller) - - _ => { - Err(self.error_unexpected("Expected pattern")) - } - } - } - // Helper method to convert an expression to a pattern (for arrow function parameters) - pub fn expression_to_pattern(&self, expr: Expression) -> ParseResult { - match expr { - Expression::Identifier(name) => Ok(Pattern::Identifier(name)), - Expression::Object(props) => { - // Convert object expression to object pattern - let mut pattern_props = Vec::new(); - - for prop in props { - match prop { - ObjectProperty::Property { key, value, computed, shorthand, .. } => { - if let Expression::Identifier(name) = value { - pattern_props.push(ObjectPatternProperty::Property { - key, - value: Pattern::Identifier(name), - computed, - shorthand, - }); - } else { - return Err(self.error_unexpected("Invalid object pattern")); - } - }, - ObjectProperty::Spread(expr) => { - if let Expression::Identifier(name) = expr { - pattern_props.push(ObjectPatternProperty::Rest( - Box::new(Pattern::Identifier(name)) - )); - } else { - return Err(self.error_unexpected("Invalid rest pattern")); - } - }, - _ => return Err(self.error_unexpected("Invalid object pattern")), - } - } - - Ok(Pattern::ObjectPattern(pattern_props)) - }, - Expression::Array(elements) => { - // Convert array expression to array pattern - let mut pattern_elements = Vec::new(); - - for element in elements { - match element { - None => pattern_elements.push(None), - Some(ArrayElement::Expression(expr)) => { - if let Expression::Identifier(name) = expr { - pattern_elements.push(Some(Pattern::Identifier(name))); - } else { - return Err(self.error_unexpected("Invalid array pattern")); - } - }, - Some(ArrayElement::Spread(expr)) => { - if let Expression::Identifier(name) = expr { - pattern_elements.push(Some(Pattern::RestElement( - Box::new(Pattern::Identifier(name)) - ))); - } else { - return Err(self.error_unexpected("Invalid rest pattern")); - } - }, - Some(ArrayElement::Hole) => { - // Handle hole elements (like [,,,]) by adding None to the pattern elements - pattern_elements.push(None); - }, - } - } - - Ok(Pattern::ArrayPattern(pattern_elements)) - }, - _ => Err(self.error_unexpected("Invalid pattern")), - } - } -} diff --git a/src/parser/state.rs b/src/parser/state.rs deleted file mode 100644 index 8f14400..0000000 --- a/src/parser/state.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::collections::HashSet; - -pub struct ParserState { - pub in_strict_mode: bool, - pub allow_yield: bool, - pub allow_await: bool, - pub in_loop: bool, - pub in_switch: bool, - pub in_function: bool, - pub labels: HashSet>, -} - -impl ParserState { - pub fn new() -> Self { - Self { - in_strict_mode: false, - allow_yield: false, - allow_await: false, - in_loop: false, - in_switch: false, - in_function: false, - labels: HashSet::new(), - } - } -} \ No newline at end of file diff --git a/src/parser/statements.rs b/src/parser/statements.rs deleted file mode 100644 index 5911089..0000000 --- a/src/parser/statements.rs +++ /dev/null @@ -1,684 +0,0 @@ -use crate::ast::*; -use crate::lexer::TokenType; -use super::error::{ParserError, ParseResult}; -use super::core::Parser; - -impl Parser { - - pub fn parse_statement(&mut self) -> ParseResult { - match self.peek_token_type() { - // Empty statement (just a semicolon) - Some(TokenType::Semicolon) => { - self.advance(); - Ok(Statement::Empty) - }, - - // Block statement { ... } - Some(TokenType::LeftBrace) => self.parse_block(), - - // Declaration statements - Some(TokenType::Var) | Some(TokenType::Let) | Some(TokenType::Const) => - self.parse_variable_statement(), - Some(TokenType::Function) => - self.parse_function_statement(), - Some(TokenType::Class) => - self.parse_class_statement(), - - // Control flow statements - Some(TokenType::If) => self.parse_if(), - Some(TokenType::Switch) => self.parse_switch(), - Some(TokenType::For) => self.parse_for(), - Some(TokenType::While) => self.parse_while(), - Some(TokenType::Do) => self.parse_do_while(), - - // Exception handling - Some(TokenType::Try) => self.parse_try(), - Some(TokenType::Throw) => self.parse_throw(), - - // Function control - Some(TokenType::Return) => self.parse_return(), - Some(TokenType::Break) => self.parse_break(), - Some(TokenType::Continue) => self.parse_continue(), - - // Module statements - Some(TokenType::Import) => self.parse_import_statement(), - Some(TokenType::Export) => self.parse_export_statement(), - - // Other statements - Some(TokenType::With) => self.parse_with(), - Some(TokenType::Debugger) => self.parse_debugger(), - - // Labeled statement - Some(TokenType::Identifier(_)) if self.is_label() => self.parse_labeled(), - - // Default: expression statement - _ => self.parse_expression_statement(), - } - } - - /// Parse a block statement: { statements... } - fn parse_block(&mut self) -> ParseResult { - self.consume(&TokenType::LeftBrace, "Expected '{'")?; - - let mut statements = Vec::new(); - - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - statements.push(self.parse_statement()?); - } - - self.consume(&TokenType::RightBrace, "Expected '}'")?; - - Ok(Statement::Block(statements)) - } - - /// Parse variable declarations as a statement - fn parse_variable_statement(&mut self) -> ParseResult { - let declaration = self.parse_variable_declaration()?; - Ok(Statement::Declaration(Declaration::Variable(declaration))) - } - - /// Parse function declaration as a statement - fn parse_function_statement(&mut self) -> ParseResult { - let declaration = self.parse_function_declaration()?; - Ok(Statement::Declaration(Declaration::Function(declaration))) - } - - /// Parse class declaration as a statement - fn parse_class_statement(&mut self) -> ParseResult { - let declaration = self.parse_class_declaration()?; - Ok(Statement::Declaration(Declaration::Class(declaration))) - } - - /// Parse if statement: if (condition) consequent else alternate - fn parse_if(&mut self) -> ParseResult { - self.advance(); // consume 'if' - self.consume(&TokenType::LeftParen, "Expected '(' after 'if'")?; - - let test = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after if condition")?; - - let consequent = Box::new(self.parse_statement()?); - let alternate = self.match_token(&TokenType::Else) - .then(|| self.parse_statement().map(Box::new)) - .transpose()?; - - Ok(Statement::If { test, consequent, alternate }) - } - - /// Parse switch statement: switch (discriminant) { case/default... } - fn parse_switch(&mut self) -> ParseResult { - self.advance(); // consume 'switch' - self.consume(&TokenType::LeftParen, "Expected '(' after 'switch'")?; - - let discriminant = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after switch expression")?; - self.consume(&TokenType::LeftBrace, "Expected '{' to start switch block")?; - - // Save previous switch state - let prev_in_switch = self.state.in_switch; - self.state.in_switch = true; - - let mut cases = Vec::new(); - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - cases.push(self.parse_switch_case()?); - } - - self.consume(&TokenType::RightBrace, "Expected '}' to end switch block")?; - - // Restore previous switch state - self.state.in_switch = prev_in_switch; - - Ok(Statement::Switch { discriminant, cases }) - } - - /// Parse a single case in a switch statement - fn parse_switch_case(&mut self) -> ParseResult { - let test = if self.match_token(&TokenType::Case) { - Some(self.parse_expression()?) - } else if self.match_token(&TokenType::Default) { - None - } else { - return Err(self.error_unexpected("Expected 'case' or 'default'")); - }; - - self.consume(&TokenType::Colon, "Expected ':' after case value")?; - - let mut consequent = Vec::new(); - - // Parse statements until next case, default, or end of switch - while !self.check(&TokenType::Case) && - !self.check(&TokenType::Default) && - !self.check(&TokenType::RightBrace) && - !self.is_at_end() { - consequent.push(self.parse_statement()?); - } - - Ok(SwitchCase { test, consequent }) - } - - /// Parse while statement: while (test) statement - fn parse_while(&mut self) -> ParseResult { - self.advance(); // consume 'while' - self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; - - let test = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; - - // Save previous loop state - let prev_in_loop = self.state.in_loop; - self.state.in_loop = true; - - let body = Box::new(self.parse_statement()?); - - // Restore previous loop state - self.state.in_loop = prev_in_loop; - - Ok(Statement::Loop(LoopStatement::While { test, body })) - } - - /// Parse do-while statement: do statement while (test); - fn parse_do_while(&mut self) -> ParseResult { - self.advance(); // consume 'do' - - // Save previous loop state - let prev_in_loop = self.state.in_loop; - self.state.in_loop = true; - - let body = Box::new(self.parse_statement()?); - - // Restore previous loop state - self.state.in_loop = prev_in_loop; - - self.consume(&TokenType::While, "Expected 'while' after do block")?; - self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; - - let test = self.parse_expression()?; - - self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; - self.consume_semicolon("Expected ';' after do-while statement")?; - - Ok(Statement::Loop(LoopStatement::DoWhile { body, test })) - } - - /// Parse try statement: try block [catch] [finally] - fn parse_try(&mut self) -> ParseResult { - self.advance(); // consume 'try' - - let block = Box::new(self.parse_block()?); - - // Parse optional catch clause - let handler = self.match_token(&TokenType::Catch) - .then(|| self.parse_catch_clause()) - .transpose()?; - - // Parse optional finally clause - let finalizer = self.match_token(&TokenType::Finally) - .then(|| self.parse_block().map(Box::new)) - .transpose()?; - - // Either catch or finally must be present - if handler.is_none() && finalizer.is_none() { - return Err(self.error_unexpected("Expected 'catch' or 'finally' after try block")); - } - - Ok(Statement::Try { block, handler, finalizer }) - } - - /// Parse catch clause: catch ([param]) block - fn parse_catch_clause(&mut self) -> ParseResult { - // Optional catch parameter - let param = self.match_token(&TokenType::LeftParen) - .then(|| { - let param = self.parse_pattern()?; - self.consume(&TokenType::RightParen, "Expected ')' after catch parameter")?; - // Explicitly specify the error type as ParserError - Ok::<_, super::error::ParserError>(param) - }) - .transpose()?; - - let body = Box::new(self.parse_block()?); - - Ok(CatchClause { param, body }) - } - - /// Parse throw statement: throw expression; - fn parse_throw(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'throw' - - // No line terminator allowed between throw and expression - if self.previous_line_terminator() { - return Err(ParserError::new("Illegal newline after throw", token.line, token.column)); - } - - let expr = self.parse_expression()?; - self.consume_semicolon("Expected ';' after throw statement")?; - - Ok(Statement::Throw(expr)) - } - - /// Parse return statement: return [expression]; - fn parse_return(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'return' - - // Check if we're in a function - if !self.state.in_function { - return Err(ParserError::new("'return' statement outside of function", token.line, token.column)); - } - - // Return with no value if semicolon or end of block - let argument = (!self.check(&TokenType::Semicolon) && - !self.check(&TokenType::RightBrace) && - !self.is_at_end() && - !self.previous_line_terminator()) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume_semicolon("Expected ';' after return statement")?; - - Ok(Statement::Return(argument)) - } - - /// Parse break statement: break [label]; - fn parse_break(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'break' - - // Check if we're in a loop or switch - if !self.state.in_loop && !self.state.in_switch { - return Err(ParserError::new("'break' statement outside of loop or switch", token.line, token.column)); - } - - // Optional label - let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { - if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { - self.advance(); - - // Verify label exists - let label_name = name.into_boxed_str(); - if !self.state.labels.contains(&label_name) { - return Err(ParserError::new(&format!("Undefined label '{}'", label_name), token.line, token.column)); - } - - Some(label_name) - } else { - None - } - } else { - None - }; - - self.consume_semicolon("Expected ';' after break statement")?; - - Ok(Statement::Break(label)) - } - - /// Parse continue statement: continue [label]; - fn parse_continue(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'continue' - - // Check if we're in a loop - if !self.state.in_loop { - return Err(ParserError::new("'continue' statement outside of loop", token.line, token.column)); - } - - // Optional label - let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { - if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { - self.advance(); - - // Verify label exists - let label_name = name.into_boxed_str(); - if !self.state.labels.contains(&label_name) { - return Err(ParserError::new(&format!("Undefined label '{}'", label_name), token.line, token.column)); - } - - Some(label_name) - } else { - None - } - } else { - None - }; - - self.consume_semicolon("Expected ';' after continue statement")?; - - Ok(Statement::Continue(label)) - } - - /// Parse with statement: with (object) statement - fn parse_with(&mut self) -> ParseResult { - self.advance(); // consume 'with' - - // Check if in strict mode - if self.state.in_strict_mode { - return Err(ParserError::new( - "'with' statements are not allowed in strict mode", - self.previous().unwrap().line, - self.previous().unwrap().column - )); - } - - self.consume(&TokenType::LeftParen, "Expected '(' after 'with'")?; - - let object = self.parse_expression()?; - - self.consume(&TokenType::RightParen, "Expected ')' after with expression")?; - - let body = Box::new(self.parse_statement()?); - - Ok(Statement::With { object, body }) - } - - /// Parse debugger statement: debugger; - fn parse_debugger(&mut self) -> ParseResult { - self.advance(); // consume 'debugger' - - self.consume_semicolon("Expected ';' after debugger statement")?; - - Ok(Statement::Debugger) - } - - /// Parse labeled statement: identifier: statement - fn parse_labeled(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); - let label = self.identifier_name(&token)?; - - self.consume(&TokenType::Colon, "Expected ':' after label")?; - - // Add label to the set of active labels - let label_exists = !self.state.labels.insert(label.clone()); - if label_exists { - return Err(ParserError::new( - &format!("Label '{}' has already been declared", label), - token.line, - token.column - )); - } - - // Parse the labeled statement - let body = Box::new(self.parse_statement()?); - - // Remove the label from the set - self.state.labels.remove(&label); - - Ok(Statement::Labeled { label, body }) - } - - /// Check if the current token is a label - fn is_label(&self) -> bool { - // Check if the current token is an identifier and the next token is a colon - if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - if let Some(next_token) = self.tokens.get(self.current + 1) { - return matches!(next_token.token_type, TokenType::Colon); - } - } - false - } - - /// Parse expression statement: expression; - pub fn parse_expression_statement(&mut self) -> ParseResult { - // Handle directives (like "use strict") - let start_pos = self.current; - - let expr = self.parse_expression()?; - - // Check for directive prologue - let is_directive = if let Expression::Literal(Literal::String(_)) = &expr { - // Only consider as directive if it's at the beginning of a function/program - // and is a simple string literal (not an expression) - start_pos == 0 || self.previous().unwrap().token_type == TokenType::LeftBrace - } else { - false - }; - - self.consume_semicolon("Expected ';' after expression")?; - - // If this is a "use strict" directive, update parser state - if is_directive { - if let Expression::Literal(Literal::String(value)) = &expr { - if value.as_ref() == "use strict" { - self.state.in_strict_mode = true; - } - } - } - - Ok(Statement::Expression(expr)) - } - - /// Parse for statement: for ([init]; [test]; [update]) statement - fn parse_for(&mut self) -> ParseResult { - self.advance(); // consume 'for' - - // Check for for-await-of - let is_await = self.match_token(&TokenType::Await); - - self.consume(&TokenType::LeftParen, "Expected '(' after 'for'")?; - - // Save previous loop state - let prev_in_loop = self.state.in_loop; - self.state.in_loop = true; - - // Parse initialization - let result = if self.match_token(&TokenType::Semicolon) { - // No initialization - standard for loop with empty init - // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body - let body = Box::new(self.parse_statement()?); - - LoopStatement::For { - init: None, - test, - update, - body - } - } else if self.check(&TokenType::Var) || self.check(&TokenType::Let) || self.check(&TokenType::Const) { - // Variable declaration initialization - let decl = self.parse_variable_declaration()?; - - // Check for for-in or for-of - if self.check(&TokenType::In) { - // for-in loop with variable declaration - self.advance(); // consume 'in' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - let body = Box::new(self.parse_statement()?); - - LoopStatement::ForIn { - left: ForInOfLeft::Declaration(decl), - right, - body - } - } else if self.check(&TokenType::Of) { - // for-of loop with variable declaration - self.advance(); // consume 'of' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - let body = Box::new(self.parse_statement()?); - - LoopStatement::ForOf { - left: ForInOfLeft::Declaration(decl), - right, - body, - is_await - } - } else { - // Standard for loop with variable declaration - self.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - - // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body - let body = Box::new(self.parse_statement()?); - - LoopStatement::For { - init: Some(ForInit::Variable(decl)), - test, - update, - body - } - } - } else if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - // For identifiers, we need to check if they're followed by 'in', 'of', or other tokens - // that would indicate different types of for loops - - // First, check if the next tokens form a for-in or for-of loop - // Save current position to backtrack if needed - let start_pos = self.current; - - // Parse the identifier - let token = self.advance().unwrap().clone(); - let name = self.identifier_name(&token)?; - let left = Expression::Identifier(name); - - // Check what follows the identifier - if self.check(&TokenType::In) { - // for-in loop with identifier - self.advance(); // consume 'in' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - let body = Box::new(self.parse_statement()?); - - LoopStatement::ForIn { - left: ForInOfLeft::Pattern(left), - right, - body - } - } else if self.check(&TokenType::Of) { - // for-of loop with identifier - self.advance(); // consume 'of' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - let body = Box::new(self.parse_statement()?); - - LoopStatement::ForOf { - left: ForInOfLeft::Pattern(left), - right, - body, - is_await - } - } else { - // Not a for-in or for-of loop, so it must be a standard for loop - // Reset position and parse the full initialization expression - self.current = start_pos; - - // Parse the initialization expression - let init_expr = self.parse_expression()?; - - self.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - - // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update (which might be empty) - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body - let body = Box::new(self.parse_statement()?); - - LoopStatement::For { - init: Some(ForInit::Expression(init_expr)), - test, - update, - body - } - } - } else { - // For other expressions (including array/object literals and complex expressions) - // Parse the full initialization expression - let init_expr = self.parse_expression()?; - - // Check if this is a for-in or for-of loop - if self.check(&TokenType::In) { - // for-in loop with expression - self.advance(); // consume 'in' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - let body = Box::new(self.parse_statement()?); - - LoopStatement::ForIn { - left: ForInOfLeft::Pattern(init_expr), - right, - body - } - } else if self.check(&TokenType::Of) { - // for-of loop with expression - self.advance(); // consume 'of' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - let body = Box::new(self.parse_statement()?); - - LoopStatement::ForOf { - left: ForInOfLeft::Pattern(init_expr), - right, - body, - is_await - } - } else { - // Standard for loop with expression initialization - self.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - - // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update (which might be empty) - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body - let body = Box::new(self.parse_statement()?); - - LoopStatement::For { - init: Some(ForInit::Expression(init_expr)), - test, - update, - body - } - } - }; - - // Restore previous loop state - self.state.in_loop = prev_in_loop; - - Ok(Statement::Loop(result)) - } - -} diff --git a/src/parser/stream.rs b/src/parser/stream.rs new file mode 100644 index 0000000..fa3138f --- /dev/null +++ b/src/parser/stream.rs @@ -0,0 +1,121 @@ +use crate::lexer::Token; + +pub struct TokenStream<'a> { + tokens: &'a [(Token, [usize; 2])], + current: usize, + source: &'a str, +} + +impl<'a> TokenStream<'a> { + pub fn new(tokens: &'a [(Token, [usize; 2])]) -> Self { + Self { + tokens, + current: 0, + source: "", + } + } + + pub fn attach_source(&mut self, source: &'a str) { + self.source = source; + } + + pub fn get_source_text(&self) -> &str { + self.source + } + + pub fn is_at_end(&self) -> bool { + matches!(self.peek(), Token::EOS) + } + + pub fn peek(&self) -> &Token { + &self.tokens[self.current].0 + } + + pub fn peek_position(&self) -> [usize; 2] { + self.tokens[self.current].1 + } + + pub fn peek_previous(&self) -> &Token { + if self.current > 0 { + &self.tokens[self.current - 1].0 + } else { + &Token::EOS + } + } + + pub fn peek_next(&self, offset: usize) -> &Token { + let index = self.current + offset; + if index < self.tokens.len() { + &self.tokens[index].0 + } else { + &Token::EOS + } + } + + pub fn advance(&mut self) -> bool { + if self.current < self.tokens.len() { + self.current += 1; + true + } else { + false + } + } + + pub fn check(&self, token_type: &Token) -> bool { + &self.tokens[self.current].0 == token_type + } + + pub fn consume(&mut self, token_type: &Token) -> bool { + if self.current < self.tokens.len() && &self.tokens[self.current].0 == token_type { + self.current += 1; + true + } else { + false + } + } + + pub fn previous_line_terminator(&self) -> bool { + if self.current > 0 && self.current < self.tokens.len() { + let prev_line = self.tokens[self.current - 1].1[0]; + let curr_line = self.tokens[self.current].1[0]; + prev_line < curr_line + } else { + false + } + } + + pub fn save_position(&self) -> usize { + self.current + } + + pub fn restore_position(&mut self, position: usize) { + self.current = position; + } + + pub fn get_token_stack_info(&self) -> Vec { + + let count = 10; + + let mut history = Vec::with_capacity(count); + + // Start from the current position and go backwards + let start_pos = if self.current >= count - 1 { + self.current - (count - 1) + } else { + 0 + }; + + // Add tokens from start_pos up to and including the current position + for i in start_pos..=self.current { + if i < self.tokens.len() { + history.push(self.tokens[i].0.clone()); + } + } + + history + .iter() + .rev() + .map(|token| format!("{}", token.to_string())) + .collect() + } +} diff --git a/src/unparser/combinator.rs b/src/unparser/combinator.rs new file mode 100644 index 0000000..473aa3e --- /dev/null +++ b/src/unparser/combinator.rs @@ -0,0 +1,5 @@ +use super::unparser::Unparser; + +pub trait UnparserCombinator { + fn unparse(&self, unparser: &mut Unparser, node: &T); +} diff --git a/src/unparser/formatter.rs b/src/unparser/formatter.rs new file mode 100644 index 0000000..a6603c6 --- /dev/null +++ b/src/unparser/formatter.rs @@ -0,0 +1,109 @@ +use std::fmt::Write; + +#[derive(Debug, Clone)] +pub enum FormatStyle { + Compact, + Pretty { + indent_size: usize, + }, +} + +pub struct Formatter { + buffer: String, + style: FormatStyle, + current_indent: usize, + line_start: bool, +} + +impl Formatter { + pub fn new(style: FormatStyle) -> Self { + Self { + buffer: String::new(), + style, + current_indent: 0, + line_start: true, + } + } + + pub fn write_str(&mut self, s: &str) { + if self.line_start { + match &self.style { + FormatStyle::Pretty { indent_size } => { + for _ in 0..self.current_indent * indent_size { + self.buffer.push(' '); + } + }, + FormatStyle::Compact => {} + } + self.line_start = false; + } + self.buffer.push_str(s); + } + + pub fn write_char(&mut self, c: char) { + if self.line_start { + match &self.style { + FormatStyle::Pretty { indent_size } => { + for _ in 0..self.current_indent * indent_size { + self.buffer.push(' '); + } + }, + FormatStyle::Compact => {} + } + self.line_start = false; + } + self.buffer.push(c); + } + + pub fn newline(&mut self) { + match self.style { + FormatStyle::Pretty { .. } => { + self.buffer.push('\n'); + self.line_start = true; + }, + FormatStyle::Compact => { + } + } + } + + pub fn space(&mut self) { + match self.style { + FormatStyle::Pretty { .. } => { + self.buffer.push(' '); + }, + FormatStyle::Compact => {} + } + } + + pub fn undefined(&mut self) { + match self.style { + FormatStyle::Pretty { .. } => { + self.write_str("undefined"); + }, + FormatStyle::Compact => { + self.write_str("void 0"); + } + } + } + + pub fn indent(&mut self) { + self.current_indent += 1; + } + + pub fn dedent(&mut self) { + if self.current_indent > 0 { + self.current_indent -= 1; + } + } + + pub fn as_str(&self) -> &str { + &self.buffer + } +} + +impl Write for Formatter { + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.write_str(s); + Ok(()) + } +} diff --git a/src/unparser/mod.rs b/src/unparser/mod.rs new file mode 100644 index 0000000..dca0ef3 --- /dev/null +++ b/src/unparser/mod.rs @@ -0,0 +1,7 @@ +mod formatter; +mod unparser; +mod combinator; + +pub use self::unparser::Unparser; +pub use self::combinator::UnparserCombinator; +pub use self::formatter::FormatStyle; \ No newline at end of file diff --git a/src/unparser/unparser.rs b/src/unparser/unparser.rs new file mode 100644 index 0000000..a2bbdf1 --- /dev/null +++ b/src/unparser/unparser.rs @@ -0,0 +1,79 @@ +use crate::ast::*; +use super::formatter::{Formatter, FormatStyle}; +use super::combinator::UnparserCombinator; + +use crate::grammar::*; + +pub struct Unparser { + formatter: Formatter, +} + +impl Unparser { + pub fn new(style: FormatStyle) -> Self { + Self { + formatter: Formatter::new(style), + } + } + + // Main unparse methods + pub fn unparse_module(&mut self, program: &Program) -> &str { + // TODO generic + ModuleNode::new().unparse(self, program); + self.formatter.as_str() + } + + pub fn unparse_script(&mut self, program: &Program) -> &str { + ScriptNode::new().unparse(self, program); + self.formatter.as_str() + } + + /* + pub fn unparse_expression(&mut self, expr: &Expression) -> String { + ExpressionNode::new().unparse(self, expr); + self.formatter.into_string() + } + + pub fn unparse_statement(&mut self, stmt: &Statement) -> String { + StatementNode::new().unparse(self, stmt); + self.formatter.into_string() + } + */ + + // Formatter delegations + pub fn write_str(&mut self, s: &str) { + self.formatter.write_str(s); + } + + pub fn write_char(&mut self, c: char) { + self.formatter.write_char(c); + } + + pub fn newline(&mut self) { + self.formatter.newline(); + } + + pub fn undefined(&mut self) { + self.formatter.undefined(); + } + + pub fn space(&mut self) { + self.formatter.space(); + } + + pub fn indent(&mut self) { + self.formatter.indent(); + } + + pub fn dedent(&mut self) { + self.formatter.dedent(); + } + + pub fn with_indent(&mut self, f: F) + where + F: FnOnce(&mut Self) + { + self.indent(); + f(self); + self.dedent(); + } +}