From 7f2e0e110a04dbaf0df1c0a46e38dab78dafd1d6 Mon Sep 17 00:00:00 2001 From: Jan Cajthaml Date: Sun, 4 May 2025 22:45:12 +0200 Subject: [PATCH 1/7] more robust arrow functions and expressions parsing --- examples/tricky/index.js | 21 +++- src/main.rs | 1 + src/parser/classes.rs | 4 +- src/parser/core.rs | 166 ++++++++++++++++++------- src/parser/declarations.rs | 2 +- src/parser/error.rs | 127 ++++++++++++++++++- src/parser/expressions.rs | 246 +++++++++++++++++++++++++++++-------- src/parser/functions.rs | 94 +++++--------- src/parser/modules.rs | 38 +++--- src/parser/statements.rs | 105 ++++++++++++---- 10 files changed, 599 insertions(+), 205 deletions(-) diff --git a/examples/tricky/index.js b/examples/tricky/index.js index 7b953c5..ab5d30f 100644 --- a/examples/tricky/index.js +++ b/examples/tricky/index.js @@ -1,3 +1,18 @@ -({ - FETCH_CONTEXT: () => (/* binding */ FETCH_CONTEXT), -}); \ No newline at end of file +//OK +//(x=>3); +//x=>3; +//[x=>3]; +//x=>({}); +//(x=>({})); +//{ x: y => 3 }; +//({ x: y => 3 }); +//x=>{}; +//(x,)=>3; + +// ERROR +//((x)=>3); +//(x=>3)(3); +//((x)=>3)(3); +//(x)=>({}); +//((x)=>({})); +//(x)=>{}; diff --git a/src/main.rs b/src/main.rs index 6609b39..61973dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -42,6 +42,7 @@ fn run(file: &str) -> Result<(), Box> where st let tokens = lexer.scan_tokens()?; let mut parser = Parser::new(tokens); + parser.attach_source(&source); let ast = parser.parse()?; println!("AST: {:#?}", ast); diff --git a/src/parser/classes.rs b/src/parser/classes.rs index 2a2446c..95bd5cf 100644 --- a/src/parser/classes.rs +++ b/src/parser/classes.rs @@ -119,8 +119,8 @@ impl Parser { let value = self.match_token(&TokenType::Equal) .then(|| self.parse_expression()) .transpose()?; - - self.consume_semicolon("Expected ';' after class field")?; + + self.consume(&TokenType::Semicolon, "Expected ';' after class field")?; Ok(ClassMember::Property { key, diff --git a/src/parser/core.rs b/src/parser/core.rs index 
89e4466..0bdf556 100644 --- a/src/parser/core.rs +++ b/src/parser/core.rs @@ -4,23 +4,33 @@ use super::error::{ParserError, ParseResult}; use super::state::ParserState; use std::collections::HashSet; + pub struct Parser { pub tokens: Vec, pub current: usize, pub comments: Vec, pub state: ParserState, + pub source: Option, } impl Parser { + pub fn new(tokens: Vec) -> Self { Parser { tokens, current: 0, comments: Vec::new(), state: ParserState::new(), + source: None, } } + + // Method to attach source code to an existing parser + pub fn attach_source(&mut self, source: &str) { + self.source = Some(source.to_string()); + } + // Token navigation methods pub fn is_at_end(&self) -> bool { self.current >= self.tokens.len() || matches!(self.peek_token_type(), Some(TokenType::EOF)) @@ -75,14 +85,56 @@ impl Parser { false } - pub fn consume(&mut self, token_type: &TokenType, message: &str) -> ParseResult<&Token> { - if self.check(token_type) { - Ok(self.advance().unwrap()) - } else { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); - Err(ParserError::new(message, token.line, token.column)) + pub fn consume(&mut self, token_type: &TokenType, message: &str) -> ParseResult { + thread_local! { + static DUMMY_TOKEN: Token = Token::new(TokenType::EOF, 0, 0, 0); } + + if token_type == &TokenType::Semicolon { + if self.match_token(&TokenType::Semicolon) { + return Ok(self.previous().unwrap().clone()); + } + + // ASI rules: insert semicolon if + // 1. The current token is on a new line from the previous token + // 2. The current token is a closing brace + // 3. 
We've reached the end of input + if self.previous_line_terminator() || + self.check(&TokenType::RightBrace) || + self.is_at_end() { + if let Some(prev) = self.previous() { + return Ok(prev.clone()); + } else { + return DUMMY_TOKEN.with(|token| Ok(token.clone())); + } + } + } else if self.check(token_type) { + return Ok(self.advance().unwrap().clone()); + } + + let token = if let Some(t) = self.peek_token() { + t + } else if let Some(t) = self.previous() { + t + } else { + return DUMMY_TOKEN.with(|token| { + Err(ParserError::with_token_span( + message, + token.line, + token.column, + token.length, + &self.get_source_text() + )) + }); + }; + + Err(ParserError::with_token_span( + message, + token.line, + token.column, + token.length, + &self.get_source_text() + )) } pub fn previous_line_terminator(&self) -> bool { @@ -94,33 +146,17 @@ impl Parser { false } - pub fn consume_semicolon(&mut self, message: &str) -> ParseResult<()> { - // Handle automatic semicolon insertion (ASI) - if self.match_token(&TokenType::Semicolon) { - return Ok(()); - } - - // ASI rules: insert semicolon if - // 1. The current token is on a new line from the previous token - // 2. The current token is a closing brace - // 3. 
We've reached the end of input - if self.previous_line_terminator() || - self.check(&TokenType::RightBrace) || - self.is_at_end() { - return Ok(()); - } - - // Otherwise, it's an error - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); - Err(ParserError::new(message, token.line, token.column)) - } - // TODO delete pub fn identifier_name(&self, token: &Token) -> ParseResult> { match &token.token_type { TokenType::Identifier(name) => Ok(name.clone().into_boxed_str()), - _ => Err(ParserError::new("Expected identifier", token.line, token.column)), + _ => Err(ParserError::with_token_span( + "Expected identifier", + token.line, + token.column, + token.length, + &self.get_source_text() + )), } } @@ -133,7 +169,13 @@ impl Parser { Some(t) => t, None => { let last = self.previous().unwrap_or(&binding); - return Err(ParserError::new(message, last.line, last.column)); + return Err(ParserError::with_token_span( + message, + last.line, + last.column, + last.length, + &self.get_source_text() + )); } }; @@ -147,10 +189,12 @@ impl Parser { TokenType::Class => Ok("class".into()), TokenType::Get => Ok("get".into()), TokenType::Set => Ok("set".into()), - _ => Err(ParserError::new( + _ => Err(ParserError::with_token_span( &format!("Expected identifier, found {:?}", token.token_type), token.line, - token.column + token.column, + token.length, + &self.get_source_text() )), } } @@ -165,7 +209,17 @@ impl Parser { None => &binding } }; - ParserError::new(message, token.line, token.column) + ParserError::with_token_span( + message, + token.line, + token.column, + token.length, + &self.get_source_text() + ) + } + + pub fn get_source_text(&self) -> String { + self.source.clone().unwrap_or_default() } // Main parse methods @@ -203,7 +257,13 @@ impl Parser { // Ensure we've consumed all tokens if !self.is_at_end() && !matches!(self.peek_token_type(), Some(TokenType::EOF)) { let token = 
self.peek_token().unwrap(); - return Err(ParserError::new("Unexpected token after statement", token.line, token.column)); + return Err(ParserError::with_token_span( + "Unexpected token after statement", + token.line, + token.column, + token.length, + &self.get_source_text() + )); } Ok(stmt) @@ -211,7 +271,13 @@ impl Parser { pub fn parse_single_expression(&mut self) -> ParseResult { if self.tokens.is_empty() { - return Err(ParserError::new("Empty input", 0, 0)); + return Err(ParserError::with_token_span( + "Empty input", + 0, + 0, + 0, + &self.get_source_text() + )); } let expr = self.parse_expression()?; @@ -219,7 +285,13 @@ impl Parser { // Ensure we've consumed all tokens if !self.is_at_end() && !matches!(self.peek_token_type(), Some(TokenType::EOF)) { let token = self.peek_token().unwrap(); - return Err(ParserError::new("Unexpected token after expression", token.line, token.column)); + return Err(ParserError::with_token_span( + "Unexpected token after expression", + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } Ok(expr) @@ -241,10 +313,12 @@ impl Parser { if reserved_words.contains(&word) { let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("'{}' is a reserved word in strict mode", word), token.line, token.column, + token.length, + &self.get_source_text(), )); } } @@ -257,10 +331,12 @@ impl Parser { if self.state.in_strict_mode { if name == "eval" || name == "arguments" { let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("'{}' cannot be used as a variable name in strict mode", name), token.line, token.column, + token.length, + &self.get_source_text(), )); } } @@ -275,18 +351,22 @@ impl Parser { for param in params { if let Pattern::Identifier(name) = param { if self.state.in_strict_mode && (name.as_ref() == "eval" || name.as_ref() == 
"arguments") { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("'{}' cannot be used as a parameter name in strict mode", name), self.previous().unwrap().line, self.previous().unwrap().column, + self.previous().unwrap().length, + &self.get_source_text(), )); } if !seen_params.insert(name.clone()) { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("Duplicate parameter name '{}'", name), self.previous().unwrap().line, self.previous().unwrap().column, + self.previous().unwrap().length, + &self.get_source_text(), )); } } @@ -298,10 +378,12 @@ impl Parser { // Helper method to handle octal literals in strict mode pub fn validate_octal_literal(&self, value: &str) -> ParseResult<()> { if self.state.in_strict_mode && value.starts_with('0') && !value.starts_with("0x") && !value.starts_with("0b") && !value.starts_with("0o") { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( "Octal literals are not allowed in strict mode", self.previous().unwrap().line, self.previous().unwrap().column, + self.previous().unwrap().length, + &self.get_source_text(), )); } diff --git a/src/parser/declarations.rs b/src/parser/declarations.rs index 88cb8ac..6347cdc 100644 --- a/src/parser/declarations.rs +++ b/src/parser/declarations.rs @@ -26,7 +26,7 @@ impl Parser { // Consume semicolon unless we're in a for-in/of loop if !self.state.in_loop { - self.consume_semicolon("Expected ';' after variable declaration")?; + self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?; } Ok(VariableDeclaration { declarations, kind }) diff --git a/src/parser/error.rs b/src/parser/error.rs index dc301e2..c28a348 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -1,12 +1,13 @@ use crate::lexer::LexerError; use std::fmt; -/// Represents an error that occurred during parsing #[derive(Debug, Clone)] pub 
struct ParserError { pub message: String, pub line: usize, pub column: usize, + pub source_line: Option, + pub source_span: Option<(usize, usize)>, } impl ParserError { @@ -15,16 +16,88 @@ impl ParserError { message: message.to_string(), line, column, + source_line: None, + source_span: None, } } + + pub fn with_token_span(message: &str, line: usize, column: usize, token_length: usize, source: &str) -> Self { + // Extract just the relevant line with limited context + let source_line = extract_source_line_with_context(source, line, column, 60); + let span_end = column + token_length; + + // Adjust column if we've added ellipsis at the start + let (adjusted_column, adjusted_span_end) = if source_line.starts_with("...") { + (column.min(60) + 3, span_end.min(60) + 3) + } else { + (column, span_end) + }; + + ParserError { + message: message.to_string(), + line, + column, + source_line: Some(source_line), + source_span: Some((adjusted_column, adjusted_span_end)), + } + } + } impl fmt::Display for ParserError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "ParserError at line {}, column {}: {}", self.line, self.column, self.message) + writeln!(f, "error: {}", self.message)?; + + if let Some(source_line) = &self.source_line { + // Calculate width needed for line numbers + let line_width = num_digits(self.line); + + // Show location information + writeln!(f, " --> line {}, column {}", self.line, self.column)?; + writeln!(f, "{:width$}|", "", width = line_width + 1)?; + + // Show the error line with context + writeln!(f, "{:>width$} | {}", self.line, source_line, width = line_width)?; + + // Print the error indicator + let (start, end) = self.source_span.unwrap_or((self.column, self.column + 1)); + write!(f, "{:width$} | ", "", width = line_width)?; + + // Print spaces up to the start position + for _ in 0..start { + write!(f, " ")?; + } + + // Print carets for the span length + for _ in start..end.max(start+1) { + write!(f, "^")?; + } + + 
writeln!(f)?; + } else { + writeln!(f, "at line {}, column {}", self.line, self.column)?; + } + + Ok(()) + } +} + +/// Helper function to calculate the number of digits in a number +#[inline] +fn num_digits(n: usize) -> usize { + if n == 0 { + return 1; + } + let mut count = 0; + let mut num = n; + while num > 0 { + count += 1; + num /= 10; } + count } + impl std::error::Error for ParserError {} impl From for ParserError { @@ -33,9 +106,57 @@ impl From for ParserError { message: error.message, line: error.line, column: error.column, + source_line: None, + source_span: None, } } } +/// Extract a specific line from source code with limited context around the error position +#[inline] +fn extract_source_line_with_context(source: &str, line_number: usize, column: usize, context_size: usize) -> String { + let line = source.lines() + .nth(line_number - 1) + .unwrap_or(""); + + if line.len() <= context_size * 2 { + // Line is short enough to show in full + return line.to_string(); + } + + // Calculate start and end positions with context + let start = if column > context_size { + column - context_size + } else { + 0 + }; + + let end = if column + context_size < line.len() { + column + context_size + } else { + line.len() + }; + + // Create the context string with ellipses as needed + let mut result = String::with_capacity(context_size * 2 + 6); // +6 for possible ellipses + + if start > 0 { + result.push_str("..."); + } + + // Get the substring with proper UTF-8 character boundaries + let context_str = line.chars() + .skip(start) + .take(end - start) + .collect::(); + result.push_str(&context_str); + + if end < line.len() { + result.push_str("..."); + } + + result +} + /// Type alias for parser results -pub type ParseResult = Result; \ No newline at end of file +pub type ParseResult = Result; diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index f402c2f..68bc8d1 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -30,6 +30,33 @@ 
pub enum Precedence { impl Parser { pub fn parse_expression(&mut self) -> ParseResult { + +// match self.parse_expression_with_precedence(Precedence::Comma) { +// Ok(expr) => { +// +// match expr { +// Expression::ArrowFunction { params: _, body: _, is_async: _ } if self.check(&TokenType::RightParen) => { +// self.advance(); +// //println!("Parsed Arrow Function!!!"); +// }, +// _ => {}, +// }; +// +// println!("Parsed expression: {:#?}", expr); +// Ok(expr) +// }, +// err => err +// } + + //let expr =; + + //if + //if let Expression::ArrowFunction = &expr { + // println!("Consumed arrow function"); + //} + + //expr + self.parse_expression_with_precedence(Precedence::Comma) } @@ -158,6 +185,7 @@ impl Parser { if let TokenType::TemplateLiteral(parts) = self.advance().unwrap().token_type.clone() { let token_line = self.previous().unwrap().line; let token_column = self.previous().unwrap().column; + let token_length = self.previous().unwrap().length; let mut quasis = Vec::new(); let mut expressions = Vec::new(); @@ -184,19 +212,23 @@ impl Parser { match temp_parser.parse_expression() { Ok(expr) => expressions.push(expr), Err(e) => { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("Invalid expression in template literal: {}", e.message), token_line, - token_column + token_column, + token_length, + &self.get_source_text() )); } } }, Err(e) => { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("Error tokenizing expression in template literal: {}", e.message), token_line, - token_column + token_column, + token_length, + &self.get_source_text() )); } } @@ -215,14 +247,16 @@ impl Parser { if quasis.len() == expressions.len() { quasis.push("".into()); } else { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!( "Invalid template literal: expected {} quasis but got {}", expressions.len() + 1, - 
quasis.len() + quasis.len(), ), token_line, - token_column + token_column, + token_length, + &self.get_source_text() )); } } @@ -249,45 +283,133 @@ impl Parser { Expression::Identifier(name) }, Some(TokenType::LeftParen) => { + + // TODO tricky tricky + self.advance(); // consume '(' - let start_pos = self.current; - let is_arrow = self.is_arrow_function_parameters(); + println!("In ("); - if is_arrow { - self.current = start_pos; - let params = if self.check(&TokenType::RightParen) { - self.advance(); - vec![] - } else { - let mut params = vec![]; - loop { - if self.match_token(&TokenType::Ellipsis) { - let arg = self.parse_pattern()?; - params.push(Pattern::RestElement(Box::new(arg))); - break; - } else { + match self.parse_expression() { + Ok(expr) => { + println!("Parsed expr {:#?}", expr); + println!("Current token {:#?}", self.peek_token_type()); + self.consume(&TokenType::RightParen, "Expected ')' after expression")?; + return Ok(expr); + }, + Err(err) => { + println!("Now I go here"); + let mut consumed_paren = false; + let start_pos = self.current; + + + + //println!("Before check is arrow"); + + let (is_arrow, arrow_consumed_right_paren) = self.is_arrow_function_parameters(); + + //println!("After check is arrow"); + + if !is_arrow { + println!("Not arrow at all"); + return Err(err); + } + + //println!("In Arrow Function"); + + //println!("At token {:#?}", self.peek_token_type()); + + self.current = start_pos; + + let params = if self.match_token(&TokenType::RightParen) { + vec![] + } else { + let mut params = vec![]; + loop { + if self.match_token(&TokenType::Ellipsis) { + //println!("found ... 
in parameters"); + let arg = self.parse_pattern()?; + params.push(Pattern::RestElement(Box::new(arg))); + self.advance(); + break; + } + + if self.match_token(&TokenType::RightParen) { + //self.advance(); + break; + } + + //println!("found identifier in parameters"); params.push(self.parse_pattern()?); + + // if self.match_token(&TokenType::RightParen) { + // println!("found ) in parameters"); + // //consumed_paren = true; + // //self.advance(); + // break; + // } + + if !self.match_token(&TokenType::Comma) { + if self.match_token(&TokenType::RightParen) { + break; + } + // println!("Not comma bailing"); + break; + + } + + + //println!("At end of parameters"); + } - if !self.match_token(&TokenType::Comma) { - break; - } - if self.match_token(&TokenType::RightParen) { - break; - } - } - self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; - params - }; - self.consume(&TokenType::Arrow, "Expected '=>' after parameters")?; - let body = self.parse_arrow_function_body(params, false)?; - return Ok(body); - } else { - let expr = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - expr + //println!("Am here"); + //self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; + //println!("Am there"); + params + }; + + self.consume(&TokenType::Arrow, "Expected '=>' after parameters")?; + + //println!("Currently before parsing body {:#?}", self.peek_token_type()); + let body = self.parse_arrow_function_body(params, false)?; + //println!("Currently after parsing body {:#?}", self.peek_token_type()); + + //if !arrow_consumed_right_paren && self.check(&TokenType::RightParen) { + // self.advance(); + //} + + //println!("Currently before closing ) {:#?}", self.peek_token_type()); + +// if self.match_token(&TokenType::LeftParen) { +// let arguments = self.parse_arguments()?; +// body = Expression::Call { +// callee: Box::new(body), +// arguments, +// optional: false, +// }; +// } + + + 
//println!("Currently immedietaly invoked {:#?}", self.peek_token_type()); + + //self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; + //let params = if self.check(&TokenType::RightParen) {} + // TODO ) not consumed + + //println!("Here all done for ("); + // if self.check(&TokenType::RightParen) { + // self.advance(); + // println!("Consuming dangling )"); + // } + return Ok(body); + + + }, } + + //println!("At expr {:#?}", expr); + }, Some(TokenType::LeftBracket) => { self.advance(); // consume '[' @@ -487,11 +609,23 @@ impl Parser { } } else { let token = self.peek_token().unwrap(); - return Err(super::error::ParserError::new("Expected 'target' after 'new.'", token.line, token.column)); + return Err(super::error::ParserError::with_token_span( + "Expected 'target' after 'new.'", + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } } else { let token = self.peek_token().unwrap(); - return Err(super::error::ParserError::new("Expected 'target' after 'new.'", token.line, token.column)); + return Err(super::error::ParserError::with_token_span( + "Expected 'target' after 'new.'", + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } } else { // Regular new expression @@ -552,10 +686,12 @@ impl Parser { Some(TokenType::Async) if self.is_async_function() => self.parse_async_function_expression()?, _ => { let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap()); - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("Unexpected token in expression: {:?}", token.token_type), token.line, - token.column + token.column, + token.length, + &self.get_source_text() )); } }; @@ -630,10 +766,12 @@ impl Parser { if self.match_any(&[TokenType::PlusPlus, TokenType::MinusMinus]) { if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. 
}) { let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( "Invalid left-hand side in postfix operation", token.line, - token.column + token.column, + token.length, + &self.get_source_text() )); } @@ -701,10 +839,12 @@ impl Parser { if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. } | Expression::Array(_) | Expression::Object(_)) { let binding = Token::new(TokenType::EOF, 0, 0, 0); let token = self.previous().unwrap_or(&binding); - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( "Invalid left-hand side in assignment", token.line, - token.column + token.column, + token.length, + &self.get_source_text() )); } @@ -826,10 +966,12 @@ impl Parser { TokenType::InstanceOf => BinaryOperator::InstanceOf, _ => { let token = self.previous().unwrap(); - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( &format!("Unexpected token: {:?}", token_type), token.line, - token.column + token.column, + token.length, + &self.get_source_text() )); } }; @@ -892,10 +1034,12 @@ impl Parser { self.advance(); "for".into() } else { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( "Expected property name 3", self.peek_token().unwrap().line, - self.peek_token().unwrap().column + self.peek_token().unwrap().column, + self.peek_token().unwrap().length, + &self.get_source_text() )); }; diff --git a/src/parser/functions.rs b/src/parser/functions.rs index a2ea818..f6d60c1 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -212,96 +212,64 @@ impl Parser { false } - pub fn is_arrow_function_parameters(&mut self) -> bool { - // Save current position + pub fn is_arrow_function_parameters(&mut self) -> (bool, bool) { let start_pos = self.current; - // Check for a single parameter without parentheses (like y => 2) if let 
Some(TokenType::Identifier(_)) = self.peek_token_type() { - self.advance(); // consume identifier + self.advance(); let is_arrow = self.check(&TokenType::Arrow); self.current = start_pos; if is_arrow { - return true; + return (true, false); } } - - // Check for spread operator at the beginning (like (...e) => {}) - if self.check(&TokenType::Ellipsis) { - self.advance(); // consume '...' - - // We need an identifier after the spread + + if self.match_token(&TokenType::Ellipsis) { if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - self.advance(); // consume identifier - - // Check for right parenthesis and then arrow + self.advance(); if self.match_token(&TokenType::RightParen) && self.check(&TokenType::Arrow) { self.current = start_pos; - return true; + return (true, true); } } - - // Reset position if not an arrow function self.current = start_pos; } - - // Empty parameter list - no need to check for left parenthesis, it's already consumed + if self.match_token(&TokenType::RightParen) { - // Check for arrow let is_arrow = self.check(&TokenType::Arrow); self.current = start_pos; - return is_arrow; + return (is_arrow, true); } - // Try to parse a parameter list - let mut has_rest = false; - + let mut has_close_paren = false; + loop { - if has_rest { - // Rest parameter must be the last one - self.current = start_pos; - return false; - } - - if self.match_token(&TokenType::Ellipsis) { - has_rest = true; - - // We need an identifier after the spread - if !matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) { - self.current = start_pos; - return false; - } - } - - // Skip the parameter - if let Some(token_type) = self.peek_token_type() { - if matches!(token_type, TokenType::Identifier(_)) || - token_type == &TokenType::LeftBrace || - token_type == &TokenType::LeftBracket { - self.advance(); - } else { + match self.parse_pattern() { + Ok(arg) => { + continue; + }, + Err(_) if self.match_token(&TokenType::Comma) => { + continue; + }, + Err(_) 
=> if self.match_token(&TokenType::RightParen) { + has_close_paren = true; + break; + }, + Err(_) => { self.current = start_pos; - return false; - } - } else { - self.current = start_pos; - return false; + return (false, has_close_paren) + }, } + break; + } - if self.match_token(&TokenType::RightParen) { - break; - } - - if !self.match_token(&TokenType::Comma) { - self.current = start_pos; - return false; - } + if self.match_token(&TokenType::RightParen) { + has_close_paren = true; } - - // Check for arrow + let is_arrow = self.check(&TokenType::Arrow); self.current = start_pos; - is_arrow + return (is_arrow, has_close_paren) } } diff --git a/src/parser/modules.rs b/src/parser/modules.rs index 54537be..b209379 100644 --- a/src/parser/modules.rs +++ b/src/parser/modules.rs @@ -101,7 +101,7 @@ impl Parser { Vec::new() }; - self.consume_semicolon("Expected ';' after import statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after import statement")?; if let Some(src) = source { Ok(Statement::Import { @@ -237,10 +237,12 @@ impl Parser { } // If we get here, it's an invalid export statement - Err(super::error::ParserError::new( + Err(super::error::ParserError::with_token_span( "Invalid export statement. Expected '*', default, declaration, or named exports", start_token.line, - start_token.column + start_token.column, + start_token.length, + &self.get_source_text() )) } @@ -253,21 +255,25 @@ impl Parser { }; if !self.match_token(&TokenType::From) { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( "Expected 'from' after export *", self.peek_token().unwrap().line, - self.peek_token().unwrap().column + self.peek_token().unwrap().column, + self.peek_token().unwrap().length, + &self.get_source_text() )); } let source = self.parse_module_source()? 
- .ok_or_else(|| super::error::ParserError::new( + .ok_or_else(|| super::error::ParserError::with_token_span( "Expected string literal for module source", self.previous().unwrap().line, - self.previous().unwrap().column + self.previous().unwrap().column, + self.previous().unwrap().length, + &self.get_source_text() ))?; - self.consume_semicolon("Expected ';' after export statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after export statement")?; Ok(Statement::Export(ExportDeclaration::All { source, exported })) } @@ -287,7 +293,7 @@ impl Parser { } else { // export default expression; let expr = self.parse_expression()?; - self.consume_semicolon("Expected ';' after export default expression")?; + self.consume(&TokenType::Semicolon, "Expected ';' after export default expression")?; ExportDefaultDeclaration::Expression(expr) }; @@ -305,10 +311,12 @@ impl Parser { } else if self.check(&TokenType::Var) || self.check(&TokenType::Let) || self.check(&TokenType::Const) { Declaration::Variable(self.parse_variable_declaration()?) } else { - return Err(super::error::ParserError::new( + return Err(super::error::ParserError::with_token_span( "Expected declaration in export statement", self.peek_token().unwrap().line, - self.peek_token().unwrap().column + self.peek_token().unwrap().column, + self.peek_token().unwrap().length, + &self.get_source_text() )); }; @@ -325,16 +333,18 @@ impl Parser { // Optional from clause let source = if self.match_token(&TokenType::From) { - Some(self.parse_module_source()?.ok_or_else(|| super::error::ParserError::new( + Some(self.parse_module_source()?.ok_or_else(|| super::error::ParserError::with_token_span( "Expected string literal for module source", self.previous().unwrap().line, - self.previous().unwrap().column + self.previous().unwrap().column, + self.previous().unwrap().length, + &self.get_source_text() ))?) 
} else { None }; - self.consume_semicolon("Expected ';' after export statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after export statement")?; Ok(Statement::Export(ExportDeclaration::Named { declaration: None, diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 5911089..2d8c1fe 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -12,17 +12,14 @@ impl Parser { self.advance(); Ok(Statement::Empty) }, - + // Block statement { ... } Some(TokenType::LeftBrace) => self.parse_block(), // Declaration statements - Some(TokenType::Var) | Some(TokenType::Let) | Some(TokenType::Const) => - self.parse_variable_statement(), - Some(TokenType::Function) => - self.parse_function_statement(), - Some(TokenType::Class) => - self.parse_class_statement(), + Some(TokenType::Var) | Some(TokenType::Let) | Some(TokenType::Const) => self.parse_variable_statement(), + Some(TokenType::Function) => self.parse_function_statement(), + Some(TokenType::Class) => self.parse_class_statement(), // Control flow statements Some(TokenType::If) => self.parse_if(), @@ -195,7 +192,7 @@ impl Parser { let test = self.parse_expression()?; self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; - self.consume_semicolon("Expected ';' after do-while statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after do-while statement")?; Ok(Statement::Loop(LoopStatement::DoWhile { body, test })) } @@ -247,11 +244,17 @@ impl Parser { // No line terminator allowed between throw and expression if self.previous_line_terminator() { - return Err(ParserError::new("Illegal newline after throw", token.line, token.column)); + return Err(ParserError::with_token_span( + "Illegal newline after throw", + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } let expr = self.parse_expression()?; - self.consume_semicolon("Expected ';' after throw statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after throw 
statement")?; Ok(Statement::Throw(expr)) } @@ -262,7 +265,13 @@ impl Parser { // Check if we're in a function if !self.state.in_function { - return Err(ParserError::new("'return' statement outside of function", token.line, token.column)); + return Err(ParserError::with_token_span( + "'return' statement outside of function", + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } // Return with no value if semicolon or end of block @@ -273,7 +282,7 @@ impl Parser { .then(|| self.parse_expression()) .transpose()?; - self.consume_semicolon("Expected ';' after return statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after return statement")?; Ok(Statement::Return(argument)) } @@ -284,7 +293,13 @@ impl Parser { // Check if we're in a loop or switch if !self.state.in_loop && !self.state.in_switch { - return Err(ParserError::new("'break' statement outside of loop or switch", token.line, token.column)); + return Err(ParserError::with_token_span( + "'break' statement outside of loop or switch", + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } // Optional label @@ -295,7 +310,13 @@ impl Parser { // Verify label exists let label_name = name.into_boxed_str(); if !self.state.labels.contains(&label_name) { - return Err(ParserError::new(&format!("Undefined label '{}'", label_name), token.line, token.column)); + return Err(ParserError::with_token_span( + &format!("Undefined label '{}'", label_name), + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } Some(label_name) @@ -306,7 +327,7 @@ impl Parser { None }; - self.consume_semicolon("Expected ';' after break statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after break statement")?; Ok(Statement::Break(label)) } @@ -317,7 +338,13 @@ impl Parser { // Check if we're in a loop if !self.state.in_loop { - return Err(ParserError::new("'continue' statement outside of loop", token.line, token.column)); + return 
Err(ParserError::with_token_span( + "'continue' statement outside of loop", + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } // Optional label @@ -328,7 +355,13 @@ impl Parser { // Verify label exists let label_name = name.into_boxed_str(); if !self.state.labels.contains(&label_name) { - return Err(ParserError::new(&format!("Undefined label '{}'", label_name), token.line, token.column)); + return Err(ParserError::with_token_span( + &format!("Undefined label '{}'", label_name), + token.line, + token.column, + token.length, + &self.get_source_text(), + )); } Some(label_name) @@ -339,7 +372,7 @@ impl Parser { None }; - self.consume_semicolon("Expected ';' after continue statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after continue statement")?; Ok(Statement::Continue(label)) } @@ -350,10 +383,12 @@ impl Parser { // Check if in strict mode if self.state.in_strict_mode { - return Err(ParserError::new( + return Err(ParserError::with_token_span( "'with' statements are not allowed in strict mode", self.previous().unwrap().line, - self.previous().unwrap().column + self.previous().unwrap().column, + self.previous().unwrap().length, + &self.get_source_text(), )); } @@ -372,7 +407,7 @@ impl Parser { fn parse_debugger(&mut self) -> ParseResult { self.advance(); // consume 'debugger' - self.consume_semicolon("Expected ';' after debugger statement")?; + self.consume(&TokenType::Semicolon, "Expected ';' after debugger statement")?; Ok(Statement::Debugger) } @@ -387,10 +422,12 @@ impl Parser { // Add label to the set of active labels let label_exists = !self.state.labels.insert(label.clone()); if label_exists { - return Err(ParserError::new( + return Err(ParserError::with_token_span( &format!("Label '{}' has already been declared", label), token.line, - token.column + token.column, + token.length, + &self.get_source_text(), )); } @@ -418,19 +455,35 @@ impl Parser { pub fn parse_expression_statement(&mut self) -> ParseResult { // 
Handle directives (like "use strict") let start_pos = self.current; - + + //println!("Before parse expression"); let expr = self.parse_expression()?; - + //println!("After parse expression"); + + println!("After parse expression: {:#?}", expr); + // Check for directive prologue let is_directive = if let Expression::Literal(Literal::String(_)) = &expr { + //println!("This case"); // Only consider as directive if it's at the beginning of a function/program // and is a simple string literal (not an expression) start_pos == 0 || self.previous().unwrap().token_type == TokenType::LeftBrace } else { + //println!("That case"); false }; - self.consume_semicolon("Expected ';' after expression")?; + //println!("now need a ;"); + + + //if self.check(&TokenType::) + //if self.check(&TokenType::LeftParen) { + //println!("Immediately invoked?"); + //} + + + //println!(self.peek_token_type()) + self.consume(&TokenType::Semicolon, "Expected ';' after expression statement")?; // If this is a "use strict" directive, update parser state if is_directive { From b444a2b78a146cb934304cdb452fbd00b3dd530d Mon Sep 17 00:00:00 2001 From: Jan Cajthaml Date: Mon, 5 May 2025 00:20:44 +0200 Subject: [PATCH 2/7] working on improving params parsing --- examples/tricky/index.js | 13 +-- src/ast.rs | 50 +++++---- src/lexer/lexer.rs | 2 +- src/parser/core.rs | 4 +- src/parser/expressions.rs | 217 ++++++++++++++++---------------------- src/parser/functions.rs | 6 +- src/parser/patterns.rs | 109 ++++--------------- src/parser/statements.rs | 7 +- 8 files changed, 154 insertions(+), 254 deletions(-) diff --git a/examples/tricky/index.js b/examples/tricky/index.js index ab5d30f..5613cdf 100644 --- a/examples/tricky/index.js +++ b/examples/tricky/index.js @@ -1,18 +1,19 @@ //OK //(x=>3); +//(x=>({})); +//((x)=>3); +//((x)=>({})); +//(x)=>{}; +//(x,y,z)=>({}); +//(x,y) => 3; //x=>3; //[x=>3]; //x=>({}); -//(x=>({})); //{ x: y => 3 }; //({ x: y => 3 }); //x=>{}; -//(x,)=>3; // ERROR -//((x)=>3); //(x=>3)(3); 
//((x)=>3)(3); -//(x)=>({}); -//((x)=>({})); -//(x)=>{}; +(x,y,z,a,b,c,d)=>3; diff --git a/src/ast.rs b/src/ast.rs index ae2f0ef..a5fd926 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -104,7 +104,7 @@ pub struct VariableDeclaration { #[derive(Debug, Clone)] pub struct FunctionDeclaration { pub id: Box, - pub params: Vec, + pub params: Vec, pub body: Vec, pub is_async: bool, pub is_generator: bool, @@ -120,7 +120,7 @@ pub struct ClassDeclaration { #[derive(Debug, Clone)] pub enum ClassMember { Constructor { - params: Vec, + params: Vec, body: Vec, }, Method { @@ -141,7 +141,7 @@ pub enum ClassMember { #[derive(Debug, Clone)] pub struct MethodDefinition { - pub params: Vec, + pub params: Vec, pub body: Vec, pub is_async: bool, pub is_generator: bool, @@ -199,36 +199,40 @@ pub enum VariableKind { #[derive(Debug, Clone)] pub struct VariableDeclarator { - pub id: Pattern, + pub id: Expression, // TODO maybe tighter pub init: Option, } -#[derive(Debug, Clone)] -pub enum Pattern { - Identifier(Box), - ObjectPattern(Vec), - ArrayPattern(Vec>), - RestElement(Box), - AssignmentPattern { - left: Box, - right: Expression, - }, -} - +// +//// TODO delete +//#[derive(Debug, Clone)] +//pub enum Pattern { + //Identifier(Box), + //ObjectPattern(Vec), + //ArrayPattern(Vec>), + //RestElement(Box), + //AssignmentPattern { + //left: Box, + //right: Expression, + //}, +//} + +/* #[derive(Debug, Clone)] pub enum ObjectPatternProperty { Property { key: PropertyKey, - value: Pattern, + value: Expression, computed: bool, shorthand: bool, }, - Rest(Box), -} + Spread(Box), + //Rest(Box), +}*/ #[derive(Debug, Clone)] pub struct CatchClause { - pub param: Option, + pub param: Option, pub body: Box, } @@ -266,17 +270,17 @@ pub enum Expression { This, Super, Literal(Literal), - Array(Vec>), + Array(Vec), Object(Vec), Function { id: Option>, - params: Vec, + params: Vec, body: Vec, is_async: bool, is_generator: bool, }, ArrowFunction { - params: Vec, + params: Vec, body: ArrowFunctionBody, is_async: 
bool, }, diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 5f8d827..b1cba5f 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -38,7 +38,7 @@ impl<'a> Lexer<'a> { self.start = self.current; self.scan_token()?; } - let eof_column = self.column; + let _eof_column = self.column; add_token!(self, TokenType::EOF, 0); Ok(std::mem::take(&mut self.tokens)) } diff --git a/src/parser/core.rs b/src/parser/core.rs index 0bdf556..bbc079c 100644 --- a/src/parser/core.rs +++ b/src/parser/core.rs @@ -345,11 +345,11 @@ impl Parser { } // Helper method to validate function parameters - pub fn validate_function_params(&self, params: &[Pattern]) -> ParseResult<()> { + pub fn validate_function_params(&self, params: &[Expression]) -> ParseResult<()> { let mut seen_params = HashSet::new(); for param in params { - if let Pattern::Identifier(name) = param { + if let Expression::Identifier(name) = param { if self.state.in_strict_mode && (name.as_ref() == "eval" || name.as_ref() == "arguments") { return Err(super::error::ParserError::with_token_span( &format!("'{}' cannot be used as a parameter name in strict mode", name), diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index 68bc8d1..6dc297a 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -267,16 +267,16 @@ impl Parser { } }, // TODO everything but Identifier hoists matches below, need a better approach to var as = e.class; scenarios - Some(TokenType::Identifier(_)) | - Some(TokenType::As) | - Some(TokenType::Target) | - Some(TokenType::Class) | - Some(TokenType::Get) | - Some(TokenType::Set) | - Some(TokenType::From) => { + Some(TokenType::Identifier(_)) => { + //Some(TokenType::As) | + //Some(TokenType::Target) | + //Some(TokenType::Class) | + //Some(TokenType::Get) | + //Some(TokenType::Set) | + //Some(TokenType::From) => { let name = self.expect_identifier("Expected identifier in expression")?; if self.check(&TokenType::Arrow) { - let param = Pattern::Identifier(name); + let 
param = Expression::Identifier(name); self.advance(); return self.parse_arrow_function_body(vec![param], false); } @@ -290,186 +290,146 @@ impl Parser { println!("In ("); + // TODO IIFE + +// if self.match_token(&TokenType::LeftParen) { +// let arguments = self.parse_arguments()?; +// body = Expression::Call { +// callee: Box::new(body), +// arguments, +// optional: false, +// }; +// } + + + match self.parse_expression() { Ok(expr) => { println!("Parsed expr {:#?}", expr); - println!("Current token {:#?}", self.peek_token_type()); - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - return Ok(expr); - }, - Err(err) => { - println!("Now I go here"); - let mut consumed_paren = false; - let start_pos = self.current; - + //println!("Current token {:#?}", self.peek_token_type()); - - //println!("Before check is arrow"); - - let (is_arrow, arrow_consumed_right_paren) = self.is_arrow_function_parameters(); - - //println!("After check is arrow"); - - if !is_arrow { - println!("Not arrow at all"); - return Err(err); + match expr { + Expression::ArrowFunction { .. } => { + self.consume(&TokenType::RightParen, "Expected ')' after expression")?; + return Ok(expr); + } + _ => {}, } - //println!("In Arrow Function"); - - //println!("At token {:#?}", self.peek_token_type()); - - self.current = start_pos; + let mut params = vec![]; - let params = if self.match_token(&TokenType::RightParen) { - vec![] - } else { - let mut params = vec![]; - loop { - if self.match_token(&TokenType::Ellipsis) { - //println!("found ... 
in parameters"); - let arg = self.parse_pattern()?; - params.push(Pattern::RestElement(Box::new(arg))); - self.advance(); - break; - } + // TODO better way to extract Expressions into Patterns - if self.match_token(&TokenType::RightParen) { - //self.advance(); - break; - } - - //println!("found identifier in parameters"); - params.push(self.parse_pattern()?); - - // if self.match_token(&TokenType::RightParen) { - // println!("found ) in parameters"); - // //consumed_paren = true; - // //self.advance(); - // break; - // } - - if !self.match_token(&TokenType::Comma) { - if self.match_token(&TokenType::RightParen) { - break; + match expr { + // TODO sequences are tuples or maybe just keep it as is? + Expression::Sequence(ref seq) => { + println!("Found sequence of identifiers {:#?}", seq); + for item in seq { + match item { + Expression::Identifier(ref name) => { + params.push(Expression::Identifier(name.clone())); + }, + _ => {}, // TODO rest } - // println!("Not comma bailing"); - break; - } - - - //println!("At end of parameters"); - + }, + Expression::Identifier(ref name) => { + println!("Found identifier {:#?}", name); + params.push(Expression::Identifier(name.clone())); + }, + /* + Expression::ArrowFunction { .. 
} => { + self.consume(&TokenType::RightParen, "Expected ')' after expression")?; + return Ok(expr); } + */ + _ => {}, + } - //println!("Am here"); - //self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; - //println!("Am there"); - params - }; + if self.match_token(&TokenType::Arrow) { + println!("Now I am in body of arrow function"); - self.consume(&TokenType::Arrow, "Expected '=>' after parameters")?; + let body = self.parse_arrow_function_body(params, false)?; - //println!("Currently before parsing body {:#?}", self.peek_token_type()); - let body = self.parse_arrow_function_body(params, false)?; - //println!("Currently after parsing body {:#?}", self.peek_token_type()); + self.consume(&TokenType::RightParen, "Expected ')' after expression")?; + return Ok(body); + } + + //println!("Not in body of arrow function just generic expression"); - //if !arrow_consumed_right_paren && self.check(&TokenType::RightParen) { - // self.advance(); - //} + self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - //println!("Currently before closing ) {:#?}", self.peek_token_type()); + if self.match_token(&TokenType::Arrow) { -// if self.match_token(&TokenType::LeftParen) { -// let arguments = self.parse_arguments()?; -// body = Expression::Call { -// callee: Box::new(body), -// arguments, -// optional: false, -// }; -// } + println!("Acumulated params {:#?}", params); + + println!("Backtrack it was actually arrow function"); + let body = self.parse_arrow_function_body(params, false)?; + return Ok(body); + } + return Ok(expr); + }, + Err(err) => { + println!("In error"); + + self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - //println!("Currently immedietaly invoked {:#?}", self.peek_token_type()); + println!("Current token {:#?}", self.peek_token_type()); - //self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; - //let params = if self.check(&TokenType::RightParen) {} - // TODO ) not consumed + if 
self.match_token(&TokenType::Arrow) { + println!("Now I am in body of arrow function"); - //println!("Here all done for ("); - // if self.check(&TokenType::RightParen) { - // self.advance(); - // println!("Consuming dangling )"); - // } - return Ok(body); - + // TODO skipped over params + let params = vec![]; + let body = self.parse_arrow_function_body(params, false)?; + return Ok(body); + } + return Err(err); }, } - - //println!("At expr {:#?}", expr); - }, Some(TokenType::LeftBracket) => { - self.advance(); // consume '[' - + self.advance(); let mut elements = Vec::new(); - while !self.check(&TokenType::RightBracket) && !self.is_at_end() { if self.match_token(&TokenType::Comma) { - // Elision (hole) - elements.push(None); + elements.push(ArrayElement::Hole); } else { if self.match_token(&TokenType::Ellipsis) { - // Spread element let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - elements.push(Some(ArrayElement::Spread(expr))); + elements.push(ArrayElement::Spread(expr)); } else { - // Regular element let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - elements.push(Some(ArrayElement::Expression(expr))); + elements.push(ArrayElement::Expression(expr)); } - if !self.check(&TokenType::RightBracket) { self.consume(&TokenType::Comma, "Expected ',' after array element")?; } } } - self.consume(&TokenType::RightBracket, "Expected ']' after array elements")?; - Expression::Array(elements) }, Some(TokenType::LeftBrace) => { - self.advance(); // consume '{' - + self.advance(); let mut properties = Vec::new(); - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { if self.match_token(&TokenType::Ellipsis) { - // Spread property let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; properties.push(ObjectProperty::Spread(expr)); } else { - // Method or property - let start_pos = self.current; let is_async = self.match_token(&TokenType::Async); let is_generator = 
self.match_token(&TokenType::Star); - - // Check for getter/setter let mut kind = PropertyKind::Init; if !is_async && !is_generator { - // Check if the next token is 'get' or 'set' if self.check(&TokenType::Get) || self.check(&TokenType::Set) { - // Look ahead to see if it's followed by a colon let is_property_name = if let Some(next_token) = self.tokens.get(self.current + 1) { matches!(next_token.token_type, TokenType::Colon) } else { false }; - - // Only treat as getter/setter if not followed by a colon if !is_property_name { if self.match_token(&TokenType::Get) { kind = PropertyKind::Get; @@ -795,6 +755,7 @@ impl Parser { Precedence::Comma => { self.advance(); // consume comma let right = self.parse_expression_with_precedence(Precedence::Assignment)?; + // TODO if None or Empty its fine expr = Expression::Sequence(vec![expr, right]); }, Precedence::Assignment => { diff --git a/src/parser/functions.rs b/src/parser/functions.rs index f6d60c1..cef325c 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -98,7 +98,7 @@ impl Parser { }) } - pub fn parse_function_params(&mut self) -> ParseResult> { + pub fn parse_function_params(&mut self) -> ParseResult> { self.consume(&TokenType::LeftParen, "Expected '(' after function name")?; let mut params = Vec::new(); @@ -108,7 +108,7 @@ impl Parser { if self.match_token(&TokenType::Ellipsis) { // Rest parameter let arg = self.parse_pattern()?; - params.push(Pattern::RestElement(Box::new(arg))); + params.push(Expression::Spread(Box::new(arg))); break; // Rest parameter must be the last one } else { params.push(self.parse_pattern()?); @@ -144,7 +144,7 @@ impl Parser { Ok(body) } - pub fn parse_arrow_function_body(&mut self, params: Vec, is_async: bool) -> ParseResult { + pub fn parse_arrow_function_body(&mut self, params: Vec, is_async: bool) -> ParseResult { // Save and update parser state let (prev_in_function, prev_allow_await) = (self.state.in_function, self.state.allow_await); self.state.in_function = true; 
diff --git a/src/parser/patterns.rs b/src/parser/patterns.rs index 6b27415..e2070c8 100644 --- a/src/parser/patterns.rs +++ b/src/parser/patterns.rs @@ -5,7 +5,7 @@ use super::core::Parser; impl Parser { - pub fn parse_pattern(&mut self) -> ParseResult { + pub fn parse_pattern(&mut self) -> ParseResult { match self.peek_token_type() { // Identifier pattern Some(TokenType::Identifier(_)) | @@ -13,7 +13,7 @@ impl Parser { Some(TokenType::As) | Some(TokenType::From) => { let name = self.expect_identifier("Expected identifier in pattern")?; - Ok(Pattern::Identifier(name)) + Ok(Expression::Identifier(name)) }, // Object pattern: { x, y } Some(TokenType::LeftBrace) => { @@ -26,7 +26,7 @@ impl Parser { if self.match_token(&TokenType::Ellipsis) { // Rest element let argument = self.parse_pattern()?; - properties.push(ObjectPatternProperty::Rest(Box::new(argument))); + properties.push(ObjectProperty::Spread(argument)); // Rest element must be the last one if !self.check(&TokenType::RightBrace) { @@ -41,14 +41,15 @@ impl Parser { let (value, computed, shorthand) = if !self.check(&TokenType::Colon) { if let PropertyKey::Identifier(name) = &key { // Shorthand property: { x } - let pattern = Pattern::Identifier(name.clone()); + let pattern = Expression::Identifier(name.clone()); // Check for default value: { x = 1 } if self.match_token(&TokenType::Equal) { let default = self.parse_expression()?; - (Pattern::AssignmentPattern { + (Expression::Assignment { + operator: AssignmentOperator::Assign, left: Box::new(pattern), - right: default, + right: Box::new(default), }, false, true) } else { (pattern, false, true) @@ -64,20 +65,22 @@ impl Parser { // Check for default value: { key: value = 1 } if self.match_token(&TokenType::Equal) { let default = self.parse_expression()?; - (Pattern::AssignmentPattern { + (Expression::Assignment { + operator: AssignmentOperator::Assign, left: Box::new(pattern), - right: default, + right: Box::new(default), }, matches!(key, 
PropertyKey::Computed(_)), false) } else { (pattern, matches!(key, PropertyKey::Computed(_)), false) } }; - properties.push(ObjectPatternProperty::Property { + properties.push(ObjectProperty::Property { key, value, computed, shorthand, + kind: PropertyKind::Init, // FIXME not true Get/Set }); } @@ -94,7 +97,7 @@ impl Parser { self.consume(&TokenType::RightBrace, "Expected '}' after object pattern")?; - Ok(Pattern::ObjectPattern(properties)) + Ok(Expression::Object(properties)) }, // Array pattern: [x, y, z = 1] @@ -106,12 +109,12 @@ impl Parser { while !self.check(&TokenType::RightBracket) && !self.is_at_end() { if self.match_token(&TokenType::Comma) { // Elision (hole) - elements.push(None); + elements.push(ArrayElement::Hole); // TODO could use } else { if self.match_token(&TokenType::Ellipsis) { // Rest element let argument = self.parse_pattern()?; - elements.push(Some(Pattern::RestElement(Box::new(argument)))); + elements.push(ArrayElement::Spread(Expression::Spread(Box::new(argument)))); // Rest element must be the last one if !self.check(&TokenType::RightBracket) { @@ -131,12 +134,13 @@ impl Parser { // Check for default value: [x = 1] if self.match_token(&TokenType::Equal) { let default = self.parse_expression()?; - elements.push(Some(Pattern::AssignmentPattern { + elements.push(ArrayElement::Expression(Expression::Assignment { + operator: AssignmentOperator::Assign, left: Box::new(pattern), - right: default, + right: Box::new(default), })); } else { - elements.push(Some(pattern)); + elements.push(ArrayElement::Expression(pattern)); } } @@ -148,7 +152,7 @@ impl Parser { self.consume(&TokenType::RightBracket, "Expected ']' after array pattern")?; - Ok(Pattern::ArrayPattern(elements)) + Ok(Expression::Array(elements)) }, // Assignment pattern: x = 1 (handled by the caller) @@ -158,76 +162,5 @@ impl Parser { } } } - // Helper method to convert an expression to a pattern (for arrow function parameters) - pub fn expression_to_pattern(&self, expr: Expression) -> 
ParseResult { - match expr { - Expression::Identifier(name) => Ok(Pattern::Identifier(name)), - Expression::Object(props) => { - // Convert object expression to object pattern - let mut pattern_props = Vec::new(); - - for prop in props { - match prop { - ObjectProperty::Property { key, value, computed, shorthand, .. } => { - if let Expression::Identifier(name) = value { - pattern_props.push(ObjectPatternProperty::Property { - key, - value: Pattern::Identifier(name), - computed, - shorthand, - }); - } else { - return Err(self.error_unexpected("Invalid object pattern")); - } - }, - ObjectProperty::Spread(expr) => { - if let Expression::Identifier(name) = expr { - pattern_props.push(ObjectPatternProperty::Rest( - Box::new(Pattern::Identifier(name)) - )); - } else { - return Err(self.error_unexpected("Invalid rest pattern")); - } - }, - _ => return Err(self.error_unexpected("Invalid object pattern")), - } - } - - Ok(Pattern::ObjectPattern(pattern_props)) - }, - Expression::Array(elements) => { - // Convert array expression to array pattern - let mut pattern_elements = Vec::new(); - - for element in elements { - match element { - None => pattern_elements.push(None), - Some(ArrayElement::Expression(expr)) => { - if let Expression::Identifier(name) = expr { - pattern_elements.push(Some(Pattern::Identifier(name))); - } else { - return Err(self.error_unexpected("Invalid array pattern")); - } - }, - Some(ArrayElement::Spread(expr)) => { - if let Expression::Identifier(name) = expr { - pattern_elements.push(Some(Pattern::RestElement( - Box::new(Pattern::Identifier(name)) - ))); - } else { - return Err(self.error_unexpected("Invalid rest pattern")); - } - }, - Some(ArrayElement::Hole) => { - // Handle hole elements (like [,,,]) by adding None to the pattern elements - pattern_elements.push(None); - }, - } - } - - Ok(Pattern::ArrayPattern(pattern_elements)) - }, - _ => Err(self.error_unexpected("Invalid pattern")), - } - } + } diff --git a/src/parser/statements.rs 
b/src/parser/statements.rs index 2d8c1fe..70cbb83 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -226,7 +226,9 @@ impl Parser { // Optional catch parameter let param = self.match_token(&TokenType::LeftParen) .then(|| { - let param = self.parse_pattern()?; + //let param = self.parse_pattern()?; + // TODO fixme + let param = None; self.consume(&TokenType::RightParen, "Expected ')' after catch parameter")?; // Explicitly specify the error type as ParserError Ok::<_, super::error::ParserError>(param) @@ -235,7 +237,7 @@ impl Parser { let body = Box::new(self.parse_block()?); - Ok(CatchClause { param, body }) + Ok(CatchClause { param: param.expect("REASON"), body }) } /// Parse throw statement: throw expression; @@ -458,7 +460,6 @@ impl Parser { //println!("Before parse expression"); let expr = self.parse_expression()?; - //println!("After parse expression"); println!("After parse expression: {:#?}", expr); From 91769f64a6934fbc7f57d075c1cecab64371608e Mon Sep 17 00:00:00 2001 From: Jan Cajthaml Date: Mon, 5 May 2025 06:46:25 +0200 Subject: [PATCH 3/7] remove dead code --- examples/simple/math.js | 6 ++-- examples/tricky/index.js | 8 +++++- src/parser/error.rs | 11 +------ src/parser/expressions.rs | 6 ++-- src/parser/functions.rs | 60 --------------------------------------- src/parser/statements.rs | 4 +-- 6 files changed, 16 insertions(+), 79 deletions(-) diff --git a/examples/simple/math.js b/examples/simple/math.js index 020e32b..f5fe0ff 100644 --- a/examples/simple/math.js +++ b/examples/simple/math.js @@ -11,8 +11,8 @@ export function multiply(a, b) { } export function divide(a, b) { -// if (b === 0) { -// throw new Error('Cannot divide by zero'); -// } + if (b === 0) { + throw new Error('Cannot divide by zero'); + } return a / b; } \ No newline at end of file diff --git a/examples/tricky/index.js b/examples/tricky/index.js index 5613cdf..9156bcb 100644 --- a/examples/tricky/index.js +++ b/examples/tricky/index.js @@ -12,8 +12,14 @@ //{ x: 
y => 3 }; //({ x: y => 3 }); //x=>{}; +//async function* foo(a, b, ...c) {} +//function* foo(a) {} +//(function foo() {}()) + // ERROR //(x=>3)(3); //((x)=>3)(3); -(x,y,z,a,b,c,d)=>3; +//(x,y,z,a,b,c,d)=>3; +//function foo(a=1) {} +//(function() {}) \ No newline at end of file diff --git a/src/parser/error.rs b/src/parser/error.rs index c28a348..8ce3549 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -11,16 +11,7 @@ pub struct ParserError { } impl ParserError { - pub fn new(message: &str, line: usize, column: usize) -> Self { - ParserError { - message: message.to_string(), - line, - column, - source_line: None, - source_span: None, - } - } - + pub fn with_token_span(message: &str, line: usize, column: usize, token_length: usize, source: &str) -> Self { // Extract just the relevant line with limited context let source_line = extract_source_line_with_context(source, line, column, 60); diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index 6dc297a..0cfaadc 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -24,7 +24,7 @@ pub enum Precedence { Prefix, // ! ~ + - ++ -- typeof void delete Postfix, // ++ -- Call, // . 
[] () - Primary + //Primary } impl Parser { @@ -288,7 +288,7 @@ impl Parser { self.advance(); // consume '(' - println!("In ("); + //println!("In ("); // TODO IIFE @@ -305,7 +305,7 @@ impl Parser { match self.parse_expression() { Ok(expr) => { - println!("Parsed expr {:#?}", expr); + //println!("Parsed expr {:#?}", expr); //println!("Current token {:#?}", self.peek_token_type()); match expr { diff --git a/src/parser/functions.rs b/src/parser/functions.rs index cef325c..4f4e2c4 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -212,64 +212,4 @@ impl Parser { false } - pub fn is_arrow_function_parameters(&mut self) -> (bool, bool) { - let start_pos = self.current; - - if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - self.advance(); - let is_arrow = self.check(&TokenType::Arrow); - self.current = start_pos; - if is_arrow { - return (true, false); - } - } - - if self.match_token(&TokenType::Ellipsis) { - if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - self.advance(); - if self.match_token(&TokenType::RightParen) && self.check(&TokenType::Arrow) { - self.current = start_pos; - return (true, true); - } - } - self.current = start_pos; - } - - if self.match_token(&TokenType::RightParen) { - let is_arrow = self.check(&TokenType::Arrow); - self.current = start_pos; - return (is_arrow, true); - } - - let mut has_close_paren = false; - - loop { - match self.parse_pattern() { - Ok(arg) => { - continue; - }, - Err(_) if self.match_token(&TokenType::Comma) => { - continue; - }, - Err(_) => if self.match_token(&TokenType::RightParen) { - has_close_paren = true; - break; - }, - Err(_) => { - self.current = start_pos; - return (false, has_close_paren) - }, - } - break; - } - - if self.match_token(&TokenType::RightParen) { - has_close_paren = true; - } - - let is_arrow = self.check(&TokenType::Arrow); - self.current = start_pos; - return (is_arrow, has_close_paren) - } - } diff --git a/src/parser/statements.rs 
b/src/parser/statements.rs index 70cbb83..202dffd 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -461,7 +461,7 @@ impl Parser { //println!("Before parse expression"); let expr = self.parse_expression()?; - println!("After parse expression: {:#?}", expr); + //println!("After parse expression: {:#?}", expr); // Check for directive prologue let is_directive = if let Expression::Literal(Literal::String(_)) = &expr { @@ -607,7 +607,7 @@ impl Parser { let token = self.advance().unwrap().clone(); let name = self.identifier_name(&token)?; let left = Expression::Identifier(name); - + // Check what follows the identifier if self.check(&TokenType::In) { // for-in loop with identifier From 3d76ecf1a4163e08bb37197596d41ce71f35663d Mon Sep 17 00:00:00 2001 From: Jan Cajthaml Date: Tue, 6 May 2025 00:41:02 +0200 Subject: [PATCH 4/7] added lexical context, improvement control flow of parser --- examples/tricky/index.js | 41 +- src/lexer/context.rs | 155 ++++++ src/lexer/interner.rs | 25 + src/lexer/lexer.rs | 503 ++++++++++++------ src/lexer/mod.rs | 5 +- src/lexer/token.rs | 119 ++++- src/parser/asi.rs | 127 +++++ src/parser/classes.rs | 16 +- src/parser/core.rs | 297 +++++------ src/parser/declarations.rs | 45 -- src/parser/error.rs | 178 ++++++- src/parser/expressions.rs | 382 +++++--------- src/parser/functions.rs | 207 ++++---- src/parser/mod.rs | 7 +- src/parser/modules.rs | 191 ++++--- src/parser/patterns.rs | 24 +- src/parser/prelude.rs | 6 + src/parser/state.rs | 14 +- src/parser/statements.rs | 1000 ++++++++++++++++++++---------------- 19 files changed, 2010 insertions(+), 1332 deletions(-) create mode 100644 src/lexer/context.rs create mode 100644 src/lexer/interner.rs create mode 100644 src/parser/asi.rs delete mode 100644 src/parser/declarations.rs create mode 100644 src/parser/prelude.rs diff --git a/examples/tricky/index.js b/examples/tricky/index.js index 9156bcb..c387cb9 100644 --- a/examples/tricky/index.js +++ b/examples/tricky/index.js 
@@ -1,25 +1,24 @@ //OK -//(x=>3); -//(x=>({})); -//((x)=>3); -//((x)=>({})); -//(x)=>{}; -//(x,y,z)=>({}); -//(x,y) => 3; -//x=>3; -//[x=>3]; -//x=>({}); -//{ x: y => 3 }; -//({ x: y => 3 }); -//x=>{}; -//async function* foo(a, b, ...c) {} -//function* foo(a) {} -//(function foo() {}()) - +(x=>3); +(x=>({})); +((x)=>3); +((x)=>({})); +(x)=>{}; +(x,y,z)=>({}); +(x,y) => 3; +x=>3; +[x=>3]; +x=>({}); +(x=>3)(3); +x=>{}; +async function* foo(a, b, ...c) {} +function* foo(a) {} +(function foo() {}()) +((x)=>3)(3); +(x,y,z,a,b,c,d)=>3; +(function() {}) +({ x: y => 3 }); // ERROR -//(x=>3)(3); -//((x)=>3)(3); -//(x,y,z,a,b,c,d)=>3; //function foo(a=1) {} -//(function() {}) \ No newline at end of file +{ x: y => 3 }; diff --git a/src/lexer/context.rs b/src/lexer/context.rs new file mode 100644 index 0000000..6e81e9a --- /dev/null +++ b/src/lexer/context.rs @@ -0,0 +1,155 @@ +use std::fmt; +use crate::lexer::TokenType; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum LexicalContext { + Default, + PropertyKey, + MemberAccess, + ImportExport, + ObjectPattern, + ParameterName { strict_mode: bool }, + FunctionBody { allow_yield: bool, allow_await: bool }, + LoopParameters, + LoopBody, + SwitchBody, +} + +impl fmt::Display for LexicalContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Default => write!(f, "default"), + Self::PropertyKey => write!(f, "property key"), + Self::MemberAccess => write!(f, "member access"), + Self::ImportExport => write!(f, "import export"), + Self::ObjectPattern => write!(f, "object pattern"), + Self::ParameterName { strict_mode: false } => write!(f, "param name"), + Self::ParameterName { strict_mode: true } => write!(f, "strict param name"), + Self::FunctionBody { allow_yield: false, allow_await: false } => write!(f, "function body"), + Self::FunctionBody { allow_yield: true, allow_await: false } => write!(f, "generator function body"), + Self::FunctionBody { allow_yield: false, allow_await: true } 
=> write!(f, "async function body"), + Self::FunctionBody { allow_yield: true, allow_await: true } => write!(f, "async generator function body"), + Self::LoopParameters => write!(f, "loop test"), + Self::LoopBody => write!(f, "loop body"), + Self::SwitchBody => write!(f, "switch body"), + } + } +} + +impl LexicalContext { + + // Fast check if this context allows any keywords as identifiers + pub fn has_keywords_as_identifiers(&self) -> bool { + // Return true if this context might allow any keywords as identifiers + // This is a quick filter to avoid processing tokens unnecessarily + match self { + LexicalContext::Default => false, // Default context doesn't allow keywords as identifiers + // Add other cases based on your implementation + _ => true, + } + } + + /* + // Check if a specific token type can be used as an identifier in this context + fn allows_token_as_identifier(&self, token_type: &TokenType) -> bool { + // First check if the token is a keyword at all + match token_type { + // Match specific keywords that might be allowed as identifiers + TokenType::Await => self.allows_keyword_as_identifier("await"), + TokenType::Yield => self.allows_keyword_as_identifier("yield"), + // Add other keywords that might be allowed as identifiers + TokenType::Let | + TokenType::Static | + TokenType::Implements | + TokenType::Interface | + TokenType::Package | + TokenType::Private | + TokenType::Protected | + TokenType::Public => self.allows_keyword_as_identifier(token_type.keyword_text().unwrap()), + + // Non-keywords or other token types don't need conversion + _ => false, + } + } +*/ + + pub fn allows_token_as_identifier(&self, token_type: &TokenType) -> bool { + match self { + // In property contexts, all keywords can be identifiers except a few special ones + Self::MemberAccess => { + + //let result = matches!(keyword, "default"); + + //println!("Checking in MemberAccess with {:#?}", keyword); + + if token_type == &TokenType::Default { + true + } else { + false + } + + 
//result + //false + }, + Self::PropertyKey => { + //println!("Currently in PropertyKey with {:#?}", keyword); + false + }, + + // In import/export contexts, specific keywords are allowed as identifiers + Self::ImportExport => { + //println!("Currently in ImportExport with {:#?}", keyword); + false + }, + + // In object patterns, allow destructuring with keywords except special ones + Self::ObjectPattern => { + //println!("Currently in ObjectPattern with {:#?}", keyword); + false + }, + + // In parameter names, most keywords can be identifiers in non-strict mode + Self::ParameterName { strict_mode } => { + //println!("Currently in ParameterName strict={:#?} with {:#?}", strict_mode, keyword); + if *strict_mode { + false + } else { + false + } + }, + Self::LoopParameters => { + //println!("Currently in LoopParameters with {:#?}", keyword); + false + }, + // In function bodies, yield and await have special handling + Self::FunctionBody { allow_yield, allow_await } => { + //println!("Currently in FunctionBody with {:#?}", keyword); + + if (*allow_yield && token_type == &TokenType::Yield) || (*allow_await && token_type == &TokenType::Await) { + false + } else { + // Default to not allowing keywords as identifiers in function bodies + false + } + }, + + // In loop bodies, break and continue are special + Self::LoopBody => { + //println!("Currently in LoopBody with {:#?}", keyword); + false + }, + + // In switch bodies, case and default are special + Self::SwitchBody => { + //println!("Currently in SwitchBody with {:#?}", keyword); + false + }, + + // In default context, keywords are not identifiers + Self::Default => { + //println!("Currently in Default with {:#?}", keyword); + false + }, + } + } +} diff --git a/src/lexer/interner.rs b/src/lexer/interner.rs new file mode 100644 index 0000000..c37de4b --- /dev/null +++ b/src/lexer/interner.rs @@ -0,0 +1,25 @@ +use std::collections::HashMap; +use std::rc::Rc; + +pub struct StringInterner { + strings: HashMap>, +} + +impl 
StringInterner { + pub fn new() -> Self { + StringInterner { + strings: HashMap::new(), + } + } + + pub fn intern(&mut self, s: &str) -> Rc { + if let Some(interned) = self.strings.get(s) { + interned.clone() + } else { + // Fix: Use explicit type annotation + let rc: Rc = s.into(); + self.strings.insert(s.to_string(), rc.clone()); + rc + } + } +} \ No newline at end of file diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index b1cba5f..9cd98cc 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -1,14 +1,19 @@ use std::collections::HashSet; use crate::lexer::{Token, TokenType, TemplatePart, LexerError}; +// TODO Specialized Token Handling for Lexer Optimization + pub struct Lexer<'a> { source: &'a str, - chars: Vec, // TODO chars: Peekable>, + bytes: &'a [u8], // Direct access to the underlying bytes + source_len: usize, tokens: Vec, start: usize, current: usize, line: usize, column: usize, + current_char: char, + previous_char: char, } macro_rules! add_token { @@ -21,17 +26,181 @@ macro_rules! 
add_token { } impl<'a> Lexer<'a> { + + #[inline] pub fn new(source: &'a str) -> Self { Lexer { - chars: source.chars().collect(), source, + bytes: source.as_bytes(), + source_len: source.len(), tokens: Vec::with_capacity(source.len() / 4), start: 0, current: 0, line: 1, column: 0, + current_char: '\0', + previous_char: '\0', } } + + #[inline(always)] + fn identifier(&mut self) { + let start_column = self.column - 1; + + // Track whether the identifier is all ASCII + let mut is_all_ascii = true; + + // Fast path for identifiers (most common case) + while !self.is_at_end() { + if self.current < self.source_len { + let b = self.bytes[self.current]; + + // Fast check for ASCII alphanumeric characters + if (b >= b'a' && b <= b'z') || + (b >= b'A' && b <= b'Z') || + (b >= b'0' && b <= b'9') || + b == b'_' || + b == b'$' { + // Advance without the overhead of UTF-8 decoding + self.previous_char = self.current_char; + self.current_char = b as char; + self.current += 1; + self.column += 1; + continue; + } else if b >= 128 { + // Found a non-ASCII byte + is_all_ascii = false; + // Process it with the regular advance method + self.advance(); + continue; + } + } + + // If we reach here, either we're at the end or the next character + // is not an identifier character + if !self.is_at_end() && self.is_alphanumeric(self.peek()) { + let c = self.advance(); + // Check if we just processed a non-ASCII character + if !c.is_ascii() { + is_all_ascii = false; + } + } else { + break; + } + } + + // Calculate the length of the identifier + let length = self.current - self.start; + + // Only check for keywords if the identifier is within the length range of keywords + // and is all ASCII (since all keywords are ASCII) + let token_type = if is_all_ascii && length >= 2 && length <= 10 { + // For ASCII identifiers, we can do direct byte comparisons + let bytes = &self.bytes[self.start..self.current]; + + // First check by length for faster matching + match bytes.len() { + 2 => match bytes { 
+ b"do" => TokenType::Do, + b"if" => TokenType::If, + b"in" => TokenType::In, + b"of" => TokenType::Of, + b"as" => TokenType::As, + _ => self.create_identifier_token(), + }, + 3 => match bytes { + b"for" => TokenType::For, + b"let" => TokenType::Let, + b"new" => TokenType::New, + b"try" => TokenType::Try, + b"var" => TokenType::Var, + b"get" => TokenType::Get, + b"set" => TokenType::Set, + _ => self.create_identifier_token(), + }, + 4 => match bytes { + b"case" => TokenType::Case, + b"else" => TokenType::Else, + b"enum" => TokenType::Enum, + b"from" => TokenType::From, + b"null" => TokenType::Null, + b"this" => TokenType::This, + b"true" => TokenType::True, + b"void" => TokenType::Void, + b"with" => TokenType::With, + b"eval" => TokenType::Eval, + _ => self.create_identifier_token(), + }, + 5 => match bytes { + b"async" => TokenType::Async, + b"await" => TokenType::Await, + b"break" => TokenType::Break, + b"catch" => TokenType::Catch, + b"class" => TokenType::Class, + b"const" => TokenType::Const, + b"false" => TokenType::False, + b"super" => TokenType::Super, + b"throw" => TokenType::Throw, + b"while" => TokenType::While, + b"yield" => TokenType::Yield, + _ => self.create_identifier_token(), + }, + 6 => match bytes { + b"delete" => TokenType::Delete, + b"export" => TokenType::Export, + b"import" => TokenType::Import, + b"public" => TokenType::Public, + b"return" => TokenType::Return, + b"static" => TokenType::Static, + b"switch" => TokenType::Switch, + b"target" => TokenType::Target, + b"typeof" => TokenType::Typeof, + _ => self.create_identifier_token(), + }, + 7 => match bytes { + b"default" => TokenType::Default, + b"extends" => TokenType::Extends, + b"finally" => TokenType::Finally, + b"package" => TokenType::Package, + b"private" => TokenType::Private, + _ => self.create_identifier_token(), + }, + 8 => match bytes { + b"continue" => TokenType::Continue, + b"debugger" => TokenType::Debugger, + b"function" => TokenType::Function, + _ => 
self.create_identifier_token(), + }, + 9 => match bytes { + b"arguments" => TokenType::Arguments, + b"interface" => TokenType::Interface, + b"protected" => TokenType::Protected, + b"undefined" => TokenType::Undefined, + _ => self.create_identifier_token(), + }, + 10 => match bytes { + b"instanceof" => TokenType::InstanceOf, + b"implements" => TokenType::Implements, + b"constructor" => TokenType::Constructor, + _ => self.create_identifier_token(), + }, + _ => self.create_identifier_token(), + } + } else { + // For non-ASCII identifiers or identifiers with lengths outside keyword range + self.create_identifier_token() + }; + + // Add the token + add_token!(self, token_type, length as usize); + } + + // Helper method to create an identifier token + #[inline] + fn create_identifier_token(&self) -> TokenType { + let text = &self.source[self.start..self.current]; + TokenType::Identifier(text.to_string()) + } pub fn scan_tokens(&mut self) -> Result, LexerError> { while !self.is_at_end() { @@ -240,6 +409,7 @@ impl<'a> Lexer<'a> { Ok(()) } + #[inline(always)] fn line_comment(&mut self) { while !self.is_at_end() && self.peek() != '\n' { self.advance(); @@ -274,7 +444,7 @@ impl<'a> Lexer<'a> { Ok(()) } - /// Handles a forward slash character, which could be division, regexp, or comment + #[inline] fn handle_slash(&mut self) -> Result<(), LexerError> { if self.match_char('/') { self.line_comment(); @@ -290,8 +460,7 @@ impl<'a> Lexer<'a> { Ok(()) } - /// Determines if a forward slash should be interpreted as the start of a regular expression - /// rather than a division operator based on JavaScript syntax rules. 
+ #[inline] fn is_regexp_start(&self) -> bool { if self.tokens.is_empty() { return true; @@ -407,9 +576,7 @@ impl<'a> Lexer<'a> { Ok(()) } - - - #[inline] + #[inline(always)] fn is_regexp_flag(&self, c: char) -> bool { matches!(c, 'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd') } @@ -635,8 +802,9 @@ impl<'a> Lexer<'a> { Ok(()) } - fn parse_unicode_escape(&mut self, start_line: usize, start_column: usize) -> Result { - if self.peek() == '{' { + fn parse_unicode_escape(&mut self, start_line: usize, start_column: usize) -> Result { + if self.peek() == '{' { + // Unicode code point escape \u{XXXXXX} - this part is already correct // Unicode code point escape \u{XXXXXX} self.advance(); // Consume '{' @@ -687,41 +855,104 @@ impl<'a> Lexer<'a> { start_column )) } - } else { - // Fixed 4-digit Unicode escape \uXXXX - let mut hex_string = String::with_capacity(4); - - for _ in 0..4 { - if self.is_at_end() || !self.is_hex_digit(self.peek()) { - return Err(LexerError::new( - "Invalid Unicode escape sequence: expected 4 hex digits", - start_line, - start_column - )); - } - hex_string.push(self.advance()); - } - - match u16::from_str_radix(&hex_string, 16) { - Ok(code_unit) => { - match std::char::from_u32(code_unit as u32) { - Some(c) => Ok(c), - None => Err(LexerError::new( - &format!("Invalid Unicode code unit: {}", hex_string), - start_line, - start_column - )) - } - }, - Err(_) => Err(LexerError::new( - &format!("Invalid Unicode escape sequence: \\u{}", hex_string), - start_line, - start_column - )) - } - } + } else { + // Fixed 4-digit Unicode escape \uXXXX + let mut hex_string = String::with_capacity(4); + + for _ in 0..4 { + if self.is_at_end() || !self.is_hex_digit(self.peek()) { + return Err(LexerError::new( + "Invalid Unicode escape sequence: expected 4 hex digits", + start_line, + start_column + )); + } + hex_string.push(self.advance()); + } + + match u16::from_str_radix(&hex_string, 16) { + Ok(code_unit) => { + // Check if this is a high surrogate + if 
(0xD800..=0xDBFF).contains(&code_unit) { + // This is a high surrogate, we need to look for a low surrogate + if self.peek() == '\\' && self.peek_next() == 'u' { + // Save current position in case we need to revert + let save_current = self.current; + let save_line = self.line; + let save_column = self.column; + + // Consume the \u + self.advance(); // \ + self.advance(); // u + + // Parse the next 4 hex digits + let mut low_hex = String::with_capacity(4); + let mut valid_low_surrogate = true; + + for _ in 0..4 { + if self.is_at_end() || !self.is_hex_digit(self.peek()) { + valid_low_surrogate = false; + break; + } + low_hex.push(self.advance()); + } + + if valid_low_surrogate { + if let Ok(low_code_unit) = u16::from_str_radix(&low_hex, 16) { + if (0xDC00..=0xDFFF).contains(&low_code_unit) { + // Valid surrogate pair, calculate the Unicode code point + let code_point = 0x10000 + ((code_unit - 0xD800) as u32 * 0x400) + (low_code_unit - 0xDC00) as u32; + return match std::char::from_u32(code_point) { + Some(c) => Ok(c), + None => Err(LexerError::new( + &format!("Invalid Unicode surrogate pair: \\u{}\\u{}", hex_string, low_hex), + start_line, + start_column + )) + }; + } + } + } + + // If we get here, the sequence after the high surrogate wasn't a valid low surrogate + // Revert to the position after the high surrogate + self.current = save_current; + self.line = save_line; + self.column = save_column; + } + + // Lone high surrogate without a following low surrogate + // In strict mode, this should be an error, but JavaScript allows it + // and replaces it with a replacement character + return Ok('\u{FFFD}'); // Unicode replacement character + } + + // Check if this is a low surrogate without a preceding high surrogate + if (0xDC00..=0xDFFF).contains(&code_unit) { + // Lone low surrogate, also replace with replacement character + return Ok('\u{FFFD}'); + } + + // Regular BMP character + match std::char::from_u32(code_unit as u32) { + Some(c) => Ok(c), + None => 
Err(LexerError::new( + &format!("Invalid Unicode code unit: {}", hex_string), + start_line, + start_column + )) + } + }, + Err(_) => Err(LexerError::new( + &format!("Invalid Unicode escape sequence: \\u{}", hex_string), + start_line, + start_column + )) + } + } } + #[inline] fn parse_hex_escape(&mut self, start_line: usize, start_column: usize) -> Result { // Hexadecimal escape sequence \xXX let mut hex_string = String::with_capacity(2); @@ -852,6 +1083,7 @@ impl<'a> Lexer<'a> { } } + #[inline] fn binary_number(&mut self, start_column: usize) -> Result<(), LexerError> { let start = self.current; @@ -909,6 +1141,7 @@ impl<'a> Lexer<'a> { } } + #[inline] fn octal_number(&mut self, start_column: usize) -> Result<(), LexerError> { let start = self.current; @@ -966,7 +1199,7 @@ impl<'a> Lexer<'a> { } } - + #[inline] fn hex_number(&mut self, start_column: usize) -> Result<(), LexerError> { let start = self.current; @@ -1024,13 +1257,14 @@ impl<'a> Lexer<'a> { } } - #[inline] + #[inline(always)] fn consume_digits(&mut self) { while self.is_digit(self.peek()) || self.peek() == '_' { self.advance(); } } - + + #[inline] fn extract_number_value(&self, start: usize, end: usize) -> String { // Remove numeric separators (_) let mut value_str = String::with_capacity(end - start); @@ -1042,158 +1276,109 @@ impl<'a> Lexer<'a> { value_str } - fn identifier(&mut self) { - let start_column = self.column - 1; - - while self.is_alphanumeric(self.peek()) { - self.advance(); - } - - // Get the identifier text - let text = &self.source[self.start..self.current]; - - // Check if it's a keyword using a match statement for better performance - let token_type = match text { - "break" => TokenType::Break, - "case" => TokenType::Case, - "catch" => TokenType::Catch, - "class" => TokenType::Class, - "const" => TokenType::Const, - "continue" => TokenType::Continue, - "debugger" => TokenType::Debugger, - "default" => TokenType::Default, - "delete" => TokenType::Delete, - "do" => TokenType::Do, - 
"else" => TokenType::Else, - "enum" => TokenType::Enum, - "export" => TokenType::Export, - "extends" => TokenType::Extends, - "false" => TokenType::False, - "finally" => TokenType::Finally, - "for" => TokenType::For, - "function" => TokenType::Function, - "if" => TokenType::If, - "import" => TokenType::Import, - "in" => TokenType::In, - "instanceof" => TokenType::InstanceOf, - "new" => TokenType::New, - "null" => TokenType::Null, - "return" => TokenType::Return, - "super" => TokenType::Super, - "undefined" => TokenType::Undefined, - "constructor" => TokenType::Constructor, - "switch" => TokenType::Switch, - "this" => TokenType::This, - "throw" => TokenType::Throw, - "true" => TokenType::True, - "try" => TokenType::Try, - "typeof" => TokenType::Typeof, - "var" => TokenType::Var, - "void" => TokenType::Void, - "while" => TokenType::While, - "with" => TokenType::With, - "yield" => TokenType::Yield, - "async" => TokenType::Async, - "await" => TokenType::Await, - "let" => TokenType::Let, - "static" => TokenType::Static, - "get" => TokenType::Get, - "set" => TokenType::Set, - "of" => TokenType::Of, - "as" => TokenType::As, - "from" => TokenType::From, - "target" => TokenType::Target, - "implements" => TokenType::Implements, - "interface" => TokenType::Interface, - "package" => TokenType::Package, - "private" => TokenType::Private, - "protected" => TokenType::Protected, - "public" => TokenType::Public, - "arguments" => TokenType::Arguments, - "eval" => TokenType::Eval, - _ => TokenType::Identifier(text.to_string()), - }; - - let length = (self.current - self.start) as usize; - - add_token!(self, token_type, length); - } - - #[inline] + #[inline(always)] fn is_digit(&self, c: char) -> bool { - c.is_ascii_digit() + c >= '0' && c <= '9' // Direct comparison is faster than is_ascii_digit() } - #[inline] + #[inline(always)] fn is_alpha(&self, c: char) -> bool { - c.is_ascii_alphabetic() || c == '_' || c == '$' + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c 
== '$' } - #[inline] + #[inline(always)] fn is_alphanumeric(&self, c: char) -> bool { self.is_alpha(c) || self.is_digit(c) } - #[inline] + #[inline(always)] fn is_hex_digit(&self, c: char) -> bool { - c.is_ascii_hexdigit() + (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') } - #[inline] + #[inline(always)] + fn is_at_end(&self) -> bool { + self.current >= self.source_len + } + + #[inline(always)] fn is_octal_digit(&self, c: char) -> bool { c >= '0' && c <= '7' } - #[inline] - fn is_at_end(&self) -> bool { - self.current >= self.chars.len() - } - #[inline] + #[inline(always)] fn advance(&mut self) -> char { - let c = self.chars[self.current]; - self.current += 1; + if self.is_at_end() { + return '\0'; + } + + // Fast path for ASCII (most common case in JS) + if self.current < self.source_len && self.bytes[self.current] < 128 { + let c = self.bytes[self.current] as char; + self.previous_char = self.current_char; + self.current_char = c; + self.current += 1; + self.column += 1; + return c; + } + + // Fallback for non-ASCII (UTF-8) + let c = self.source[self.current..].chars().next().unwrap(); + self.previous_char = self.current_char; + self.current_char = c; + self.current += c.len_utf8(); self.column += 1; c } - #[inline] + #[inline(always)] fn peek(&self) -> char { if self.is_at_end() { - '\0' - } else { - self.chars[self.current] + return '\0'; } + if self.bytes[self.current] < 128 { + return self.bytes[self.current] as char; + } + self.source[self.current..].chars().next().unwrap() } - - #[inline] + + #[inline(always)] fn peek_next(&self) -> char { - if self.current + 1 >= self.chars.len() { - '\0' - } else { - self.chars[self.current + 1] + if self.current + 1 >= self.source_len { + return '\0'; + } + + // Fast path for ASCII + if self.bytes[self.current] < 128 && self.bytes[self.current + 1] < 128 { + return self.bytes[self.current + 1] as char; + } + + // If current is ASCII but next might not be + if self.bytes[self.current] < 128 { + 
let next_pos = self.current + 1; + return self.source[next_pos..].chars().next().unwrap_or('\0'); } + + // Both current and next are non-ASCII + let mut iter = self.source[self.current..].chars(); + iter.next(); + iter.next().unwrap_or('\0') } - - #[inline] + + #[inline(always)] fn peek_previous(&self) -> char { - if self.current == 0 { - '\0' - } else { - self.chars[self.current - 1] - } + self.previous_char } - #[inline] + #[inline(always)] fn match_char(&mut self, expected: char) -> bool { if self.is_at_end() || self.peek() != expected { false } else { - self.current += 1; - self.column += 1; + self.advance(); true } } - } diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 3e04d58..ff13c5e 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,7 +1,10 @@ mod error; mod token; mod lexer; +mod interner; +mod context; pub use error::LexerError; pub use token::{Token, TokenType, TemplatePart}; -pub use lexer::Lexer; \ No newline at end of file +pub use lexer::Lexer; +pub use context::LexicalContext; diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 5d9d88d..2089b11 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -66,7 +66,10 @@ pub enum TokenType { NumberLiteral(f64), BigIntLiteral(String), RegExpLiteral(String, String), - True, False, Null, Undefined, + True, + False, + Null, + Undefined, // Keywords Var, Let, @@ -125,6 +128,120 @@ pub enum TokenType { EOF, } +impl TokenType { + + pub fn keyword_text(&self) -> Option<&str> { + match self { + // Characters/operators don't have keyword text + TokenType::LeftParen | TokenType::RightParen | TokenType::LeftBrace | + TokenType::RightBrace | TokenType::LeftBracket | TokenType::RightBracket | + TokenType::Comma | TokenType::Dot | TokenType::Semicolon | TokenType::Colon | + TokenType::Question | TokenType::Arrow | TokenType::Hash | TokenType::Plus | + TokenType::PlusPlus | TokenType::PlusEqual | TokenType::Minus | + TokenType::MinusMinus | TokenType::MinusEqual | TokenType::Star | + 
TokenType::StarStar | TokenType::StarEqual | TokenType::StarStarEqual | + TokenType::Slash | TokenType::SlashEqual | TokenType::Percent | + TokenType::PercentEqual | TokenType::Equal | TokenType::EqualEqual | + TokenType::EqualEqualEqual | TokenType::Bang | TokenType::BangEqual | + TokenType::BangEqualEqual | TokenType::Greater | TokenType::GreaterEqual | + TokenType::GreaterGreater | TokenType::GreaterGreaterEqual | + TokenType::GreaterGreaterGreater | TokenType::GreaterGreaterGreaterEqual | + TokenType::Less | TokenType::LessEqual | TokenType::LessLess | + TokenType::LessLessEqual | TokenType::Ampersand | TokenType::AmpersandEqual | + TokenType::AmpersandAmpersand | TokenType::AmpersandAmpersandEqual | + TokenType::Pipe | TokenType::PipeEqual | TokenType::PipePipe | + TokenType::PipePipeEqual | TokenType::Caret | TokenType::CaretEqual | + TokenType::Tilde | TokenType::Ellipsis | TokenType::QuestionQuestion | + TokenType::QuestionQuestionEqual | TokenType::QuestionDot => None, + + // Literals don't have keyword text + TokenType::Identifier(_) | TokenType::StringLiteral(_) | + TokenType::TemplateLiteral(_) | TokenType::NumberLiteral(_) | + TokenType::BigIntLiteral(_) | TokenType::RegExpLiteral(_, _) => None, + + // Boolean literals and null + TokenType::True => Some("true"), + TokenType::False => Some("false"), + TokenType::Null => Some("null"), + TokenType::Undefined => Some("undefined"), + + // Keywords + TokenType::Var => Some("var"), + TokenType::Let => Some("let"), + TokenType::With => Some("with"), + TokenType::Const => Some("const"), + TokenType::Function => Some("function"), + TokenType::Return => Some("return"), + TokenType::If => Some("if"), + TokenType::Else => Some("else"), + TokenType::While => Some("while"), + TokenType::For => Some("for"), + TokenType::Break => Some("break"), + TokenType::Continue => Some("continue"), + TokenType::This => Some("this"), + TokenType::Super => Some("super"), + TokenType::New => Some("new"), + TokenType::Delete => 
Some("delete"), + TokenType::Typeof => Some("typeof"), + TokenType::Void => Some("void"), + TokenType::In => Some("in"), + TokenType::InstanceOf => Some("instanceof"), + TokenType::Try => Some("try"), + TokenType::Catch => Some("catch"), + TokenType::Finally => Some("finally"), + TokenType::Throw => Some("throw"), + TokenType::Switch => Some("switch"), + TokenType::Case => Some("case"), + TokenType::Default => Some("default"), + TokenType::Await => Some("await"), + TokenType::Async => Some("async"), + TokenType::Do => Some("do"), + TokenType::Enum => Some("enum"), + TokenType::Of => Some("of"), + TokenType::Target => Some("target"), + TokenType::Implements => Some("implements"), + TokenType::Interface => Some("interface"), + TokenType::Package => Some("package"), + TokenType::Private => Some("private"), + TokenType::Protected => Some("protected"), + TokenType::Public => Some("public"), + TokenType::Arguments => Some("arguments"), + TokenType::Eval => Some("eval"), + TokenType::Debugger => Some("debugger"), + TokenType::Class => Some("class"), + TokenType::Extends => Some("extends"), + TokenType::Constructor => Some("constructor"), + TokenType::Static => Some("static"), + TokenType::Get => Some("get"), + TokenType::Set => Some("set"), + TokenType::Yield => Some("yield"), + TokenType::Import => Some("import"), + TokenType::Export => Some("export"), + TokenType::From => Some("from"), + TokenType::As => Some("as"), + + // Sentinel + TokenType::EOF => None, + } + } + + pub fn to_string(&self) -> String { + match self { + TokenType::Identifier(name) => name.clone(), + TokenType::StringLiteral(s) => format!("\"{}\"", s), + TokenType::NumberLiteral(n) => n.to_string(), + TokenType::BigIntLiteral(b) => format!("{}n", b), + TokenType::RegExpLiteral(pattern, flags) => format!("/{}/{}", pattern, flags), + TokenType::TemplateLiteral(_) => "`...`".to_string(), + _ => match self.keyword_text() { + Some(text) => text.to_string(), + None => format!("{:?}", self), + }, + } + } +} + 
+ #[derive(Debug, Clone, PartialEq)] pub enum TemplatePart { String(String), diff --git a/src/parser/asi.rs b/src/parser/asi.rs new file mode 100644 index 0000000..aad2442 --- /dev/null +++ b/src/parser/asi.rs @@ -0,0 +1,127 @@ +use super::prelude::*; + +use crate::lexer::{Token, TokenType}; +use super::error::{ParserError, ParseResult}; +use super::core::Parser; + +impl Parser { + + pub fn consume_semicolon(&mut self, message: &str) -> ParseResult { + // Case 1: Explicit semicolon + if self.match_token(&TokenType::Semicolon) { + return Ok(self.previous().unwrap().clone()); + } + + // Automatic Semicolon Insertion (ASI) rules + + // Case 2: Line terminator + if self.previous_line_terminator() { + // Special case: restricted productions + // These statements cannot be followed by a line terminator without a semicolon + if let Some(prev) = self.previous() { + match prev.token_type { + // Rule: No LineTerminator here after return/throw/yield/break/continue + TokenType::Return | + TokenType::Throw | + TokenType::Yield | + TokenType::Break | + TokenType::Continue => { + // Check if there's an expression after these keywords + // If not, ASI applies + if self.is_expression_start() { + let binding = Token::new(TokenType::EOF, 0, 0, 0); + let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); + return Err(ParserError::new(message, token.line, token.column)); + } + }, + // Rule: No ASI before postfix ++ or -- + _ if self.check(&TokenType::PlusPlus) || self.check(&TokenType::MinusMinus) => { + let binding = Token::new(TokenType::EOF, 0, 0, 0); + let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); + return Err(ParserError::new(message, token.line, token.column)); + }, + _ => {} + } + } + + return Ok(self.previous().unwrap().clone()); + } + + // Case 3: Closing brace + if self.check(&TokenType::RightBrace) { + return Ok(self.previous().unwrap().clone()); + } + + // Case 4: End of input + if self.is_at_end() { + 
return Ok(self.previous().unwrap().clone()); + } + + // Case 5: The next token would cause a syntax error + // This is a complex case that requires looking ahead + // For example, in "{ 1 \n 2 }" we need to insert a semicolon after 1 + if self.would_cause_syntax_error() { + return Ok(self.previous().unwrap().clone()); + } + + // Otherwise, it's an error + let binding = Token::new(TokenType::EOF, 0, 0, 0); + let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); + Err(ParserError::new(message, token.line, token.column)) + } + + // Helper method to check if the current token would start an expression + fn is_expression_start(&self) -> bool { + match self.peek_token_type() { + Some(TokenType::Identifier(_)) | + Some(TokenType::NumberLiteral(_)) | + Some(TokenType::StringLiteral(_)) | + Some(TokenType::TemplateLiteral(_)) | + Some(TokenType::RegExpLiteral(_, _)) | + Some(TokenType::True) | + Some(TokenType::False) | + Some(TokenType::Null) | + Some(TokenType::This) | + Some(TokenType::LeftParen) | + Some(TokenType::LeftBracket) | + Some(TokenType::LeftBrace) | + Some(TokenType::Function) | + Some(TokenType::New) | + Some(TokenType::Delete) | + Some(TokenType::Typeof) | + Some(TokenType::Void) | + Some(TokenType::Plus) | + Some(TokenType::Minus) | + Some(TokenType::Bang) | + Some(TokenType::Tilde) => true, + _ => false + } + } + + // Helper method to check if continuing without a semicolon would cause a syntax error + fn would_cause_syntax_error(&self) -> bool { + // This is a simplified implementation + // A full implementation would need to look ahead and check for specific patterns + + // For example, if we have "a \n (" we need to insert a semicolon + // because "a(" would be parsed as a function call + if let Some(prev_token) = self.previous() { + if let Some(next_token) = self.peek_token() { + match (&prev_token.token_type, &next_token.token_type) { + // Cases where continuing would cause a syntax error + (_, TokenType::LeftParen) | 
+ (_, TokenType::LeftBracket) | + (_, TokenType::Plus) | + (_, TokenType::Minus) | + (_, TokenType::Slash) | + (_, TokenType::Star) => true, + _ => false + } + } else { + false + } + } else { + false + } + } +} \ No newline at end of file diff --git a/src/parser/classes.rs b/src/parser/classes.rs index 95bd5cf..ff5a555 100644 --- a/src/parser/classes.rs +++ b/src/parser/classes.rs @@ -1,5 +1,8 @@ +use super::prelude::*; + + use crate::ast::*; -use crate::lexer::TokenType; +use crate::lexer::{TokenType, LexicalContext}; use super::error::ParseResult; use super::core::Parser; use super::expressions::Precedence; @@ -84,14 +87,20 @@ impl Parser { } // Parse property key - let key = self.parse_property_key()?; + let key = self.with_context(LexicalContext::PropertyKey, |parser| { + parser.parse_property_key() + })?; // Check for constructor method if !is_static && !is_async && !is_generator && kind == MethodKind::Method { if let PropertyKey::Identifier(name) = &key { if name.as_ref() == "constructor" { let params = self.parse_function_params()?; + + self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(false, false)?; + self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + return Ok(ClassMember::Constructor { params, body }); } } @@ -100,7 +109,10 @@ impl Parser { // Method definition if self.check(&TokenType::LeftParen) || is_generator || is_async { let params = self.parse_function_params()?; + + self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_async, is_generator)?; + self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; return Ok(ClassMember::Method { key, diff --git a/src/parser/core.rs b/src/parser/core.rs index bbc079c..4af784b 100644 --- a/src/parser/core.rs +++ b/src/parser/core.rs @@ -1,5 +1,7 @@ +use super::prelude::*; + use crate::ast::*; -use crate::lexer::{Token, TokenType}; +use 
crate::lexer::{Token, TokenType, LexicalContext}; use super::error::{ParserError, ParseResult}; use super::state::ParserState; use std::collections::HashSet; @@ -11,6 +13,8 @@ pub struct Parser { pub comments: Vec, pub state: ParserState, pub source: Option, + + context_stack: Vec, } impl Parser { @@ -22,9 +26,119 @@ impl Parser { comments: Vec::new(), state: ParserState::new(), source: None, + context_stack: vec![LexicalContext::Default], + } + } + + // Add methods to manage the context stack + pub fn push_context(&mut self, context: LexicalContext) { + self.context_stack.push(context); + } + + pub fn pop_context(&mut self) -> Option { + if self.context_stack.len() > 1 { + self.context_stack.pop() + } else { + None + } + } + + pub fn current_context(&self) -> LexicalContext { + *self.context_stack.last().unwrap_or(&LexicalContext::Default) + } + + pub fn get_context_stack_info(&self) -> Vec { + // Get up to the last 6 entries from the context stack + let stack_len = self.context_stack.len(); + let start_idx = if stack_len > 6 { stack_len - 6 } else { 0 }; + + self.context_stack[start_idx..] 
+ .iter() + .rev() + .map(|ctx| format!("{}", ctx)) + .collect() + } + + pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult + where + F: FnOnce(&mut Self) -> ParseResult, + { + let current_pos = self.current; + + // Only process tokens if the context has any keywords that can be identifiers + if context.has_keywords_as_identifiers() { + for token in self.tokens.iter_mut().skip(current_pos) { + // Work directly with the token type without extracting text first + if context.allows_token_as_identifier(&token.token_type) { + // Get the keyword text only when we know we need to convert it + if let Some(text) = token.token_type.keyword_text() { + token.token_type = TokenType::Identifier(text.to_string()); + } + } + } } + + self.push_context(context); + let result = f(self); + self.pop_context(); + result } + /* + pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult + where + F: FnOnce(&mut Self) -> ParseResult, + { + let current_pos = self.current; + for token in self.tokens.iter_mut().skip(current_pos) { + // TODO might be improved to not need keyword_text invocation and do checks on tokens dirrectly + if let Some(text) = token.token_type.keyword_text() { + if context.allows_keyword_as_identifier(text) { + token.token_type = TokenType::Identifier(text.to_string()); + } + } + } + self.push_context(context); + let result = f(self); + self.pop_context(); + result + } + */ + + // Helper methods to check contexts + pub fn is_in_loop(&self) -> bool { + self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::LoopBody)) + } + + pub fn is_in_switch(&self) -> bool { + self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::SwitchBody)) + } + + pub fn is_in_function(&self) -> bool { + self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) + } + + pub fn allows_yield(&self) -> bool { + if let Some(LexicalContext::FunctionBody { allow_yield, .. 
}) = self.context_stack.iter() + .rev() + .find(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) + { + *allow_yield + } else { + false + } + } + + pub fn allows_await(&self) -> bool { + if let Some(LexicalContext::FunctionBody { allow_await, .. }) = self.context_stack.iter() + .rev() + .find(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) + { + *allow_await + } else { + false + } + } // Method to attach source code to an existing parser pub fn attach_source(&mut self, source: &str) { @@ -91,23 +205,7 @@ impl Parser { } if token_type == &TokenType::Semicolon { - if self.match_token(&TokenType::Semicolon) { - return Ok(self.previous().unwrap().clone()); - } - - // ASI rules: insert semicolon if - // 1. The current token is on a new line from the previous token - // 2. The current token is a closing brace - // 3. We've reached the end of input - if self.previous_line_terminator() || - self.check(&TokenType::RightBrace) || - self.is_at_end() { - if let Some(prev) = self.previous() { - return Ok(prev.clone()); - } else { - return DUMMY_TOKEN.with(|token| Ok(token.clone())); - } - } + return self.consume_semicolon(message); } else if self.check(token_type) { return Ok(self.advance().unwrap().clone()); } @@ -118,23 +216,11 @@ impl Parser { t } else { return DUMMY_TOKEN.with(|token| { - Err(ParserError::with_token_span( - message, - token.line, - token.column, - token.length, - &self.get_source_text() - )) + Err(parser_error_at_current!(self, message)) }); }; - Err(ParserError::with_token_span( - message, - token.line, - token.column, - token.length, - &self.get_source_text() - )) + Err(parser_error_at_current!(self, message)) } pub fn previous_line_terminator(&self) -> bool { @@ -146,78 +232,15 @@ impl Parser { false } - // TODO delete - pub fn identifier_name(&self, token: &Token) -> ParseResult> { - match &token.token_type { - TokenType::Identifier(name) => Ok(name.clone().into_boxed_str()), - _ => Err(ParserError::with_token_span( - "Expected 
identifier", - token.line, - token.column, - token.length, - &self.get_source_text() - )), - } - } - pub fn expect_identifier(&mut self, message: &str) -> ParseResult> { - // Create a binding for the error case - let binding = Token::new(TokenType::EOF, 0, 0, 0); - - // Get the token, handling the case where there might not be one - let token = match self.advance() { - Some(t) => t, - None => { - let last = self.previous().unwrap_or(&binding); - return Err(ParserError::with_token_span( - message, - last.line, - last.column, - last.length, - &self.get_source_text() - )); - } - }; - - match &token.token_type { - TokenType::Identifier(name) => Ok(name.clone().into_boxed_str()), - TokenType::Default => Ok("default".into()), - TokenType::As => Ok("as".into()), - TokenType::For => Ok("for".into()), - TokenType::Target => Ok("target".into()), - TokenType::From => Ok("from".into()), - TokenType::Class => Ok("class".into()), - TokenType::Get => Ok("get".into()), - TokenType::Set => Ok("set".into()), - _ => Err(ParserError::with_token_span( - &format!("Expected identifier, found {:?}", token.token_type), - token.line, - token.column, - token.length, - &self.get_source_text() - )), + if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { + self.advance(); + Ok(name.into_boxed_str()) + } else { + Err(parser_error_at_current_mut!(self, "Expected identifier")) } } - // Error helper - pub fn error_unexpected(&self, message: &str) -> ParserError { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = match self.peek_token() { - Some(t) => t, - None => match self.previous() { - Some(t) => t, - None => &binding - } - }; - ParserError::with_token_span( - message, - token.line, - token.column, - token.length, - &self.get_source_text() - ) - } - pub fn get_source_text(&self) -> String { self.source.clone().unwrap_or_default() } @@ -257,13 +280,7 @@ impl Parser { // Ensure we've consumed all tokens if !self.is_at_end() && 
!matches!(self.peek_token_type(), Some(TokenType::EOF)) { let token = self.peek_token().unwrap(); - return Err(ParserError::with_token_span( - "Unexpected token after statement", - token.line, - token.column, - token.length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Unexpected token after statement")); } Ok(stmt) @@ -271,13 +288,7 @@ impl Parser { pub fn parse_single_expression(&mut self) -> ParseResult { if self.tokens.is_empty() { - return Err(ParserError::with_token_span( - "Empty input", - 0, - 0, - 0, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Empty input")); } let expr = self.parse_expression()?; @@ -285,13 +296,7 @@ impl Parser { // Ensure we've consumed all tokens if !self.is_at_end() && !matches!(self.peek_token_type(), Some(TokenType::EOF)) { let token = self.peek_token().unwrap(); - return Err(ParserError::with_token_span( - "Unexpected token after expression", - token.line, - token.column, - token.length, - &self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "Unexpected token after expression")); } Ok(expr) @@ -306,6 +311,7 @@ impl Parser { self.comments.push(comment); } + // TODO delete // Helper method to handle parsing of "enum" keyword which is reserved in strict mode pub fn handle_reserved_word(&self, word: &str) -> ParseResult<()> { if self.state.in_strict_mode { @@ -313,13 +319,7 @@ impl Parser { if reserved_words.contains(&word) { let token = self.previous().unwrap(); - return Err(super::error::ParserError::with_token_span( - &format!("'{}' is a reserved word in strict mode", word), - token.line, - token.column, - token.length, - &self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "'{}' is a reserved word in strict mode", word)); } } @@ -331,13 +331,7 @@ impl Parser { if self.state.in_strict_mode { if name == "eval" || name == "arguments" { let token = self.previous().unwrap(); - return 
Err(super::error::ParserError::with_token_span( - &format!("'{}' cannot be used as a variable name in strict mode", name), - token.line, - token.column, - token.length, - &self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "'{}' cannot be used as a variable name in strict mode", name)); } } @@ -351,23 +345,11 @@ impl Parser { for param in params { if let Expression::Identifier(name) = param { if self.state.in_strict_mode && (name.as_ref() == "eval" || name.as_ref() == "arguments") { - return Err(super::error::ParserError::with_token_span( - &format!("'{}' cannot be used as a parameter name in strict mode", name), - self.previous().unwrap().line, - self.previous().unwrap().column, - self.previous().unwrap().length, - &self.get_source_text(), - )); + return Err(parser_error_at_previous!(self, "'{}' cannot be used as a parameter name in strict mode", name)); } if !seen_params.insert(name.clone()) { - return Err(super::error::ParserError::with_token_span( - &format!("Duplicate parameter name '{}'", name), - self.previous().unwrap().line, - self.previous().unwrap().column, - self.previous().unwrap().length, - &self.get_source_text(), - )); + return Err(parser_error_at_previous!(self, "Duplicate parameter name '{}'", name)); } } } @@ -378,13 +360,7 @@ impl Parser { // Helper method to handle octal literals in strict mode pub fn validate_octal_literal(&self, value: &str) -> ParseResult<()> { if self.state.in_strict_mode && value.starts_with('0') && !value.starts_with("0x") && !value.starts_with("0b") && !value.starts_with("0o") { - return Err(super::error::ParserError::with_token_span( - "Octal literals are not allowed in strict mode", - self.previous().unwrap().line, - self.previous().unwrap().column, - self.previous().unwrap().length, - &self.get_source_text(), - )); + return Err(parser_error_at_previous!(self, "Octal literals are not allowed in strict mode")); } Ok(()) @@ -469,15 +445,8 @@ impl Parser { } else { unreachable!() } - } else if 
self.check(&TokenType::Default) { - Ok(PropertyKey::Identifier("default".into())) - } else if self.check(&TokenType::Get) { - Ok(PropertyKey::Identifier("get".into())) - } - else if self.check(&TokenType::Set) { - Ok(PropertyKey::Identifier("set".into())) } else { - let name = self.expect_identifier("Expected property name 999")?; + let name = self.expect_identifier("Expected property name 0")?; Ok(PropertyKey::Identifier(name)) } } diff --git a/src/parser/declarations.rs b/src/parser/declarations.rs deleted file mode 100644 index 6347cdc..0000000 --- a/src/parser/declarations.rs +++ /dev/null @@ -1,45 +0,0 @@ -use crate::ast::*; -use crate::lexer::{Token, TokenType}; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - // Variable declarations - pub fn parse_variable_declaration(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap_or_else(|| Token::new(TokenType::EOF, 0, 0, 0)); - - let kind = match token.token_type { - TokenType::Var => VariableKind::Var, - TokenType::Let => VariableKind::Let, - TokenType::Const => VariableKind::Const, - _ => unreachable!(), - }; - - // Parse first declarator (required) - let mut declarations = vec![self.parse_variable_declarator()?]; - - // Parse additional declarators separated by commas - while self.match_token(&TokenType::Comma) { - declarations.push(self.parse_variable_declarator()?); - } - - // Consume semicolon unless we're in a for-in/of loop - if !self.state.in_loop { - self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?; - } - - Ok(VariableDeclaration { declarations, kind }) - } - - pub fn parse_variable_declarator(&mut self) -> ParseResult { - let id = self.parse_pattern()?; - - // Parse optional initializer - let init = self.match_token(&TokenType::Equal) - .then(|| self.parse_expression()) - .transpose()?; - - Ok(VariableDeclarator { id, init }) - } -} diff --git a/src/parser/error.rs b/src/parser/error.rs index 8ce3549..63609ad 100644 --- 
a/src/parser/error.rs +++ b/src/parser/error.rs @@ -1,4 +1,5 @@ -use crate::lexer::LexerError; +use crate::lexer::{LexerError, Token}; +use super::core::Parser; use std::fmt; #[derive(Debug, Clone)] @@ -8,30 +9,110 @@ pub struct ParserError { pub column: usize, pub source_line: Option, pub source_span: Option<(usize, usize)>, + pub context_stack: Vec, + pub current_token: Option, } impl ParserError { - - pub fn with_token_span(message: &str, line: usize, column: usize, token_length: usize, source: &str) -> Self { - // Extract just the relevant line with limited context - let source_line = extract_source_line_with_context(source, line, column, 60); - let span_end = column + token_length; + + pub fn new(message: &str, line: usize, column: usize) -> Self { + ParserError { + message: message.to_string(), + line, + column, + source_line: None, + source_span: None, + context_stack: Vec::new(), + current_token: None, + } + } + + /// Create a parser error from a parser reference and token information + pub fn from_parser(parser: &Parser, message: &str, line: usize, column: usize, token_length: usize) -> Self { + let source = parser.get_source_text(); - // Adjust column if we've added ellipsis at the start + let source_line = extract_source_line_with_context(&source, line, column, 60); + let span_end = column + token_length; + let (adjusted_column, adjusted_span_end) = if source_line.starts_with("...") { - (column.min(60) + 3, span_end.min(60) + 3) + let adjusted_col = column.min(60) + 3; + let adjusted_end = adjusted_col + token_length; + (adjusted_col, adjusted_end) } else { (column, span_end) }; - + + let context_stack = parser.get_context_stack_info(); + + let current_token = parser.peek_token().cloned(); + ParserError { message: message.to_string(), line, column, source_line: Some(source_line), source_span: Some((adjusted_column, adjusted_span_end)), + context_stack, + current_token, + } + } + + /// Create a parser error from the current token with an immutable 
reference + pub fn at_current(parser: &Parser, message: &str) -> Self { + if let Some(token) = parser.peek_token() { + Self::from_parser( + parser, + message, + token.line, + token.column, + token.length + ) + } else if let Some(token) = parser.previous() { + Self::from_parser( + parser, + message, + token.line, + token.column, + token.length + ) + } else { + Self::new(message, 0, 0) } } + + /// Create a parser error from the current token with a mutable reference + pub fn at_current_mut(parser: &mut Parser, message: &str) -> Self { + Self::at_current(&*parser, message) + } + + /// Create a parser error from the previous token with an immutable reference + pub fn at_previous(parser: &Parser, message: &str) -> Self { + if let Some(token) = parser.previous() { + Self::from_parser( + parser, + message, + token.line, + token.column, + token.length + ) + } else if let Some(token) = parser.peek_token() { + Self::from_parser( + parser, + message, + token.line, + token.column, + token.length + ) + } else { + // Fallback if no token is available + Self::new(message, 0, 0) + } + } + + /// Create a parser error from the previous token with a mutable reference + pub fn at_previous_mut(parser: &mut Parser, message: &str) -> Self { + Self::at_previous(&*parser, message) + } } @@ -59,14 +140,47 @@ impl fmt::Display for ParserError { write!(f, " ")?; } + // Calculate how many carets to print (limited by the actual line length) + let visible_end = if let Some(line) = &self.source_line { + end.min(start + line.len() - start.min(line.len())) + } else { + end + }; + // Print carets for the span length - for _ in start..end.max(start+1) { + for _ in start..visible_end.max(start+1) { write!(f, "^")?; } writeln!(f)?; + + // Print current token information if available + if let Some(token) = &self.current_token { + writeln!(f, "\nCurrent token: {:#?}", token.token_type)?; + } + + // Print context stack information if available + if !self.context_stack.is_empty() { + writeln!(f, "\nLexical 
context stack (newest first):")?; + for (i, context) in self.context_stack.iter().enumerate() { + writeln!(f, " {}: {}", i, context)?; + } + } } else { writeln!(f, "at line {}, column {}", self.line, self.column)?; + + // Print current token information if available + if let Some(token) = &self.current_token { + writeln!(f, "\nCurrent token: {:#?}", token.token_type)?; + } + + // Print context stack information if available + if !self.context_stack.is_empty() { + writeln!(f, "\nLexical context stack (newest first):")?; + for (i, context) in self.context_stack.iter().enumerate() { + writeln!(f, " {}: {}", i, context)?; + } + } } Ok(()) @@ -88,7 +202,6 @@ fn num_digits(n: usize) -> usize { count } - impl std::error::Error for ParserError {} impl From for ParserError { @@ -99,6 +212,8 @@ impl From for ParserError { column: error.column, source_line: None, source_span: None, + context_stack: Vec::new(), + current_token: None, } } } @@ -149,5 +264,44 @@ fn extract_source_line_with_context(source: &str, line_number: usize, column: us result } -/// Type alias for parser results pub type ParseResult = Result; + +#[macro_export] +macro_rules! parser_error_at_current { + ($self:expr, $message:expr) => { + $crate::parser::error::ParserError::at_current($self, $message) + }; + ($self:expr, $fmt:expr, $($arg:tt)*) => { + $crate::parser::error::ParserError::at_current($self, &format!($fmt, $($arg)*)) + }; +} + +#[macro_export] +macro_rules! parser_error_at_previous { + ($self:expr, $message:expr) => { + $crate::parser::error::ParserError::at_previous($self, $message) + }; + ($self:expr, $fmt:expr, $($arg:tt)*) => { + $crate::parser::error::ParserError::at_previous($self, &format!($fmt, $($arg)*)) + }; +} + +#[macro_export] +macro_rules! 
parser_error_at_current_mut { + ($self:expr, $message:expr) => { + $crate::parser::error::ParserError::at_current_mut($self, $message) + }; + ($self:expr, $fmt:expr, $($arg:tt)*) => { + $crate::parser::error::ParserError::at_current_mut($self, &format!($fmt, $($arg)*)) + }; +} + +#[macro_export] +macro_rules! parser_error_at_previous_mut { + ($self:expr, $message:expr) => { + $crate::parser::error::ParserError::at_previous_mut($self, $message) + }; + ($self:expr, $fmt:expr, $($arg:tt)*) => { + $crate::parser::error::ParserError::at_previous_mut($self, &format!($fmt, $($arg)*)) + }; +} diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index 0cfaadc..cadc38c 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -1,5 +1,7 @@ +use super::prelude::*; + use crate::ast::*; -use crate::lexer::{Token, TokenType, TemplatePart}; +use crate::lexer::{Token, TokenType, TemplatePart, LexicalContext}; use super::error::ParseResult; use super::core::Parser; @@ -24,7 +26,7 @@ pub enum Precedence { Prefix, // ! ~ + - ++ -- typeof void delete Postfix, // ++ -- Call, // . 
[] () - //Primary + Primary } impl Parser { @@ -96,13 +98,13 @@ impl Parser { } }, // Await expression - Some(TokenType::Await) if self.state.allow_await => { + Some(TokenType::Await) if self.allows_await() => { self.advance(); let argument = self.parse_expression_with_precedence(Precedence::Prefix)?; Expression::Await(Box::new(argument)) }, // Yield expression - Some(TokenType::Yield) if self.state.allow_yield => { + Some(TokenType::Yield) if self.allows_yield() => { self.advance(); let delegate = self.match_token(&TokenType::Star); @@ -212,24 +214,12 @@ impl Parser { match temp_parser.parse_expression() { Ok(expr) => expressions.push(expr), Err(e) => { - return Err(super::error::ParserError::with_token_span( - &format!("Invalid expression in template literal: {}", e.message), - token_line, - token_column, - token_length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Invalid expression in template literal: {}", e.message)); } } }, Err(e) => { - return Err(super::error::ParserError::with_token_span( - &format!("Error tokenizing expression in template literal: {}", e.message), - token_line, - token_column, - token_length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Error tokenizing expression in template literal: {}", e.message)); } } @@ -247,17 +237,7 @@ impl Parser { if quasis.len() == expressions.len() { quasis.push("".into()); } else { - return Err(super::error::ParserError::with_token_span( - &format!( - "Invalid template literal: expected {} quasis but got {}", - expressions.len() + 1, - quasis.len(), - ), - token_line, - token_column, - token_length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Invalid template literal: expected {} quasis but got {}", expressions.len() + 1, quasis.len())); } } @@ -274,6 +254,7 @@ impl Parser { //Some(TokenType::Get) | //Some(TokenType::Set) | //Some(TokenType::From) => { + let name = self.expect_identifier("Expected identifier in 
expression")?; if self.check(&TokenType::Arrow) { let param = Expression::Identifier(name); @@ -283,114 +264,65 @@ impl Parser { Expression::Identifier(name) }, Some(TokenType::LeftParen) => { + //println!("In ( {:#?}", self.peek_token_type()); + self.advance(); // Consume the '(' - // TODO tricky tricky - - self.advance(); // consume '(' - - //println!("In ("); - - // TODO IIFE + // Handle empty parameter list: () => ... + if self.match_token(&TokenType::RightParen) { + return if self.match_token(&TokenType::Arrow) { + self.parse_arrow_function_body(vec![], false) + } else { + let token = self.previous().unwrap(); + Err(parser_error_at_current!(self, "Unexpected empty parentheses '()'")) + }; + } -// if self.match_token(&TokenType::LeftParen) { -// let arguments = self.parse_arguments()?; -// body = Expression::Call { -// callee: Box::new(body), -// arguments, -// optional: false, -// }; -// } - + //println!("Here 1 current token {:#?}", self.peek_token_type()); + let mut expr = self.parse_expression()?; + + //println!("Here 2"); + // Handle single-parameter or nested parentheses: (x) => ..., ((expr)) + if self.match_token(&TokenType::RightParen) { + if self.match_token(&TokenType::Arrow) { + let params = match expr { + //Expression::Identifier(_) => vec![expr], + //Expression::Sequence(seq) => seq, + Expression::Sequence(seq) => seq,//self.flatten_sequence(seq), + _ => vec![expr], + }; + return self.parse_arrow_function_body(params, false); + } + } else if self.check(&TokenType::Comma) { + //println!("Some comma {:#?}", self.peek_token_type()); - match self.parse_expression() { - Ok(expr) => { - //println!("Parsed expr {:#?}", expr); + // Handle comma-separated parameters: (a, b, c) + let mut params = vec![expr]; + while self.match_token(&TokenType::Comma) { //println!("Current token {:#?}", self.peek_token_type()); - - match expr { - Expression::ArrowFunction { .. 
} => { - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - return Ok(expr); - } - _ => {}, - } - - let mut params = vec![]; - - // TODO better way to extract Expressions into Patterns - - match expr { - // TODO sequences are tuples or maybe just keep it as is? - Expression::Sequence(ref seq) => { - println!("Found sequence of identifiers {:#?}", seq); - for item in seq { - match item { - Expression::Identifier(ref name) => { - params.push(Expression::Identifier(name.clone())); - }, - _ => {}, // TODO rest - } - } - }, - Expression::Identifier(ref name) => { - println!("Found identifier {:#?}", name); - params.push(Expression::Identifier(name.clone())); - }, - /* - Expression::ArrowFunction { .. } => { - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - return Ok(expr); - } - */ - _ => {}, - } - - if self.match_token(&TokenType::Arrow) { - println!("Now I am in body of arrow function"); - - let body = self.parse_arrow_function_body(params, false)?; - - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - return Ok(body); - } - - //println!("Not in body of arrow function just generic expression"); - - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - - if self.match_token(&TokenType::Arrow) { - - println!("Acumulated params {:#?}", params); - - println!("Backtrack it was actually arrow function"); - - let body = self.parse_arrow_function_body(params, false)?; - return Ok(body); - } - return Ok(expr); - }, - Err(err) => { - println!("In error"); - - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; - - println!("Current token {:#?}", self.peek_token_type()); - - if self.match_token(&TokenType::Arrow) { - println!("Now I am in body of arrow function"); - - // TODO skipped over params - let params = vec![]; - let body = self.parse_arrow_function_body(params, false)?; - return Ok(body); - } + 
params.push(self.parse_expression_with_precedence(Precedence::Assignment)?); + } + self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; + return if self.match_token(&TokenType::Arrow) { + self.parse_arrow_function_body(params, false) + } else { + Ok(Expression::Sequence(params)) + }; + } else { + self.consume(&TokenType::RightParen, "Expected ')' after expression")?; + } - return Err(err); - }, + // Handle expressions after ')': ., [ or ( + if self.match_token(&TokenType::Dot) { + expr = self.parse_expression_with_precedence(Precedence::Assignment)?; + } else if self.check(&TokenType::LeftBracket) || self.check(&TokenType::LeftParen) { + expr = self.parse_expression_with_precedence(Precedence::Call)?; } + + expr }, Some(TokenType::LeftBracket) => { + //println!("I am here"); self.advance(); let mut elements = Vec::new(); while !self.check(&TokenType::RightBracket) && !self.is_at_end() { @@ -439,52 +371,11 @@ impl Parser { } } } - - // Parse property key - let key = if self.match_token(&TokenType::LeftBracket) { - // Computed property key - let expr = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property key")?; - PropertyKey::Computed(expr) - } else if self.match_token(&TokenType::Hash) { - // Private identifier (class fields/methods) - let name = self.expect_identifier("Expected private identifier name")?; - PropertyKey::PrivateIdentifier(name) - } else if let Some(TokenType::StringLiteral(_)) = self.peek_token_type() { - if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { - PropertyKey::StringLiteral(s.into_boxed_str()) - } else { - unreachable!() - } - } else if let Some(TokenType::NumberLiteral(_)) = self.peek_token_type() { - if let TokenType::NumberLiteral(n) = self.advance().unwrap().token_type { - PropertyKey::NumericLiteral(n) - } else { - unreachable!() - } - } else if self.check(&TokenType::Default) { - self.advance(); - 
PropertyKey::Identifier("default".into()) - } else if self.check(&TokenType::Get) { - self.advance(); - PropertyKey::Identifier("get".into()) - } else if self.check(&TokenType::Set) { - self.advance(); - PropertyKey::Identifier("set".into()) - } else if self.check(&TokenType::From) { - self.advance(); - PropertyKey::Identifier("from".into()) - } else if self.check(&TokenType::As) { - self.advance(); - PropertyKey::Identifier("as".into()) - } else if self.check(&TokenType::For) { - self.advance(); - PropertyKey::Identifier("for".into()) - } else { - // Identifier - let name = self.expect_identifier("Expected property name 1")?; - PropertyKey::Identifier(name) - }; + + // Use with_context for property key parsing + let key = self.with_context(LexicalContext::PropertyKey, |parser| { + parser.parse_property_key() + })?; let computed = matches!(key, PropertyKey::Computed(_)); @@ -497,7 +388,10 @@ impl Parser { }; let params = self.parse_function_params()?; + + self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_async, is_generator)?; + self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; properties.push(ObjectProperty::Method { key, @@ -538,6 +432,8 @@ impl Parser { } if !self.check(&TokenType::RightBrace) { + //println!("Now have token {:#?}", self.peek_token()); + self.consume(&TokenType::Comma, "Expected ',' after property")?; // Allow trailing comma @@ -548,11 +444,12 @@ impl Parser { break; } } - + self.consume(&TokenType::RightBrace, "Expected '}' after object literal")?; - + Expression::Object(properties) }, + Some(TokenType::Function) => self.parse_function_expression()?, Some(TokenType::Class) => self.parse_class_expression()?, Some(TokenType::New) => { @@ -569,23 +466,11 @@ impl Parser { } } else { let token = self.peek_token().unwrap(); - return Err(super::error::ParserError::with_token_span( - "Expected 'target' after 'new.'", - token.line, - token.column, - token.length, - 
&self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "Expected 'target' after 'new.'")); } } else { let token = self.peek_token().unwrap(); - return Err(super::error::ParserError::with_token_span( - "Expected 'target' after 'new.'", - token.line, - token.column, - token.length, - &self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "Expected 'target' after 'new.'")); } } else { // Regular new expression @@ -618,7 +503,7 @@ impl Parser { } } - self.consume(&TokenType::RightParen, "Expected ')' after arguments")?; + self.consume(&TokenType::RightParen, "Expected ')' after arguments 1")?; args } else { Vec::new() @@ -645,14 +530,11 @@ impl Parser { }, Some(TokenType::Async) if self.is_async_function() => self.parse_async_function_expression()?, _ => { + + // TODO trailing comma gets there + let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap()); - return Err(super::error::ParserError::with_token_span( - &format!("Unexpected token in expression: {:?}", token.token_type), - token.line, - token.column, - token.length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Unexpected token in expression: {:?}", token.token_type)); } }; @@ -726,13 +608,7 @@ impl Parser { if self.match_any(&[TokenType::PlusPlus, TokenType::MinusMinus]) { if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. 
}) { let token = self.previous().unwrap(); - return Err(super::error::ParserError::with_token_span( - "Invalid left-hand side in postfix operation", - token.line, - token.column, - token.length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Invalid left-hand side in postfix operation")); } let operator = match self.previous().unwrap().token_type { @@ -754,9 +630,15 @@ impl Parser { match current_precedence { Precedence::Comma => { self.advance(); // consume comma - let right = self.parse_expression_with_precedence(Precedence::Assignment)?; - // TODO if None or Empty its fine - expr = Expression::Sequence(vec![expr, right]); + + if !self.check(&TokenType::RightParen) { + let right = self.parse_expression_with_precedence(Precedence::Assignment)?; + if let Expression::Sequence(ref mut seq) = expr { + seq.push(right); + } else { + expr = Expression::Sequence(vec![expr, right]); + } + } }, Precedence::Assignment => { // Match assignment operator @@ -800,13 +682,7 @@ impl Parser { if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. } | Expression::Array(_) | Expression::Object(_)) { let binding = Token::new(TokenType::EOF, 0, 0, 0); let token = self.previous().unwrap_or(&binding); - return Err(super::error::ParserError::with_token_span( - "Invalid left-hand side in assignment", - token.line, - token.column, - token.length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Invalid left-hand side in assignment")); } let right = self.parse_expression_with_precedence(Precedence::Assignment)?; @@ -891,6 +767,17 @@ impl Parser { right: Box::new(right), }; }, + // TODO implement +// Precedence::NullishCoalescing => { +// self.advance(); // consume '??' 
+// let right = self.parse_expression_with_precedence(Precedence::NullishCoalescing)?; + // +// expr = Expression::Logical { +// operator: LogicalOperator::NullishCoalescing, +// left: Box::new(expr), +// right: Box::new(right), +// }; +// }, Precedence::BitwiseOr | Precedence::BitwiseXor | Precedence::BitwiseAnd | @@ -927,13 +814,7 @@ impl Parser { TokenType::InstanceOf => BinaryOperator::InstanceOf, _ => { let token = self.previous().unwrap(); - return Err(super::error::ParserError::with_token_span( - &format!("Unexpected token: {:?}", token_type), - token.line, - token.column, - token.length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Unexpected token: {:?}", token_type)); } }; @@ -970,39 +851,19 @@ impl Parser { }; }, Precedence::Call => { + + //println!("In call {:#?}", self.peek_token_type()); + if self.match_token(&TokenType::Dot) { - let property = if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { - self.advance(); - name.into_boxed_str() - } - else if self.check(&TokenType::Default) { - self.advance(); - "default".into() - } else if self.check(&TokenType::Get) { - self.advance(); - "get".into() - } else if self.check(&TokenType::Set) { - self.advance(); - "set".into() - } else if self.check(&TokenType::From) { - self.advance(); - "from".into() - } else if self.check(&TokenType::As) { - self.advance(); - "as".into() - } else if self.check(&TokenType::For) { - self.advance(); - "for".into() - } else { - return Err(super::error::ParserError::with_token_span( - "Expected property name 3", - self.peek_token().unwrap().line, - self.peek_token().unwrap().column, - self.peek_token().unwrap().length, - &self.get_source_text() - )); - }; + let property = self.with_context(LexicalContext::MemberAccess, |parser| { + let property = if let Some(TokenType::Identifier(name)) = parser.peek_token_type().cloned() { + parser.advance(); + return Ok(name.into_boxed_str()) + } else { + return 
Err(parser_error_at_current!(parser, "Expected property name 3")); + }; + })?; expr = Expression::Member { object: Box::new(expr), @@ -1011,6 +872,7 @@ impl Parser { optional: false, }; } else if self.match_token(&TokenType::LeftBracket) { + //println!("This case"); // Member access with bracket notation let property = self.parse_expression()?; self.consume(&TokenType::RightBracket, "Expected ']' after computed property")?; @@ -1067,7 +929,7 @@ impl Parser { } } - self.consume(&TokenType::RightParen, "Expected ')' after arguments")?; + self.consume(&TokenType::RightParen, "Expected ')' after arguments 2")?; expr = Expression::Call { callee: Box::new(expr), @@ -1085,4 +947,16 @@ impl Parser { Ok(expr) } + // TODO remove need for this flatten in place when processing comma +// fn flatten_sequence(&self, exprs: Vec) -> Vec { +// let mut flattened = Vec::new(); +// for expr in exprs { +// match expr { +// Expression::Sequence(seq) => flattened.extend(self.flatten_sequence(seq)), +// _ => flattened.push(expr), +// } +// } +// flattened +// } + } diff --git a/src/parser/functions.rs b/src/parser/functions.rs index 4f4e2c4..c7041cb 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -1,5 +1,7 @@ +use super::prelude::*; + use crate::ast::*; -use crate::lexer::TokenType; +use crate::lexer::{TokenType, LexicalContext}; use super::error::ParseResult; use super::core::Parser; @@ -11,17 +13,11 @@ impl Parser { let is_generator = self.match_token(&TokenType::Star); let id = self.expect_identifier("Expected function name")?; - // Save and update parser state - let (prev_in_function, prev_allow_yield) = (self.state.in_function, self.state.allow_yield); - self.state.in_function = true; - self.state.allow_yield = is_generator; - let params = self.parse_function_params()?; - let body = self.parse_function_body(false, is_generator)?; - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; + 
self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + let body = self.parse_function_body(is_generator, false)?; + self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; Ok(FunctionDeclaration { id, @@ -38,21 +34,17 @@ impl Parser { let is_generator = self.match_token(&TokenType::Star); // Optional function name for function expressions - let id = matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) - .then(|| self.expect_identifier("Expected function name")) - .transpose()?; - - // Save and update parser state - let (prev_in_function, prev_allow_yield) = (self.state.in_function, self.state.allow_yield); - self.state.in_function = true; - self.state.allow_yield = is_generator; + let id = if matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) { + Some(self.expect_identifier("Expected function name")?) + } else { + None + }; let params = self.parse_function_params()?; - let body = self.parse_function_body(false, is_generator)?; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; + + self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + let body = self.parse_function_body(is_generator, false)?; + self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; Ok(Expression::Function { id, @@ -68,26 +60,19 @@ impl Parser { self.advance(); // consume 'function' let is_generator = self.match_token(&TokenType::Star); - + // Optional function name for function expressions - let id = matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) - .then(|| self.expect_identifier("Expected function name")) - .transpose()?; - - // Save and update parser state - let (prev_in_function, prev_allow_yield, prev_allow_await) = - (self.state.in_function, self.state.allow_yield, self.state.allow_await); - self.state.in_function = true; - self.state.allow_yield = is_generator; - self.state.allow_await = true; + let 
id = if matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) { + Some(self.expect_identifier("Expected function name")?) + } else { + None + }; let params = self.parse_function_params()?; - let body = self.parse_function_body(true, is_generator)?; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; - self.state.allow_await = prev_allow_await; + + self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + let body = self.parse_function_body(is_generator, true)?; + self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; Ok(Expression::Function { id, @@ -98,72 +83,28 @@ impl Parser { }) } - pub fn parse_function_params(&mut self) -> ParseResult> { - self.consume(&TokenType::LeftParen, "Expected '(' after function name")?; - - let mut params = Vec::new(); - - if !self.check(&TokenType::RightParen) { - loop { - if self.match_token(&TokenType::Ellipsis) { - // Rest parameter - let arg = self.parse_pattern()?; - params.push(Expression::Spread(Box::new(arg))); - break; // Rest parameter must be the last one - } else { - params.push(self.parse_pattern()?); - } - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightParen) { - break; - } - } - } - - self.consume(&TokenType::RightParen, "Expected ')' after function parameters")?; - - Ok(params) - } - - pub fn parse_function_body(&mut self, _is_async: bool, _is_generator: bool) -> ParseResult> { - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; - - let mut body = Vec::new(); - - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - body.push(self.parse_statement()?); - } - - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; - - Ok(body) - } - pub fn parse_arrow_function_body(&mut self, params: Vec, is_async: bool) -> ParseResult { - // Save and update parser state - let 
(prev_in_function, prev_allow_await) = (self.state.in_function, self.state.allow_await); - self.state.in_function = true; - self.state.allow_await = is_async; - + // Create a new function body context with appropriate yield/await flags + let body = if self.check(&TokenType::LeftBrace) { // Block body - let statements = self.parse_function_body(is_async, false)?; - ArrowFunctionBody::Block(statements) + + let body = self.parse_function_body(false, is_async)?; + + ArrowFunctionBody::Block(body) } else { // Expression body - let expr = self.parse_expression()?; - ArrowFunctionBody::Expression(Box::new(expr)) + let function_body_context = LexicalContext::FunctionBody { + allow_yield: false, + allow_await: is_async + }; + + self.with_context(function_body_context, |parser| { + let expr = parser.parse_expression()?; + Ok(ArrowFunctionBody::Expression(Box::new(expr))) + })? }; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_await = prev_allow_await; - + Ok(Expression::ArrowFunction { params, body, @@ -178,20 +119,11 @@ impl Parser { let is_generator = self.match_token(&TokenType::Star); let id = self.expect_identifier("Expected function name")?; - // Save and update parser state - let (prev_in_function, prev_allow_yield, prev_allow_await) = - (self.state.in_function, self.state.allow_yield, self.state.allow_await); - self.state.in_function = true; - self.state.allow_yield = is_generator; - self.state.allow_await = true; - let params = self.parse_function_params()?; - let body = self.parse_function_body(true, is_generator)?; - - // Restore previous state - self.state.in_function = prev_in_function; - self.state.allow_yield = prev_allow_yield; - self.state.allow_await = prev_allow_await; + + self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + let body = self.parse_function_body(is_generator, true)?; + self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; Ok(FunctionDeclaration { id, 
@@ -202,6 +134,54 @@ impl Parser { }) } + pub fn parse_function_params(&mut self) -> ParseResult> { + self.consume(&TokenType::LeftParen, "Expected '(' after function name")?; + + // Create parameter name context with current strict mode + let param_context = LexicalContext::ParameterName { + strict_mode: self.state.in_strict_mode + }; + + self.with_context(param_context, |parser| { + let mut params = Vec::new(); + + if !parser.check(&TokenType::RightParen) { + loop { + if parser.match_token(&TokenType::Ellipsis) { + // Rest parameter + let arg = parser.parse_pattern()?; + params.push(Expression::Spread(Box::new(arg))); + break; // Rest parameter must be the last one + } else { + params.push(parser.parse_pattern()?); + } + if !parser.match_token(&TokenType::Comma) { + break; + } + // Handle trailing comma + if parser.check(&TokenType::RightParen) { + break; + } + } + } + + parser.consume(&TokenType::RightParen, "Expected ')' after function parameters")?; + + Ok(params) + }) + } + + pub fn parse_function_body(&mut self, is_async: bool, is_generator: bool) -> ParseResult> { + let function_body_context = LexicalContext::FunctionBody { allow_yield: is_generator, allow_await: is_async }; + self.with_context(function_body_context, |parser| { + let mut body = Vec::new(); + while !parser.check(&TokenType::RightBrace) && !parser.is_at_end() { + body.push(parser.parse_statement()?); + } + Ok(body) + }) + } + // Helper method to check if we're looking at an async function pub fn is_async_function(&self) -> bool { if let Some(TokenType::Async) = self.peek_token_type() { @@ -211,5 +191,4 @@ impl Parser { } false } - } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e588743..e760443 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8,11 +8,14 @@ mod state; mod core; mod expressions; mod statements; -mod declarations; mod patterns; mod functions; mod classes; mod modules; - +mod asi; pub use self::core::Parser; + +mod prelude; + +pub use prelude::*; \ No newline 
at end of file diff --git a/src/parser/modules.rs b/src/parser/modules.rs index b209379..18cf5fa 100644 --- a/src/parser/modules.rs +++ b/src/parser/modules.rs @@ -1,5 +1,8 @@ +use super::prelude::*; + + use crate::ast::*; -use crate::lexer::{Token, TokenType}; +use crate::lexer::{Token, TokenType, LexicalContext}; use super::error::ParseResult; use super::core::Parser; @@ -22,7 +25,8 @@ impl Parser { } pub fn parse_module(&mut self) -> ParseResult { - // Set strict mode for modules + // Set strict mode for modules - this is fine to keep as state + // since modules are always in strict mode self.state.in_strict_mode = true; let mut body = Vec::new(); @@ -76,7 +80,7 @@ impl Parser { // Continue to named imports } else if !self.check(&TokenType::From) { // If no comma and not 'from', it's an error - return Err(self.error_unexpected("Expected ',' or 'from' after default import")); + return Err(parser_error_at_current!(self, "Expected ',' or 'from' after default import")); } } @@ -90,7 +94,7 @@ impl Parser { if !specifiers.is_empty() { source = self.parse_from_clause()?; } else { - return Err(self.error_unexpected("Expected import specifiers")); + return Err(parser_error_at_current!(self, "Expected import specifiers")); } } @@ -110,7 +114,7 @@ impl Parser { assertions, }) } else { - Err(self.error_unexpected("Missing module source in import statement")) + Err(parser_error_at_current!(self, "Missing module source in import statement")) } } @@ -119,7 +123,7 @@ impl Parser { if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { Ok(Some(s.into_boxed_str())) } else { - Err(self.error_unexpected("Expected string literal for module source")) + Err(parser_error_at_current!(self, "Expected string literal for module source")) } } @@ -132,51 +136,58 @@ impl Parser { // Helper method to parse namespace import: * as name pub fn parse_namespace_import(&mut self) -> ParseResult { self.consume(&TokenType::As, "Expected 'as' after '*'")?; - // First advance 
to get the token - let token = self.advance().unwrap().clone(); - // Then use the cloned token for identifier_name - let local = self.identifier_name(&token)?; + // Use ImportExport context for parsing the identifier + let local = self.with_context(LexicalContext::ImportExport, |parser| { + parser.expect_identifier("Expected namespace import name") + })?; + Ok(ImportSpecifier::Namespace(local)) } // Helper method to parse default import: defaultExport pub fn parse_default_import(&mut self) -> ParseResult { - let local = self.expect_identifier("Expected default import name")?; + // Use ImportExport context for parsing the identifier + let local = self.with_context(LexicalContext::ImportExport, |parser| { + parser.expect_identifier("Expected default import name") + })?; + Ok(ImportSpecifier::Default(local)) } // Helper method to parse named imports: { name1, name2 as alias2 } pub fn parse_named_imports(&mut self) -> ParseResult> { - let mut specifiers = Vec::new(); - - if !self.check(&TokenType::RightBrace) { - loop { - let imported = self.expect_identifier("Expected imported name")?; - - let local = if self.match_token(&TokenType::As) { - self.expect_identifier("Expected local name after 'as'")? - } else { - imported.clone() - }; - - specifiers.push(ImportSpecifier::Named { - imported, - local, - }); - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightBrace) { - break; + self.with_context(LexicalContext::ImportExport, |parser| { + let mut specifiers = Vec::new(); + + if !parser.check(&TokenType::RightBrace) { + loop { + let imported = parser.expect_identifier("Expected imported name")?; + + let local = if parser.match_token(&TokenType::As) { + parser.expect_identifier("Expected local name after 'as'")? 
+ } else { + imported.clone() + }; + + specifiers.push(ImportSpecifier::Named { + imported, + local, + }); + + if !parser.match_token(&TokenType::Comma) { + break; + } + + // Handle trailing comma + if parser.check(&TokenType::RightBrace) { + break; + } } } - } - - self.consume(&TokenType::RightBrace, "Expected '}' after named imports")?; - Ok(specifiers) + + parser.consume(&TokenType::RightBrace, "Expected '}' after named imports")?; + Ok(specifiers) + }) } pub fn parse_import_assertions(&mut self) -> ParseResult> { @@ -186,13 +197,17 @@ impl Parser { if !self.check(&TokenType::RightBrace) { loop { - let key = self.expect_identifier("Expected assertion key")?; + // Use ImportExport context for parsing assertion keys + let key = self.with_context(LexicalContext::ImportExport, |parser| { + parser.expect_identifier("Expected assertion key") + })?; + self.consume(&TokenType::Colon, "Expected ':' after assertion key")?; let value = if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { s.into_boxed_str() } else { - return Err(self.error_unexpected("Expected string literal for assertion value")); + return Err(parser_error_at_current!(self, "Expected string literal for assertion value")); }; assertions.push(ImportAssertion { key, value }); @@ -237,41 +252,26 @@ impl Parser { } // If we get here, it's an invalid export statement - Err(super::error::ParserError::with_token_span( - "Invalid export statement. Expected '*', default, declaration, or named exports", - start_token.line, - start_token.column, - start_token.length, - &self.get_source_text() - )) + Err(parser_error_at_current!(self, "Invalid export statement. 
Expected '*', default, declaration, or named exports")) } // Helper method for export * from "module" or export * as name from "module" pub fn parse_export_all(&mut self, start_token: &Token) -> ParseResult { + // Use ImportExport context for parsing the exported name let exported = if self.match_token(&TokenType::As) { - Some(self.expect_identifier("Expected exported name after 'as'")?) + Some(self.with_context(LexicalContext::ImportExport, |parser| { + parser.expect_identifier("Expected exported name after 'as'") + })?) } else { None }; if !self.match_token(&TokenType::From) { - return Err(super::error::ParserError::with_token_span( - "Expected 'from' after export *", - self.peek_token().unwrap().line, - self.peek_token().unwrap().column, - self.peek_token().unwrap().length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Expected 'from' after export *")); } let source = self.parse_module_source()? - .ok_or_else(|| super::error::ParserError::with_token_span( - "Expected string literal for module source", - self.previous().unwrap().line, - self.previous().unwrap().column, - self.previous().unwrap().length, - &self.get_source_text() - ))?; + .ok_or_else(|| super::error::ParserError::at_current(self, "Expected string literal for module source"))?; self.consume(&TokenType::Semicolon, "Expected ';' after export statement")?; @@ -311,13 +311,7 @@ impl Parser { } else if self.check(&TokenType::Var) || self.check(&TokenType::Let) || self.check(&TokenType::Const) { Declaration::Variable(self.parse_variable_declaration()?) 
} else { - return Err(super::error::ParserError::with_token_span( - "Expected declaration in export statement", - self.peek_token().unwrap().line, - self.peek_token().unwrap().column, - self.peek_token().unwrap().length, - &self.get_source_text() - )); + return Err(parser_error_at_current!(self, "Expected declaration in export statement")); }; Ok(Statement::Export(ExportDeclaration::Named { @@ -333,13 +327,7 @@ impl Parser { // Optional from clause let source = if self.match_token(&TokenType::From) { - Some(self.parse_module_source()?.ok_or_else(|| super::error::ParserError::with_token_span( - "Expected string literal for module source", - self.previous().unwrap().line, - self.previous().unwrap().column, - self.previous().unwrap().length, - &self.get_source_text() - ))?) + Some(self.parse_module_source()?.ok_or_else(|| super::error::ParserError::at_current(self, "Expected string literal for module source"))?) } else { None }; @@ -355,32 +343,33 @@ impl Parser { // Helper method to parse export specifiers: { name1, name2 as alias2 } pub fn parse_export_specifiers(&mut self) -> ParseResult> { - let mut specifiers = Vec::new(); - - if !self.check(&TokenType::RightBrace) { - loop { - let local = self.expect_identifier("Expected exported identifier")?; - let exported = if self.match_token(&TokenType::As) { - self.expect_identifier("Expected exported name after 'as'")? - } else { - local.clone() - }; - - specifiers.push(ExportSpecifier { local, exported }); - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightBrace) { - break; + self.with_context(LexicalContext::ImportExport, |parser| { + let mut specifiers = Vec::new(); + + if !parser.check(&TokenType::RightBrace) { + loop { + let local = parser.expect_identifier("Expected exported identifier")?; + let exported = if parser.match_token(&TokenType::As) { + parser.expect_identifier("Expected exported name after 'as'")? 
+ } else { + local.clone() + }; + specifiers.push(ExportSpecifier { local, exported }); + + if !parser.match_token(&TokenType::Comma) { + break; + } + + // Handle trailing comma + if parser.check(&TokenType::RightBrace) { + break; + } } } - } - - self.consume(&TokenType::RightBrace, "Expected '}' after export specifiers")?; - Ok(specifiers) + + parser.consume(&TokenType::RightBrace, "Expected '}' after export specifiers")?; + Ok(specifiers) + }) } // Helper method to check if the current token starts a declaration diff --git a/src/parser/patterns.rs b/src/parser/patterns.rs index e2070c8..3e55617 100644 --- a/src/parser/patterns.rs +++ b/src/parser/patterns.rs @@ -1,5 +1,8 @@ +use super::prelude::*; + + use crate::ast::*; -use crate::lexer::TokenType; +use crate::lexer::{TokenType, LexicalContext}; use super::error::ParseResult; use super::core::Parser; @@ -8,10 +11,7 @@ impl Parser { pub fn parse_pattern(&mut self) -> ParseResult { match self.peek_token_type() { // Identifier pattern - Some(TokenType::Identifier(_)) | - Some(TokenType::Default) | - Some(TokenType::As) | - Some(TokenType::From) => { + Some(TokenType::Identifier(_)) => { let name = self.expect_identifier("Expected identifier in pattern")?; Ok(Expression::Identifier(name)) }, @@ -30,12 +30,14 @@ impl Parser { // Rest element must be the last one if !self.check(&TokenType::RightBrace) { - return Err(self.error_unexpected("Rest element must be the last element in object pattern")); + return Err(parser_error_at_current!(self, "Rest element must be the last element in object pattern")); } break; } else { // Regular property - let key = self.parse_property_key()?; + let key = self.with_context(LexicalContext::PropertyKey, |parser| { + parser.parse_property_key() + })?; // Handle shorthand: { x } let (value, computed, shorthand) = if !self.check(&TokenType::Colon) { @@ -55,7 +57,7 @@ impl Parser { (pattern, false, true) } } else { - return Err(self.error_unexpected("Invalid shorthand property in object 
pattern")); + return Err(parser_error_at_current!(self, "Invalid shorthand property in object pattern")); } } else { // Full syntax: { key: value } @@ -120,10 +122,10 @@ impl Parser { if !self.check(&TokenType::RightBracket) { if self.match_token(&TokenType::Comma) { if !self.check(&TokenType::RightBracket) { - return Err(self.error_unexpected("Rest element must be the last element in array pattern")); + return Err(parser_error_at_current!(self, "Rest element must be the last element in array pattern")); } } else { - return Err(self.error_unexpected("Expected ',' or ']' after rest element in array pattern")); + return Err(parser_error_at_current!(self, "Expected ',' or ']' after rest element in array pattern")); } } break; @@ -158,7 +160,7 @@ impl Parser { // Assignment pattern: x = 1 (handled by the caller) _ => { - Err(self.error_unexpected("Expected pattern")) + Err(parser_error_at_current!(self, "Expected pattern")) } } } diff --git a/src/parser/prelude.rs b/src/parser/prelude.rs new file mode 100644 index 0000000..a38df44 --- /dev/null +++ b/src/parser/prelude.rs @@ -0,0 +1,6 @@ +pub use crate::{ + parser_error_at_current, + parser_error_at_previous, + parser_error_at_current_mut, + parser_error_at_previous_mut, +}; \ No newline at end of file diff --git a/src/parser/state.rs b/src/parser/state.rs index 8f14400..2e46997 100644 --- a/src/parser/state.rs +++ b/src/parser/state.rs @@ -2,24 +2,14 @@ use std::collections::HashSet; pub struct ParserState { pub in_strict_mode: bool, - pub allow_yield: bool, - pub allow_await: bool, - pub in_loop: bool, - pub in_switch: bool, - pub in_function: bool, - pub labels: HashSet>, + pub labels: HashSet>, } impl ParserState { pub fn new() -> Self { Self { in_strict_mode: false, - allow_yield: false, - allow_await: false, - in_loop: false, - in_switch: false, - in_function: false, labels: HashSet::new(), } } -} \ No newline at end of file +} diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 
202dffd..041ea3f 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -1,10 +1,44 @@ +use super::prelude::*; + + use crate::ast::*; -use crate::lexer::TokenType; +use crate::lexer::{Token, TokenType, LexicalContext}; use super::error::{ParserError, ParseResult}; use super::core::Parser; impl Parser { + // Variable declarations + pub fn parse_variable_declaration(&mut self) -> ParseResult { + let token = self.advance().cloned().unwrap_or_else(|| Token::new(TokenType::EOF, 0, 0, 0)); + + let kind = match token.token_type { + TokenType::Var => VariableKind::Var, + TokenType::Let => VariableKind::Let, + TokenType::Const => VariableKind::Const, + _ => unreachable!(), + }; + + // Parse first declarator (required) + let mut declarations = vec![self.parse_variable_declarator()?]; + + // Parse additional declarators separated by commas + while self.match_token(&TokenType::Comma) { + declarations.push(self.parse_variable_declarator()?); + } + + // Consume semicolon unless we're in a for-in/of loop context + let current_context = self.current_context(); + let is_in_loop_parameters = matches!(current_context, LexicalContext::LoopParameters); + + if !is_in_loop_parameters { + self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?; + } + + Ok(VariableDeclaration { declarations, kind }) + } + + pub fn parse_statement(&mut self) -> ParseResult { match self.peek_token_type() { // Empty statement (just a semicolon) @@ -102,40 +136,16 @@ impl Parser { Ok(Statement::If { test, consequent, alternate }) } - /// Parse switch statement: switch (discriminant) { case/default... 
} - fn parse_switch(&mut self) -> ParseResult { - self.advance(); // consume 'switch' - self.consume(&TokenType::LeftParen, "Expected '(' after 'switch'")?; - - let discriminant = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after switch expression")?; - self.consume(&TokenType::LeftBrace, "Expected '{' to start switch block")?; - - // Save previous switch state - let prev_in_switch = self.state.in_switch; - self.state.in_switch = true; - - let mut cases = Vec::new(); - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - cases.push(self.parse_switch_case()?); - } - - self.consume(&TokenType::RightBrace, "Expected '}' to end switch block")?; - - // Restore previous switch state - self.state.in_switch = prev_in_switch; - - Ok(Statement::Switch { discriminant, cases }) - } - /// Parse a single case in a switch statement fn parse_switch_case(&mut self) -> ParseResult { let test = if self.match_token(&TokenType::Case) { + // After 'case', we expect an expression Some(self.parse_expression()?) 
} else if self.match_token(&TokenType::Default) { None } else { - return Err(self.error_unexpected("Expected 'case' or 'default'")); + println!("Current token {:#?}", self.peek_token()); + return Err(parser_error_at_current!(self, "Expected 'case' or 'default'")); }; self.consume(&TokenType::Colon, "Expected ':' after case value")?; @@ -153,50 +163,6 @@ impl Parser { Ok(SwitchCase { test, consequent }) } - /// Parse while statement: while (test) statement - fn parse_while(&mut self) -> ParseResult { - self.advance(); // consume 'while' - self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; - - let test = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; - - // Save previous loop state - let prev_in_loop = self.state.in_loop; - self.state.in_loop = true; - - let body = Box::new(self.parse_statement()?); - - // Restore previous loop state - self.state.in_loop = prev_in_loop; - - Ok(Statement::Loop(LoopStatement::While { test, body })) - } - - /// Parse do-while statement: do statement while (test); - fn parse_do_while(&mut self) -> ParseResult { - self.advance(); // consume 'do' - - // Save previous loop state - let prev_in_loop = self.state.in_loop; - self.state.in_loop = true; - - let body = Box::new(self.parse_statement()?); - - // Restore previous loop state - self.state.in_loop = prev_in_loop; - - self.consume(&TokenType::While, "Expected 'while' after do block")?; - self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; - - let test = self.parse_expression()?; - - self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; - self.consume(&TokenType::Semicolon, "Expected ';' after do-while statement")?; - - Ok(Statement::Loop(LoopStatement::DoWhile { body, test })) - } - /// Parse try statement: try block [catch] [finally] fn parse_try(&mut self) -> ParseResult { self.advance(); // consume 'try' @@ -215,7 +181,7 @@ impl Parser { // Either catch or finally must be 
present if handler.is_none() && finalizer.is_none() { - return Err(self.error_unexpected("Expected 'catch' or 'finally' after try block")); + return Err(parser_error_at_current!(self, "Expected 'catch' or 'finally' after try block")); } Ok(Statement::Try { block, handler, finalizer }) @@ -226,18 +192,22 @@ impl Parser { // Optional catch parameter let param = self.match_token(&TokenType::LeftParen) .then(|| { - //let param = self.parse_pattern()?; - // TODO fixme - let param = None; - self.consume(&TokenType::RightParen, "Expected ')' after catch parameter")?; - // Explicitly specify the error type as ParserError - Ok::<_, super::error::ParserError>(param) + // Attempt to parse the parameter identifier + if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { + self.advance(); // Consume the identifier + self.consume(&TokenType::RightParen, "Expected ')' after catch parameter")?; + Ok(Expression::Identifier(name.into_boxed_str())) + } else { + // If not an identifier, it's an error + Err(parser_error_at_current!(self, "Expected identifier for catch parameter")) + } }) .transpose()?; let body = Box::new(self.parse_block()?); - - Ok(CatchClause { param: param.expect("REASON"), body }) + + Ok(CatchClause { param, body }) + } /// Parse throw statement: throw expression; @@ -246,13 +216,7 @@ impl Parser { // No line terminator allowed between throw and expression if self.previous_line_terminator() { - return Err(ParserError::with_token_span( - "Illegal newline after throw", - token.line, - token.column, - token.length, - &self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "Illegal newline after throw")); } let expr = self.parse_expression()?; @@ -261,137 +225,13 @@ impl Parser { Ok(Statement::Throw(expr)) } - /// Parse return statement: return [expression]; - fn parse_return(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'return' - - // Check if we're in a function - if 
!self.state.in_function { - return Err(ParserError::with_token_span( - "'return' statement outside of function", - token.line, - token.column, - token.length, - &self.get_source_text(), - )); - } - - // Return with no value if semicolon or end of block - let argument = (!self.check(&TokenType::Semicolon) && - !self.check(&TokenType::RightBrace) && - !self.is_at_end() && - !self.previous_line_terminator()) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&TokenType::Semicolon, "Expected ';' after return statement")?; - - Ok(Statement::Return(argument)) - } - - /// Parse break statement: break [label]; - fn parse_break(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'break' - - // Check if we're in a loop or switch - if !self.state.in_loop && !self.state.in_switch { - return Err(ParserError::with_token_span( - "'break' statement outside of loop or switch", - token.line, - token.column, - token.length, - &self.get_source_text(), - )); - } - - // Optional label - let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { - if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { - self.advance(); - - // Verify label exists - let label_name = name.into_boxed_str(); - if !self.state.labels.contains(&label_name) { - return Err(ParserError::with_token_span( - &format!("Undefined label '{}'", label_name), - token.line, - token.column, - token.length, - &self.get_source_text(), - )); - } - - Some(label_name) - } else { - None - } - } else { - None - }; - - self.consume(&TokenType::Semicolon, "Expected ';' after break statement")?; - - Ok(Statement::Break(label)) - } - - /// Parse continue statement: continue [label]; - fn parse_continue(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'continue' - - // Check if we're in a loop - if !self.state.in_loop { - return Err(ParserError::with_token_span( - "'continue' 
statement outside of loop", - token.line, - token.column, - token.length, - &self.get_source_text(), - )); - } - - // Optional label - let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { - if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { - self.advance(); - - // Verify label exists - let label_name = name.into_boxed_str(); - if !self.state.labels.contains(&label_name) { - return Err(ParserError::with_token_span( - &format!("Undefined label '{}'", label_name), - token.line, - token.column, - token.length, - &self.get_source_text(), - )); - } - - Some(label_name) - } else { - None - } - } else { - None - }; - - self.consume(&TokenType::Semicolon, "Expected ';' after continue statement")?; - - Ok(Statement::Continue(label)) - } - /// Parse with statement: with (object) statement fn parse_with(&mut self) -> ParseResult { self.advance(); // consume 'with' // Check if in strict mode if self.state.in_strict_mode { - return Err(ParserError::with_token_span( - "'with' statements are not allowed in strict mode", - self.previous().unwrap().line, - self.previous().unwrap().column, - self.previous().unwrap().length, - &self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "'with' statements are not allowed in strict mode")); } self.consume(&TokenType::LeftParen, "Expected '(' after 'with'")?; @@ -417,20 +257,15 @@ impl Parser { /// Parse labeled statement: identifier: statement fn parse_labeled(&mut self) -> ParseResult { let token = self.advance().cloned().unwrap(); - let label = self.identifier_name(&token)?; + let label = self.expect_identifier("Expected label name")?; + self.consume(&TokenType::Colon, "Expected ':' after label")?; // Add label to the set of active labels let label_exists = !self.state.labels.insert(label.clone()); if label_exists { - return Err(ParserError::with_token_span( - &format!("Label '{}' has already been declared", label), - token.line, - 
token.column, - token.length, - &self.get_source_text(), - )); + return Err(parser_error_at_current!(self, "Label '{}' has already been declared", label)); } // Parse the labeled statement @@ -455,35 +290,23 @@ impl Parser { /// Parse expression statement: expression; pub fn parse_expression_statement(&mut self) -> ParseResult { - // Handle directives (like "use strict") + //println!("in parse_expression_statement"); + let start_pos = self.current; - //println!("Before parse expression"); let expr = self.parse_expression()?; - //println!("After parse expression: {:#?}", expr); + //println!("in parse_expression_statement parsed {:#?}", expr); // Check for directive prologue let is_directive = if let Expression::Literal(Literal::String(_)) = &expr { - //println!("This case"); // Only consider as directive if it's at the beginning of a function/program // and is a simple string literal (not an expression) start_pos == 0 || self.previous().unwrap().token_type == TokenType::LeftBrace } else { - //println!("That case"); false }; - - //println!("now need a ;"); - - - //if self.check(&TokenType::) - //if self.check(&TokenType::LeftParen) { - //println!("Immediately invoked?"); - //} - - //println!(self.peek_token_type()) self.consume(&TokenType::Semicolon, "Expected ';' after expression statement")?; // If this is a "use strict" directive, update parser state @@ -498,241 +321,552 @@ impl Parser { Ok(Statement::Expression(expr)) } - /// Parse for statement: for ([init]; [test]; [update]) statement - fn parse_for(&mut self) -> ParseResult { - self.advance(); // consume 'for' + + /// Parse switch statement: switch (discriminant) { case/default... 
} + fn parse_switch(&mut self) -> ParseResult { + self.advance(); // consume 'switch' + self.consume(&TokenType::LeftParen, "Expected '(' after 'switch'")?; + + let discriminant = self.parse_expression()?; + self.consume(&TokenType::RightParen, "Expected ')' after switch expression")?; + + self.consume(&TokenType::LeftBrace, "Expected '{' before switch cases")?; + + // Use SwitchBody context instead of state flag + let cases = self.with_context(LexicalContext::SwitchBody, |parser| { + let mut inner_cases = Vec::new(); + let mut has_default = false; + + while !parser.check(&TokenType::RightBrace) && !parser.is_at_end() { + let case = parser.parse_switch_case()?; + + // Validate only one default case + if case.test.is_none() { + if has_default { + return Err(parser_error_at_current!(parser, "Multiple default clauses in switch statement")); + } + has_default = true; + } + + inner_cases.push(case); + } + + Ok(inner_cases) + })?; + + self.consume(&TokenType::RightBrace, "Expected '}' after switch cases")?; + + Ok(Statement::Switch { discriminant, cases }) + } + + /// Parse while statement: while (test) statement + fn parse_while(&mut self) -> ParseResult { + self.advance(); // consume 'while' + self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; - // Check for for-await-of - let is_await = self.match_token(&TokenType::Await); + let test = self.parse_expression()?; + self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; - self.consume(&TokenType::LeftParen, "Expected '(' after 'for'")?; + // Use LoopBody context instead of state flag + let body = self.with_context(LexicalContext::LoopBody, |parser| { + parser.parse_statement().map(Box::new) + })?; + + Ok(Statement::Loop(LoopStatement::While { test, body })) + } + + /// Parse do-while statement: do statement while (test); + fn parse_do_while(&mut self) -> ParseResult { + self.advance(); // consume 'do' + + // Use LoopBody context instead of state flag + let body = 
self.with_context(LexicalContext::LoopBody, |parser| { + parser.parse_statement().map(Box::new) + })?; + + self.consume(&TokenType::While, "Expected 'while' after do block")?; + self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; + + let test = self.parse_expression()?; + + self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; + self.consume(&TokenType::Semicolon, "Expected ';' after do-while statement")?; + + Ok(Statement::Loop(LoopStatement::DoWhile { body, test })) + } + + /// Parse break statement: break [label]; + fn parse_break(&mut self) -> ParseResult { + let token = self.advance().cloned().unwrap(); // consume 'break' + + if !self.is_in_loop() && !self.is_in_switch() { + return Err(parser_error_at_current!(self, "'break' statement outside of loop or switch")); + } - // Save previous loop state - let prev_in_loop = self.state.in_loop; - self.state.in_loop = true; - - // Parse initialization - let result = if self.match_token(&TokenType::Semicolon) { - // No initialization - standard for loop with empty init - // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) - .transpose()?; + // Optional label + let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { + if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { + self.advance(); - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) - .transpose()?; + // Verify label exists + let label_name = name.into_boxed_str(); + if !self.state.labels.contains(&label_name) { + return Err(parser_error_at_current!(self, "Undefined label '{}'", label_name)); + } - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body - let body = Box::new(self.parse_statement()?); - - LoopStatement::For { - init: 
None, - test, - update, - body + Some(label_name) + } else { + None } - } else if self.check(&TokenType::Var) || self.check(&TokenType::Let) || self.check(&TokenType::Const) { - // Variable declaration initialization - let decl = self.parse_variable_declaration()?; - - // Check for for-in or for-of - if self.check(&TokenType::In) { - // for-in loop with variable declaration - self.advance(); // consume 'in' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - let body = Box::new(self.parse_statement()?); + } else { + None + }; + + self.consume(&TokenType::Semicolon, "Expected ';' after break statement")?; + + Ok(Statement::Break(label)) + } + + /// Parse continue statement: continue [label]; + fn parse_continue(&mut self) -> ParseResult { + let token = self.advance().cloned().unwrap(); // consume 'continue' + + // Check if we're in a loop using context method + if !self.is_in_loop() { + return Err(parser_error_at_current!(self, "'continue' statement outside of loop")); + } + + // Optional label + let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { + if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { + self.advance(); - LoopStatement::ForIn { - left: ForInOfLeft::Declaration(decl), - right, - body + // Verify label exists + let label_name = name.into_boxed_str(); + if !self.state.labels.contains(&label_name) { + return Err(parser_error_at_current!(self, "Undefined label '{}'", label_name)); } - } else if self.check(&TokenType::Of) { - // for-of loop with variable declaration - self.advance(); // consume 'of' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - let body = Box::new(self.parse_statement()?); - LoopStatement::ForOf { - left: ForInOfLeft::Declaration(decl), - right, - body, - is_await - } + Some(label_name) } else { - // 
Standard for loop with variable declaration - self.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - + None + } + } else { + None + }; + + self.consume(&TokenType::Semicolon, "Expected ';' after continue statement")?; + + Ok(Statement::Continue(label)) + } + + /// Parse return statement: return [expression]; + fn parse_return(&mut self) -> ParseResult { + let token = self.advance().cloned().unwrap(); // consume 'return' + + // Check if we're in a function using context method + if !self.is_in_function() { + return Err(parser_error_at_current!(self, "'return' statement outside of function")); + } + + // Return with no value if semicolon or end of block + let argument = (!self.check(&TokenType::Semicolon) && + !self.check(&TokenType::RightBrace) && + !self.is_at_end() && + !self.previous_line_terminator()) + .then(|| self.parse_expression()) + .transpose()?; + + self.consume(&TokenType::Semicolon, "Expected ';' after return statement")?; + + Ok(Statement::Return(argument)) + } + + /// Parse for statement: for ([init]; [test]; [update]) statement + fn parse_for(&mut self) -> ParseResult { + self.advance(); // consume 'for' + + // Check for for-await-of + let is_await = self.match_token(&TokenType::Await); + + self.consume(&TokenType::LeftParen, "Expected '(' after 'for'")?; + + // Parse initialization with LoopParameters context + let result = self.with_context(LexicalContext::LoopParameters, |parser| { + // ... existing for loop parsing code ... 
+ if parser.match_token(&TokenType::Semicolon) { + // No initialization - standard for loop with empty init // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) + let test = (!parser.check(&TokenType::Semicolon)) + .then(|| parser.parse_expression()) .transpose()?; - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; + parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; // Parse update - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) + let update = (!parser.check(&TokenType::RightParen)) + .then(|| parser.parse_expression()) .transpose()?; - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; + parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - // Parse body - let body = Box::new(self.parse_statement()?); + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; - LoopStatement::For { - init: Some(ForInit::Variable(decl)), + Ok(LoopStatement::For { + init: None, test, update, body - } - } - } else if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - // For identifiers, we need to check if they're followed by 'in', 'of', or other tokens - // that would indicate different types of for loops - - // First, check if the next tokens form a for-in or for-of loop - // Save current position to backtrack if needed - let start_pos = self.current; - - // Parse the identifier - let token = self.advance().unwrap().clone(); - let name = self.identifier_name(&token)?; - let left = Expression::Identifier(name); - - // Check what follows the identifier - if self.check(&TokenType::In) { - // for-in loop with identifier - self.advance(); // consume 'in' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - let body = 
Box::new(self.parse_statement()?); - - LoopStatement::ForIn { - left: ForInOfLeft::Pattern(left), - right, - body - } - } else if self.check(&TokenType::Of) { - // for-of loop with identifier - self.advance(); // consume 'of' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - let body = Box::new(self.parse_statement()?); - - LoopStatement::ForOf { - left: ForInOfLeft::Pattern(left), - right, - body, - is_await - } - } else { - // Not a for-in or for-of loop, so it must be a standard for loop - // Reset position and parse the full initialization expression - self.current = start_pos; - - // Parse the initialization expression - let init_expr = self.parse_expression()?; + }) + } else if parser.check(&TokenType::Var) || parser.check(&TokenType::Let) || parser.check(&TokenType::Const) { + // Variable declaration initialization + let decl = parser.parse_variable_declaration()?; - self.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - - // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) - .transpose()?; + // Check for for-in or for-of + if parser.check(&TokenType::In) { + // for-in loop with variable declaration + parser.advance(); // consume 'in' + let right = parser.parse_expression()?; + parser.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update (which might be empty) - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) - .transpose()?; + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body - let body = Box::new(self.parse_statement()?); - - LoopStatement::For { - init: 
Some(ForInit::Expression(init_expr)), - test, - update, - body + Ok(LoopStatement::ForIn { + left: ForInOfLeft::Declaration(decl), + right, + body + }) + } else if parser.check(&TokenType::Of) { + // for-of loop with variable declaration + parser.advance(); // consume 'of' + let right = parser.parse_expression()?; + parser.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; + + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::ForOf { + left: ForInOfLeft::Declaration(decl), + right, + body, + is_await + }) + } else { + // Standard for loop with variable declaration + parser.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; + + // Parse condition + let test = (!parser.check(&TokenType::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; + + // Parse update + let update = (!parser.check(&TokenType::RightParen)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; + + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::For { + init: Some(ForInit::Variable(decl)), + test, + update, + body + }) } - } - } else { - // For other expressions (including array/object literals and complex expressions) - // Parse the full initialization expression - let init_expr = self.parse_expression()?; - - // Check if this is a for-in or for-of loop - if self.check(&TokenType::In) { - // for-in loop with expression - self.advance(); // consume 'in' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - let body = Box::new(self.parse_statement()?); + } else if let 
Some(TokenType::Identifier(_)) = parser.peek_token_type() { + // ... existing identifier handling code ... + // First, check if the next tokens form a for-in or for-of loop + // Save current position to backtrack if needed + let start_pos = parser.current; - LoopStatement::ForIn { - left: ForInOfLeft::Pattern(init_expr), - right, - body - } - } else if self.check(&TokenType::Of) { - // for-of loop with expression - self.advance(); // consume 'of' - let right = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - let body = Box::new(self.parse_statement()?); + // Parse the identifier + let token = parser.advance().unwrap().clone(); + let name = parser.expect_identifier("Expected label name")?; + let left = Expression::Identifier(name); - LoopStatement::ForOf { - left: ForInOfLeft::Pattern(init_expr), - right, - body, - is_await + // Check what follows the identifier + if parser.check(&TokenType::In) { + // for-in loop with identifier + parser.advance(); // consume 'in' + let right = parser.parse_expression()?; + parser.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; + + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::ForIn { + left: ForInOfLeft::Pattern(left), + right, + body + }) + } else if parser.check(&TokenType::Of) { + // for-of loop with identifier + parser.advance(); // consume 'of' + let right = parser.parse_expression()?; + parser.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; + + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::ForOf { + left: ForInOfLeft::Pattern(left), + right, + body, + is_await + }) + } else { + // Not a for-in or for-of loop, so it must be a standard for loop + // Reset position 
and parse the full initialization expression + parser.current = start_pos; + + // Parse the initialization expression + let init_expr = parser.parse_expression()?; + + parser.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; + + // Parse condition + let test = (!parser.check(&TokenType::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; + + // Parse update (which might be empty) + let update = (!parser.check(&TokenType::RightParen)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; + + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::For { + init: Some(ForInit::Expression(init_expr)), + test, + update, + body + }) } } else { - // Standard for loop with expression initialization - self.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; + // For other expressions (including array/object literals and complex expressions) + // Parse the full initialization expression + let init_expr = parser.parse_expression()?; - // Parse condition - let test = (!self.check(&TokenType::Semicolon)) - .then(|| self.parse_expression()) - .transpose()?; + // Check if this is a for-in or for-of loop + if parser.check(&TokenType::In) { + // for-in loop with expression + parser.advance(); // consume 'in' + let right = parser.parse_expression()?; + parser.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update (which might be empty) - let update = (!self.check(&TokenType::RightParen)) - .then(|| self.parse_expression()) - .transpose()?; + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { 
+ p.parse_statement().map(Box::new) + })?; - self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body - let body = Box::new(self.parse_statement()?); - - LoopStatement::For { - init: Some(ForInit::Expression(init_expr)), - test, - update, - body + Ok(LoopStatement::ForIn { + left: ForInOfLeft::Pattern(init_expr), + right, + body + }) + } else if parser.check(&TokenType::Of) { + // for-of loop with expression + parser.advance(); // consume 'of' + let right = parser.parse_expression()?; + parser.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; + + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::ForOf { + left: ForInOfLeft::Pattern(init_expr), + right, + body, + is_await + }) + } else { + // Standard for loop with expression initialization + parser.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; + + // Parse condition + let test = (!parser.check(&TokenType::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; + + // Parse update (which might be empty) + let update = (!parser.check(&TokenType::RightParen)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; + + // Parse body with LoopBody context + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::For { + init: Some(ForInit::Expression(init_expr)), + test, + update, + body + }) + } + } + })?; + + Ok(Statement::Loop(result)) + } + + + /// Parse a variable declarator: pattern = initializer + pub fn parse_variable_declarator(&mut self) -> ParseResult { + // Get the current token position for error reporting + let start_token = self.peek_token().cloned(); + + // Parse 
the binding pattern (identifier, object pattern, or array pattern) + let id = self.parse_pattern()?; + + // Check if this is a const declaration without an initializer + let is_const = if let Some(prev_token) = self.tokens.get(self.current - 2) { + matches!(prev_token.token_type, TokenType::Const) + } else { + false + }; + + // Parse optional initializer + let init = if self.match_token(&TokenType::Equal) { + // Parse the initializer expression + Some(self.parse_expression()?) + } else { + // Const declarations must have initializers + if is_const { + if let Some(token) = start_token { + return Err(parser_error_at_current!(self, "Missing initializer in const declaration")); + } else { + return Err(parser_error_at_current!(self, "Missing initializer in const declaration")); } } + None }; - // Restore previous loop state - self.state.in_loop = prev_in_loop; + // Validate the pattern based on the current context + self.validate_binding_pattern(&id, is_const)?; - Ok(Statement::Loop(result)) + Ok(VariableDeclarator { id, init }) + } + + /// Helper method to validate a binding pattern + fn validate_binding_pattern(&self, pattern: &Expression, is_const: bool) -> ParseResult<()> { + match pattern { + // For simple identifiers, check for strict mode restrictions + Expression::Identifier(name) => { + // Check for reserved words in strict mode + if self.state.in_strict_mode { + let reserved_words = ["eval", "arguments"]; + if reserved_words.contains(&name.as_ref()) { + return Err(parser_error_at_current!(self, "'{}' cannot be used as a variable name in strict mode", name)); + } + } + + // Check for other JavaScript reserved words that can't be variable names + let always_reserved = ["let", "yield", "await", "static", "implements", + "interface", "package", "private", "protected", "public"]; + if always_reserved.contains(&name.as_ref()) { + return Err(parser_error_at_current!(self, "'{}' is a reserved word and cannot be used as a variable name", name)); + } + }, + // For 
object patterns, recursively validate each property + Expression::Object(properties) => { + for property in properties { + match property { + ObjectProperty::Property { key, value, .. } => { + // Validate the value part of the property + self.validate_binding_pattern(value, is_const)?; + }, + ObjectProperty::Spread(expr) => { + // For spread elements, validate the spread target + if let Expression::Identifier(name) = expr { + self.validate_binding_pattern(&Expression::Identifier(name.clone()), is_const)?; + } else if let Expression::Object(_) | Expression::Array(_) = expr { + self.validate_binding_pattern(expr, is_const)?; + } else { + return Err(parser_error_at_current!(self, "Invalid rest element in object pattern")); + } + }, + _ => { + // Methods are not allowed in binding patterns + return Err(parser_error_at_current!(self, "Method definitions are not allowed in object patterns")); + } + } + } + }, + // For array patterns, recursively validate each element + Expression::Array(elements) => { + for element in elements { + match element { + ArrayElement::Expression(expr) => { + self.validate_binding_pattern(expr, is_const)?; + }, + ArrayElement::Spread(expr) => { + // For spread elements, validate the spread target + if let Expression::Identifier(name) = expr { + self.validate_binding_pattern(&Expression::Identifier(name.clone()), is_const)?; + } else if let Expression::Object(_) | Expression::Array(_) = expr { + self.validate_binding_pattern(expr, is_const)?; + } else { + return Err(parser_error_at_current!(self, "Invalid rest element in array pattern")); + } + }, + ArrayElement::Hole => { + // Holes are allowed in array patterns + } + } + } + }, + // Handle assignment patterns (default values) + Expression::Assignment { left, .. 
} => { + self.validate_binding_pattern(left, is_const)?; + }, + // Handle spread elements + Expression::Spread(inner) => { + self.validate_binding_pattern(inner, is_const)?; + }, + // Other expression types are not valid binding patterns + _ => { + return Err(parser_error_at_current!(self, "Invalid binding pattern in variable declaration")); + } } + + Ok(()) +} + } From d375f4ac9d1e3e4139ffcb0278a4d3f3310aa0dc Mon Sep 17 00:00:00 2001 From: Jan Cajthaml Date: Tue, 6 May 2025 23:08:43 +0200 Subject: [PATCH 5/7] improved parsing of loops, optimised ast enums, no token_type just token --- examples/tricky/index.js | 53 ++-- src/ast.rs | 47 +-- src/lexer/context.rs | 59 ++-- src/lexer/interner.rs | 25 -- src/lexer/lexer.rs | 353 ++++++++++----------- src/lexer/mod.rs | 3 +- src/lexer/token.rs | 195 ++++++------ src/parser/asi.rs | 153 ++++----- src/parser/classes.rs | 49 ++- src/parser/core.rs | 203 +++++------- src/parser/error.rs | 301 +++++++++++------- src/parser/expressions.rs | 541 +++++++++++++------------------- src/parser/functions.rs | 56 ++-- src/parser/modules.rs | 104 +++--- src/parser/patterns.rs | 49 +-- src/parser/prelude.rs | 7 +- src/parser/statements.rs | 644 ++++++++++++++++++-------------------- 17 files changed, 1285 insertions(+), 1557 deletions(-) delete mode 100644 src/lexer/interner.rs diff --git a/examples/tricky/index.js b/examples/tricky/index.js index c387cb9..bbb9a24 100644 --- a/examples/tricky/index.js +++ b/examples/tricky/index.js @@ -1,24 +1,31 @@ -//OK -(x=>3); -(x=>({})); -((x)=>3); -((x)=>({})); -(x)=>{}; -(x,y,z)=>({}); -(x,y) => 3; -x=>3; -[x=>3]; -x=>({}); -(x=>3)(3); -x=>{}; -async function* foo(a, b, ...c) {} -function* foo(a) {} -(function foo() {}()) -((x)=>3)(3); -(x,y,z,a,b,c,d)=>3; -(function() {}) -({ x: y => 3 }); +////OK +//(x=>3); +//(x=>({})); +//((x)=>3); +//((x)=>({})); +//(x)=>{}; +//(x,y,z)=>({}); +//(x,y) => 3; +//x=>3; +//[x=>3]; +//x=>({}); +//(x=>3)(3); +//x=>{}; +//async function* foo(a, b, ...c) {} 
+//function* foo(a) {} +//(function foo() {}()) +//((x)=>3)(3); +//(x,y,z,a,b,c,d)=>3; +//(function() {}) +//({ x: y => 3 }); +// +//// ERROR +////function foo(a=1) {} +//{ x: y => 3 }; -// ERROR -//function foo(a=1) {} -{ x: y => 3 }; + +for (let i=0; i, }, ForIn { - left: ForInOfLeft, + left: ForInit, right: Expression, body: Box, }, ForOf { - left: ForInOfLeft, + left: ForInit, right: Expression, body: Box, is_await: bool, }, } -#[derive(Debug, Clone)] -pub enum ForInOfLeft { - Declaration(VariableDeclaration), - Pattern(Expression), -} - #[derive(Debug, Clone)] pub enum Declaration { Variable(VariableDeclaration), @@ -186,8 +180,8 @@ pub enum ExportDefaultDeclaration { #[derive(Debug, Clone)] pub enum ForInit { - Variable(VariableDeclaration), - Expression(Expression), + Declaration(VariableDeclaration), + Pattern(Expression), } #[derive(Debug, Clone, PartialEq)] @@ -203,33 +197,6 @@ pub struct VariableDeclarator { pub init: Option, } -// -//// TODO delete -//#[derive(Debug, Clone)] -//pub enum Pattern { - //Identifier(Box), - //ObjectPattern(Vec), - //ArrayPattern(Vec>), - //RestElement(Box), - //AssignmentPattern { - //left: Box, - //right: Expression, - //}, -//} - -/* -#[derive(Debug, Clone)] -pub enum ObjectPatternProperty { - Property { - key: PropertyKey, - value: Expression, - computed: bool, - shorthand: bool, - }, - Spread(Box), - //Rest(Box), -}*/ - #[derive(Debug, Clone)] pub struct CatchClause { pub param: Option, @@ -320,10 +287,7 @@ pub enum Expression { arguments: Vec, optional: bool, }, - New { - callee: Box, - arguments: Vec, - }, + New(Box), Conditional { test: Box, consequent: Box, @@ -479,6 +443,7 @@ pub enum BinaryOperator { RightShift, UnsignedRightShift, In, + Of, InstanceOf, } diff --git a/src/lexer/context.rs b/src/lexer/context.rs index 6e81e9a..d55fe0a 100644 --- a/src/lexer/context.rs +++ b/src/lexer/context.rs @@ -1,5 +1,5 @@ use std::fmt; -use crate::lexer::TokenType; +use crate::lexer::Token; #[derive(Debug, Clone, Copy, 
PartialEq, Eq)] pub enum LexicalContext { @@ -18,7 +18,7 @@ pub enum LexicalContext { impl fmt::Display for LexicalContext { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Default => write!(f, "default"), + Self::Default => write!(f, "global"), Self::PropertyKey => write!(f, "property key"), Self::MemberAccess => write!(f, "member access"), Self::ImportExport => write!(f, "import export"), @@ -29,7 +29,7 @@ impl fmt::Display for LexicalContext { Self::FunctionBody { allow_yield: true, allow_await: false } => write!(f, "generator function body"), Self::FunctionBody { allow_yield: false, allow_await: true } => write!(f, "async function body"), Self::FunctionBody { allow_yield: true, allow_await: true } => write!(f, "async generator function body"), - Self::LoopParameters => write!(f, "loop test"), + Self::LoopParameters => write!(f, "loop init"), Self::LoopBody => write!(f, "loop body"), Self::SwitchBody => write!(f, "switch body"), } @@ -40,49 +40,28 @@ impl LexicalContext { // Fast check if this context allows any keywords as identifiers pub fn has_keywords_as_identifiers(&self) -> bool { - // Return true if this context might allow any keywords as identifiers - // This is a quick filter to avoid processing tokens unnecessarily match self { - LexicalContext::Default => false, // Default context doesn't allow keywords as identifiers - // Add other cases based on your implementation + LexicalContext::Default => false, _ => true, } } - - /* - // Check if a specific token type can be used as an identifier in this context - fn allows_token_as_identifier(&self, token_type: &TokenType) -> bool { - // First check if the token is a keyword at all - match token_type { - // Match specific keywords that might be allowed as identifiers - TokenType::Await => self.allows_keyword_as_identifier("await"), - TokenType::Yield => self.allows_keyword_as_identifier("yield"), - // Add other keywords that might be allowed as identifiers - TokenType::Let | - 
TokenType::Static | - TokenType::Implements | - TokenType::Interface | - TokenType::Package | - TokenType::Private | - TokenType::Protected | - TokenType::Public => self.allows_keyword_as_identifier(token_type.keyword_text().unwrap()), - - // Non-keywords or other token types don't need conversion - _ => false, - } - } -*/ - pub fn allows_token_as_identifier(&self, token_type: &TokenType) -> bool { + pub fn allows_token_as_identifier(&self, token: &Token) -> bool { match self { // In property contexts, all keywords can be identifiers except a few special ones Self::MemberAccess => { //let result = matches!(keyword, "default"); - //println!("Checking in MemberAccess with {:#?}", keyword); + println!("Checking in MemberAccess with {:#?}", token); - if token_type == &TokenType::Default { + if token == &Token::Default { + true + } else if token == &Token::From { + true + } else if token == &Token::For { + true + } else if token == &Token::Get { true } else { false @@ -118,15 +97,21 @@ impl LexicalContext { } }, Self::LoopParameters => { - //println!("Currently in LoopParameters with {:#?}", keyword); - false + //println!("Currently in LoopParameters with {:#?}", token); + if token == &Token::Set { + true + } else { + false + } }, // In function bodies, yield and await have special handling Self::FunctionBody { allow_yield, allow_await } => { //println!("Currently in FunctionBody with {:#?}", keyword); - if (*allow_yield && token_type == &TokenType::Yield) || (*allow_await && token_type == &TokenType::Await) { + if (*allow_yield && token == &Token::Yield) || (*allow_await && token == &Token::Await) { false + } else if token == &Token::As { + true } else { // Default to not allowing keywords as identifiers in function bodies false diff --git a/src/lexer/interner.rs b/src/lexer/interner.rs deleted file mode 100644 index c37de4b..0000000 --- a/src/lexer/interner.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::collections::HashMap; -use std::rc::Rc; - -pub struct StringInterner 
{ - strings: HashMap>, -} - -impl StringInterner { - pub fn new() -> Self { - StringInterner { - strings: HashMap::new(), - } - } - - pub fn intern(&mut self, s: &str) -> Rc { - if let Some(interned) = self.strings.get(s) { - interned.clone() - } else { - // Fix: Use explicit type annotation - let rc: Rc = s.into(); - self.strings.insert(s.to_string(), rc.clone()); - rc - } - } -} \ No newline at end of file diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 9cd98cc..24902e6 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -1,13 +1,11 @@ use std::collections::HashSet; -use crate::lexer::{Token, TokenType, TemplatePart, LexerError}; - -// TODO Specialized Token Handling for Lexer Optimization +use crate::lexer::{Token, TemplatePart, LexerError}; pub struct Lexer<'a> { source: &'a str, - bytes: &'a [u8], // Direct access to the underlying bytes + bytes: &'a [u8], source_len: usize, - tokens: Vec, + tokens: Vec<(Token, (usize, usize))>, start: usize, current: usize, line: usize, @@ -18,10 +16,7 @@ pub struct Lexer<'a> { macro_rules! 
add_token { ($self:expr, $token_type:expr) => { - $self.tokens.push(Token::new($token_type, $self.line, $self.column - 1, 1)) - }; - ($self:expr, $token_type:expr, $length:expr) => { - $self.tokens.push(Token::new($token_type, $self.line, $self.column - $length, $length)) + $self.tokens.push(($token_type, ($self.line, $self.column))) }; } @@ -42,14 +37,12 @@ impl<'a> Lexer<'a> { previous_char: '\0', } } - + #[inline(always)] - fn identifier(&mut self) { - let start_column = self.column - 1; - + fn identifier(&mut self) { // Track whether the identifier is all ASCII let mut is_all_ascii = true; - + // Fast path for identifiers (most common case) while !self.is_at_end() { if self.current < self.source_len { @@ -75,7 +68,7 @@ impl<'a> Lexer<'a> { continue; } } - + // If we reach here, either we're at the end or the next character // is not an identifier character if !self.is_at_end() && self.is_alphanumeric(self.peek()) { @@ -88,10 +81,10 @@ impl<'a> Lexer<'a> { break; } } - + // Calculate the length of the identifier let length = self.current - self.start; - + // Only check for keywords if the identifier is within the length range of keywords // and is all ASCII (since all keywords are ASCII) let token_type = if is_all_ascii && length >= 2 && length <= 10 { @@ -101,87 +94,87 @@ impl<'a> Lexer<'a> { // First check by length for faster matching match bytes.len() { 2 => match bytes { - b"do" => TokenType::Do, - b"if" => TokenType::If, - b"in" => TokenType::In, - b"of" => TokenType::Of, - b"as" => TokenType::As, + b"do" => Token::Do, + b"if" => Token::If, + b"in" => Token::In, + b"of" => Token::Of, + b"as" => Token::As, _ => self.create_identifier_token(), }, 3 => match bytes { - b"for" => TokenType::For, - b"let" => TokenType::Let, - b"new" => TokenType::New, - b"try" => TokenType::Try, - b"var" => TokenType::Var, - b"get" => TokenType::Get, - b"set" => TokenType::Set, + b"for" => Token::For, + b"let" => Token::Let, + b"new" => Token::New, + b"try" => Token::Try, + 
b"var" => Token::Var, + b"get" => Token::Get, + b"set" => Token::Set, _ => self.create_identifier_token(), }, 4 => match bytes { - b"case" => TokenType::Case, - b"else" => TokenType::Else, - b"enum" => TokenType::Enum, - b"from" => TokenType::From, - b"null" => TokenType::Null, - b"this" => TokenType::This, - b"true" => TokenType::True, - b"void" => TokenType::Void, - b"with" => TokenType::With, - b"eval" => TokenType::Eval, + b"case" => Token::Case, + b"else" => Token::Else, + b"enum" => Token::Enum, + b"from" => Token::From, + b"null" => Token::Null, + b"this" => Token::This, + b"true" => Token::True, + b"void" => Token::Void, + b"with" => Token::With, + b"eval" => Token::Eval, _ => self.create_identifier_token(), }, 5 => match bytes { - b"async" => TokenType::Async, - b"await" => TokenType::Await, - b"break" => TokenType::Break, - b"catch" => TokenType::Catch, - b"class" => TokenType::Class, - b"const" => TokenType::Const, - b"false" => TokenType::False, - b"super" => TokenType::Super, - b"throw" => TokenType::Throw, - b"while" => TokenType::While, - b"yield" => TokenType::Yield, + b"async" => Token::Async, + b"await" => Token::Await, + b"break" => Token::Break, + b"catch" => Token::Catch, + b"class" => Token::Class, + b"const" => Token::Const, + b"false" => Token::False, + b"super" => Token::Super, + b"throw" => Token::Throw, + b"while" => Token::While, + b"yield" => Token::Yield, _ => self.create_identifier_token(), }, 6 => match bytes { - b"delete" => TokenType::Delete, - b"export" => TokenType::Export, - b"import" => TokenType::Import, - b"public" => TokenType::Public, - b"return" => TokenType::Return, - b"static" => TokenType::Static, - b"switch" => TokenType::Switch, - b"target" => TokenType::Target, - b"typeof" => TokenType::Typeof, + b"delete" => Token::Delete, + b"export" => Token::Export, + b"import" => Token::Import, + b"public" => Token::Public, + b"return" => Token::Return, + b"static" => Token::Static, + b"switch" => Token::Switch, + b"target" => 
Token::Target, + b"typeof" => Token::Typeof, _ => self.create_identifier_token(), }, 7 => match bytes { - b"default" => TokenType::Default, - b"extends" => TokenType::Extends, - b"finally" => TokenType::Finally, - b"package" => TokenType::Package, - b"private" => TokenType::Private, + b"default" => Token::Default, + b"extends" => Token::Extends, + b"finally" => Token::Finally, + b"package" => Token::Package, + b"private" => Token::Private, _ => self.create_identifier_token(), }, 8 => match bytes { - b"continue" => TokenType::Continue, - b"debugger" => TokenType::Debugger, - b"function" => TokenType::Function, + b"continue" => Token::Continue, + b"debugger" => Token::Debugger, + b"function" => Token::Function, _ => self.create_identifier_token(), }, 9 => match bytes { - b"arguments" => TokenType::Arguments, - b"interface" => TokenType::Interface, - b"protected" => TokenType::Protected, - b"undefined" => TokenType::Undefined, + b"arguments" => Token::Arguments, + b"interface" => Token::Interface, + b"protected" => Token::Protected, + b"undefined" => Token::Undefined, _ => self.create_identifier_token(), }, 10 => match bytes { - b"instanceof" => TokenType::InstanceOf, - b"implements" => TokenType::Implements, - b"constructor" => TokenType::Constructor, + b"instanceof" => Token::InstanceOf, + b"implements" => Token::Implements, + b"constructor" => Token::Constructor, _ => self.create_identifier_token(), }, _ => self.create_identifier_token(), @@ -192,23 +185,23 @@ impl<'a> Lexer<'a> { }; // Add the token - add_token!(self, token_type, length as usize); + add_token!(self, token_type); } // Helper method to create an identifier token #[inline] - fn create_identifier_token(&self) -> TokenType { + fn create_identifier_token(&self) -> Token { let text = &self.source[self.start..self.current]; - TokenType::Identifier(text.to_string()) + Token::Identifier(text.to_string()) } - pub fn scan_tokens(&mut self) -> Result, LexerError> { + pub fn scan_tokens(&mut self) -> Result, 
LexerError> { while !self.is_at_end() { self.start = self.current; self.scan_token()?; } let _eof_column = self.column; - add_token!(self, TokenType::EOF, 0); + add_token!(self, Token::EOF); Ok(std::mem::take(&mut self.tokens)) } @@ -216,168 +209,168 @@ impl<'a> Lexer<'a> { let c = self.advance(); match c { - '(' => add_token!(self, TokenType::LeftParen), - ')' => add_token!(self, TokenType::RightParen), - '{' => add_token!(self, TokenType::LeftBrace), - '}' => add_token!(self, TokenType::RightBrace), - '[' => add_token!(self, TokenType::LeftBracket), - ']' => add_token!(self, TokenType::RightBracket), - ',' => add_token!(self, TokenType::Comma), - ';' => add_token!(self, TokenType::Semicolon), - ':' => add_token!(self, TokenType::Colon), - '#' => add_token!(self, TokenType::Hash), + '(' => add_token!(self, Token::LeftParen), + ')' => add_token!(self, Token::RightParen), + '{' => add_token!(self, Token::LeftBrace), + '}' => add_token!(self, Token::RightBrace), + '[' => add_token!(self, Token::LeftBracket), + ']' => add_token!(self, Token::RightBracket), + ',' => add_token!(self, Token::Comma), + ';' => add_token!(self, Token::Semicolon), + ':' => add_token!(self, Token::Colon), + '#' => add_token!(self, Token::Hash), '.' 
=> { if self.match_char('.') && self.match_char('.') { - add_token!(self, TokenType::Ellipsis, 3); + add_token!(self, Token::Ellipsis); } else { - add_token!(self, TokenType::Dot); + add_token!(self, Token::Dot); } }, '+' => { if self.match_char('+') { - add_token!(self, TokenType::PlusPlus, 2); + add_token!(self, Token::PlusPlus); } else if self.match_char('=') { - add_token!(self, TokenType::PlusEqual, 2); + add_token!(self, Token::PlusEqual); } else { - add_token!(self, TokenType::Plus); + add_token!(self, Token::Plus); } }, '-' => { if self.match_char('-') { - add_token!(self, TokenType::MinusMinus, 2); + add_token!(self, Token::MinusMinus); } else if self.match_char('=') { - add_token!(self, TokenType::MinusEqual, 2); + add_token!(self, Token::MinusEqual); } else { - add_token!(self, TokenType::Minus); + add_token!(self, Token::Minus); } }, '%' => { if self.match_char('=') { - add_token!(self, TokenType::PercentEqual, 2); + add_token!(self, Token::PercentEqual); } else { - add_token!(self, TokenType::Percent); + add_token!(self, Token::Percent); } }, '^' => { if self.match_char('=') { - add_token!(self, TokenType::CaretEqual, 2); + add_token!(self, Token::CaretEqual); } else { - add_token!(self, TokenType::Caret); + add_token!(self, Token::Caret); } }, '*' => { if self.match_char('*') { if self.match_char('=') { - add_token!(self, TokenType::StarStarEqual, 3); + add_token!(self, Token::StarStarEqual); } else { - add_token!(self, TokenType::StarStar, 2); + add_token!(self, Token::StarStar); } } else if self.match_char('=') { - add_token!(self, TokenType::StarEqual, 2); + add_token!(self, Token::StarEqual); } else { - add_token!(self, TokenType::Star); + add_token!(self, Token::Star); } }, '/' => self.handle_slash()?, '!' 
=> { if self.match_char('=') { if self.match_char('=') { - add_token!(self, TokenType::BangEqualEqual, 3); + add_token!(self, Token::BangEqualEqual); } else { - add_token!(self, TokenType::BangEqual, 2); + add_token!(self, Token::BangEqual); } } else { - add_token!(self, TokenType::Bang); + add_token!(self, Token::Bang); } }, '=' => { if self.match_char('>') { - add_token!(self, TokenType::Arrow, 2); + add_token!(self, Token::Arrow); } else if self.match_char('=') { if self.match_char('=') { - add_token!(self, TokenType::EqualEqualEqual, 3); + add_token!(self, Token::EqualEqualEqual); } else { - add_token!(self, TokenType::EqualEqual, 2); + add_token!(self, Token::EqualEqual); } } else { - add_token!(self, TokenType::Equal); + add_token!(self, Token::Equal); } }, '<' => { if self.match_char('=') { - add_token!(self, TokenType::LessEqual, 2); + add_token!(self, Token::LessEqual); } else if self.match_char('<') { if self.match_char('=') { - add_token!(self, TokenType::LessLessEqual, 3); + add_token!(self, Token::LessLessEqual); } else { - add_token!(self, TokenType::LessLess, 2); + add_token!(self, Token::LessLess); } } else { - add_token!(self, TokenType::Less); + add_token!(self, Token::Less); } }, '>' => { if self.match_char('=') { - add_token!(self, TokenType::GreaterEqual, 2); + add_token!(self, Token::GreaterEqual); } else if self.match_char('>') { if self.match_char('>') { if self.match_char('=') { - add_token!(self, TokenType::GreaterGreaterGreaterEqual, 4); + add_token!(self, Token::GreaterGreaterGreaterEqual); } else { - add_token!(self, TokenType::GreaterGreaterGreater, 3); + add_token!(self, Token::GreaterGreaterGreater); } } else if self.match_char('=') { - add_token!(self, TokenType::GreaterGreaterEqual, 3); + add_token!(self, Token::GreaterGreaterEqual); } else { - add_token!(self, TokenType::GreaterGreater, 2); + add_token!(self, Token::GreaterGreater); } } else { - add_token!(self, TokenType::Greater); + add_token!(self, Token::Greater); } }, '&' => 
{ if self.match_char('&') { if self.match_char('=') { - add_token!(self, TokenType::AmpersandAmpersandEqual, 3); + add_token!(self, Token::AmpersandAmpersandEqual); } else { - add_token!(self, TokenType::AmpersandAmpersand, 2); + add_token!(self, Token::AmpersandAmpersand); } } else if self.match_char('=') { - add_token!(self, TokenType::AmpersandEqual, 2); + add_token!(self, Token::AmpersandEqual); } else { - add_token!(self, TokenType::Ampersand); + add_token!(self, Token::Ampersand); } }, '|' => { if self.match_char('|') { if self.match_char('=') { - add_token!(self, TokenType::PipePipeEqual, 3); + add_token!(self, Token::PipePipeEqual); } else { - add_token!(self, TokenType::PipePipe, 2); + add_token!(self, Token::PipePipe); } } else if self.match_char('=') { - add_token!(self, TokenType::PipeEqual, 2); + add_token!(self, Token::PipeEqual); } else { - add_token!(self, TokenType::Pipe); + add_token!(self, Token::Pipe); } }, - '~' => add_token!(self, TokenType::Tilde), + '~' => add_token!(self, Token::Tilde), '?' 
=> { if self.match_char('?') { if self.match_char('=') { - add_token!(self, TokenType::QuestionQuestionEqual, 3); + add_token!(self, Token::QuestionQuestionEqual); } else { - add_token!(self, TokenType::QuestionQuestion, 2); + add_token!(self, Token::QuestionQuestion); } } else if self.match_char('.') { - add_token!(self, TokenType::QuestionDot, 2); + add_token!(self, Token::QuestionDot); } else { - add_token!(self, TokenType::Question); + add_token!(self, Token::Question); } }, @@ -451,11 +444,11 @@ impl<'a> Lexer<'a> { } else if self.match_char('*') { self.block_comment()?; } else if self.match_char('=') { - add_token!(self, TokenType::SlashEqual, 2); + add_token!(self, Token::SlashEqual); } else if self.is_regexp_start() { self.regexp()?; } else { - add_token!(self, TokenType::Slash); + add_token!(self, Token::Slash); } Ok(()) } @@ -467,28 +460,28 @@ impl<'a> Lexer<'a> { } // Get the last token type - let last_token = &self.tokens.last().unwrap().token_type; + let (last_token, _) = &self.tokens.last().unwrap(); // A slash starts a regex if it follows a token that cannot be the end of an expression match last_token { // After these tokens, a slash is division (these can end an expression) - TokenType::Identifier(_) | - TokenType::NumberLiteral(_) | - TokenType::StringLiteral(_) | - TokenType::RegExpLiteral(_, _) | - TokenType::TemplateLiteral(_) | - TokenType::True | - TokenType::False | - TokenType::Null | - TokenType::This | - TokenType::RightParen | - TokenType::RightBracket | - TokenType::PlusPlus | - TokenType::MinusMinus => false, + Token::Identifier(_) | + Token::NumberLiteral(_) | + Token::StringLiteral(_) | + Token::RegExpLiteral(_, _) | + Token::TemplateLiteral(_) | + Token::True | + Token::False | + Token::Null | + Token::This | + Token::RightParen | + Token::RightBracket | + Token::PlusPlus | + Token::MinusMinus => false, // Special case: right brace - could be block or object literal - TokenType::RightBrace => { + Token::RightBrace => { // TODO 
implement properly // This is a complex case that depends on context @@ -568,10 +561,8 @@ impl<'a> Lexer<'a> { )); } } - - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::RegExpLiteral(pattern, flags), length); + add_token!(self, Token::RegExpLiteral(pattern, flags)); Ok(()) } @@ -711,9 +702,7 @@ impl<'a> Lexer<'a> { // Consume the closing backtick self.advance(); - let length = (self.current - self.start) as usize; - - add_token!(self, TokenType::TemplateLiteral(parts), length); + add_token!(self, Token::TemplateLiteral(parts)); Ok(()) } @@ -794,10 +783,7 @@ impl<'a> Lexer<'a> { // Consume the closing quote self.advance(); - - let length = (self.current - self.start) as usize; - - add_token!(self, TokenType::StringLiteral(value), length); + add_token!(self, Token::StringLiteral(value)); Ok(()) } @@ -1045,10 +1031,8 @@ impl<'a> Lexer<'a> { start_column )); } - - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::BigIntLiteral(value_str), length); + add_token!(self, Token::BigIntLiteral(value_str)); return Ok(()); } @@ -1061,18 +1045,14 @@ impl<'a> Lexer<'a> { !value_str.contains('E') && value_str.len() < 10 { // For small integers, parse directly to avoid floating point conversion if let Ok(int_val) = value_str.parse::() { - let length = (self.current - self.start) as usize; - - add_token!(self, TokenType::NumberLiteral(int_val as f64), length); - + add_token!(self, Token::NumberLiteral(int_val as f64)); return Ok(()); } } match value_str.parse::() { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value), length); + add_token!(self, Token::NumberLiteral(value)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1111,8 +1091,7 @@ impl<'a> Lexer<'a> { // Parse as binary match i64::from_str_radix(&value_str.replace('_', ""), 2) { Ok(_) => { - let length = (self.current - self.start) as usize; - add_token!(self, 
TokenType::BigIntLiteral(format!("0b{}", value_str)), length); + add_token!(self, Token::BigIntLiteral(format!("0b{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1128,8 +1107,7 @@ impl<'a> Lexer<'a> { // Parse as binary and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 2) { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value as f64), length); + add_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1169,8 +1147,7 @@ impl<'a> Lexer<'a> { // Parse as octal match i64::from_str_radix(&value_str.replace('_', ""), 8) { Ok(_) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::BigIntLiteral(format!("0o{}", value_str)), length); + add_token!(self, Token::BigIntLiteral(format!("0o{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1186,8 +1163,7 @@ impl<'a> Lexer<'a> { // Parse as octal and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 8) { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value as f64), length); + add_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1227,8 +1203,7 @@ impl<'a> Lexer<'a> { // Parse as hex match i64::from_str_radix(&value_str.replace('_', ""), 16) { Ok(_) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::BigIntLiteral(format!("0x{}", value_str)), length); + add_token!(self, Token::BigIntLiteral(format!("0x{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1244,8 +1219,7 @@ impl<'a> Lexer<'a> { // Parse as hex and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 16) { Ok(value) => { - let length = (self.current - self.start) as usize; - add_token!(self, TokenType::NumberLiteral(value as f64), length); + add_token!(self, Token::NumberLiteral(value as 
f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1300,19 +1274,18 @@ impl<'a> Lexer<'a> { fn is_at_end(&self) -> bool { self.current >= self.source_len } - + #[inline(always)] fn is_octal_digit(&self, c: char) -> bool { c >= '0' && c <= '7' } - #[inline(always)] fn advance(&mut self) -> char { if self.is_at_end() { return '\0'; } - + // Fast path for ASCII (most common case in JS) if self.current < self.source_len && self.bytes[self.current] < 128 { let c = self.bytes[self.current] as char; @@ -1322,7 +1295,7 @@ impl<'a> Lexer<'a> { self.column += 1; return c; } - + // Fallback for non-ASCII (UTF-8) let c = self.source[self.current..].chars().next().unwrap(); self.previous_char = self.current_char; @@ -1348,18 +1321,15 @@ impl<'a> Lexer<'a> { if self.current + 1 >= self.source_len { return '\0'; } - // Fast path for ASCII if self.bytes[self.current] < 128 && self.bytes[self.current + 1] < 128 { return self.bytes[self.current + 1] as char; } - // If current is ASCII but next might not be if self.bytes[self.current] < 128 { let next_pos = self.current + 1; return self.source[next_pos..].chars().next().unwrap_or('\0'); } - // Both current and next are non-ASCII let mut iter = self.source[self.current..].chars(); iter.next(); @@ -1370,7 +1340,7 @@ impl<'a> Lexer<'a> { fn peek_previous(&self) -> char { self.previous_char } - + #[inline(always)] fn match_char(&mut self, expected: char) -> bool { if self.is_at_end() || self.peek() != expected { @@ -1381,4 +1351,3 @@ impl<'a> Lexer<'a> { } } } - diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index ff13c5e..e1bebad 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,10 +1,9 @@ mod error; mod token; mod lexer; -mod interner; mod context; pub use error::LexerError; -pub use token::{Token, TokenType, TemplatePart}; +pub use token::{Token, TemplatePart}; pub use lexer::Lexer; pub use context::LexicalContext; diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 2089b11..cc0675e 100644 --- a/src/lexer/token.rs 
+++ b/src/lexer/token.rs @@ -1,5 +1,5 @@ #[derive(Debug, Clone, PartialEq)] -pub enum TokenType { +pub enum Token { // Characters LeftParen, RightParen, @@ -128,111 +128,111 @@ pub enum TokenType { EOF, } -impl TokenType { +impl Token { pub fn keyword_text(&self) -> Option<&str> { match self { // Characters/operators don't have keyword text - TokenType::LeftParen | TokenType::RightParen | TokenType::LeftBrace | - TokenType::RightBrace | TokenType::LeftBracket | TokenType::RightBracket | - TokenType::Comma | TokenType::Dot | TokenType::Semicolon | TokenType::Colon | - TokenType::Question | TokenType::Arrow | TokenType::Hash | TokenType::Plus | - TokenType::PlusPlus | TokenType::PlusEqual | TokenType::Minus | - TokenType::MinusMinus | TokenType::MinusEqual | TokenType::Star | - TokenType::StarStar | TokenType::StarEqual | TokenType::StarStarEqual | - TokenType::Slash | TokenType::SlashEqual | TokenType::Percent | - TokenType::PercentEqual | TokenType::Equal | TokenType::EqualEqual | - TokenType::EqualEqualEqual | TokenType::Bang | TokenType::BangEqual | - TokenType::BangEqualEqual | TokenType::Greater | TokenType::GreaterEqual | - TokenType::GreaterGreater | TokenType::GreaterGreaterEqual | - TokenType::GreaterGreaterGreater | TokenType::GreaterGreaterGreaterEqual | - TokenType::Less | TokenType::LessEqual | TokenType::LessLess | - TokenType::LessLessEqual | TokenType::Ampersand | TokenType::AmpersandEqual | - TokenType::AmpersandAmpersand | TokenType::AmpersandAmpersandEqual | - TokenType::Pipe | TokenType::PipeEqual | TokenType::PipePipe | - TokenType::PipePipeEqual | TokenType::Caret | TokenType::CaretEqual | - TokenType::Tilde | TokenType::Ellipsis | TokenType::QuestionQuestion | - TokenType::QuestionQuestionEqual | TokenType::QuestionDot => None, + Token::LeftParen | Token::RightParen | Token::LeftBrace | + Token::RightBrace | Token::LeftBracket | Token::RightBracket | + Token::Comma | Token::Dot | Token::Semicolon | Token::Colon | + Token::Question | 
Token::Arrow | Token::Hash | Token::Plus | + Token::PlusPlus | Token::PlusEqual | Token::Minus | + Token::MinusMinus | Token::MinusEqual | Token::Star | + Token::StarStar | Token::StarEqual | Token::StarStarEqual | + Token::Slash | Token::SlashEqual | Token::Percent | + Token::PercentEqual | Token::Equal | Token::EqualEqual | + Token::EqualEqualEqual | Token::Bang | Token::BangEqual | + Token::BangEqualEqual | Token::Greater | Token::GreaterEqual | + Token::GreaterGreater | Token::GreaterGreaterEqual | + Token::GreaterGreaterGreater | Token::GreaterGreaterGreaterEqual | + Token::Less | Token::LessEqual | Token::LessLess | + Token::LessLessEqual | Token::Ampersand | Token::AmpersandEqual | + Token::AmpersandAmpersand | Token::AmpersandAmpersandEqual | + Token::Pipe | Token::PipeEqual | Token::PipePipe | + Token::PipePipeEqual | Token::Caret | Token::CaretEqual | + Token::Tilde | Token::Ellipsis | Token::QuestionQuestion | + Token::QuestionQuestionEqual | Token::QuestionDot => None, // Literals don't have keyword text - TokenType::Identifier(_) | TokenType::StringLiteral(_) | - TokenType::TemplateLiteral(_) | TokenType::NumberLiteral(_) | - TokenType::BigIntLiteral(_) | TokenType::RegExpLiteral(_, _) => None, + Token::Identifier(_) | Token::StringLiteral(_) | + Token::TemplateLiteral(_) | Token::NumberLiteral(_) | + Token::BigIntLiteral(_) | Token::RegExpLiteral(_, _) => None, // Boolean literals and null - TokenType::True => Some("true"), - TokenType::False => Some("false"), - TokenType::Null => Some("null"), - TokenType::Undefined => Some("undefined"), + Token::True => Some("true"), + Token::False => Some("false"), + Token::Null => Some("null"), + Token::Undefined => Some("undefined"), // Keywords - TokenType::Var => Some("var"), - TokenType::Let => Some("let"), - TokenType::With => Some("with"), - TokenType::Const => Some("const"), - TokenType::Function => Some("function"), - TokenType::Return => Some("return"), - TokenType::If => Some("if"), - TokenType::Else => 
Some("else"), - TokenType::While => Some("while"), - TokenType::For => Some("for"), - TokenType::Break => Some("break"), - TokenType::Continue => Some("continue"), - TokenType::This => Some("this"), - TokenType::Super => Some("super"), - TokenType::New => Some("new"), - TokenType::Delete => Some("delete"), - TokenType::Typeof => Some("typeof"), - TokenType::Void => Some("void"), - TokenType::In => Some("in"), - TokenType::InstanceOf => Some("instanceof"), - TokenType::Try => Some("try"), - TokenType::Catch => Some("catch"), - TokenType::Finally => Some("finally"), - TokenType::Throw => Some("throw"), - TokenType::Switch => Some("switch"), - TokenType::Case => Some("case"), - TokenType::Default => Some("default"), - TokenType::Await => Some("await"), - TokenType::Async => Some("async"), - TokenType::Do => Some("do"), - TokenType::Enum => Some("enum"), - TokenType::Of => Some("of"), - TokenType::Target => Some("target"), - TokenType::Implements => Some("implements"), - TokenType::Interface => Some("interface"), - TokenType::Package => Some("package"), - TokenType::Private => Some("private"), - TokenType::Protected => Some("protected"), - TokenType::Public => Some("public"), - TokenType::Arguments => Some("arguments"), - TokenType::Eval => Some("eval"), - TokenType::Debugger => Some("debugger"), - TokenType::Class => Some("class"), - TokenType::Extends => Some("extends"), - TokenType::Constructor => Some("constructor"), - TokenType::Static => Some("static"), - TokenType::Get => Some("get"), - TokenType::Set => Some("set"), - TokenType::Yield => Some("yield"), - TokenType::Import => Some("import"), - TokenType::Export => Some("export"), - TokenType::From => Some("from"), - TokenType::As => Some("as"), + Token::Var => Some("var"), + Token::Let => Some("let"), + Token::With => Some("with"), + Token::Const => Some("const"), + Token::Function => Some("function"), + Token::Return => Some("return"), + Token::If => Some("if"), + Token::Else => Some("else"), + Token::While => 
Some("while"), + Token::For => Some("for"), + Token::Break => Some("break"), + Token::Continue => Some("continue"), + Token::This => Some("this"), + Token::Super => Some("super"), + Token::New => Some("new"), + Token::Delete => Some("delete"), + Token::Typeof => Some("typeof"), + Token::Void => Some("void"), + Token::In => Some("in"), + Token::InstanceOf => Some("instanceof"), + Token::Try => Some("try"), + Token::Catch => Some("catch"), + Token::Finally => Some("finally"), + Token::Throw => Some("throw"), + Token::Switch => Some("switch"), + Token::Case => Some("case"), + Token::Default => Some("default"), + Token::Await => Some("await"), + Token::Async => Some("async"), + Token::Do => Some("do"), + Token::Enum => Some("enum"), + Token::Of => Some("of"), + Token::Target => Some("target"), + Token::Implements => Some("implements"), + Token::Interface => Some("interface"), + Token::Package => Some("package"), + Token::Private => Some("private"), + Token::Protected => Some("protected"), + Token::Public => Some("public"), + Token::Arguments => Some("arguments"), + Token::Eval => Some("eval"), + Token::Debugger => Some("debugger"), + Token::Class => Some("class"), + Token::Extends => Some("extends"), + Token::Constructor => Some("constructor"), + Token::Static => Some("static"), + Token::Get => Some("get"), + Token::Set => Some("set"), + Token::Yield => Some("yield"), + Token::Import => Some("import"), + Token::Export => Some("export"), + Token::From => Some("from"), + Token::As => Some("as"), // Sentinel - TokenType::EOF => None, + Token::EOF => None, } } pub fn to_string(&self) -> String { match self { - TokenType::Identifier(name) => name.clone(), - TokenType::StringLiteral(s) => format!("\"{}\"", s), - TokenType::NumberLiteral(n) => n.to_string(), - TokenType::BigIntLiteral(b) => format!("{}n", b), - TokenType::RegExpLiteral(pattern, flags) => format!("/{}/{}", pattern, flags), - TokenType::TemplateLiteral(_) => "`...`".to_string(), + Token::Identifier(name) => 
name.clone(), + Token::StringLiteral(s) => format!("\"{}\"", s), + Token::NumberLiteral(n) => n.to_string(), + Token::BigIntLiteral(b) => format!("{}n", b), + Token::RegExpLiteral(pattern, flags) => format!("/{}/{}", pattern, flags), + Token::TemplateLiteral(_) => "`...`".to_string(), _ => match self.keyword_text() { Some(text) => text.to_string(), None => format!("{:?}", self), @@ -241,25 +241,8 @@ impl TokenType { } } - #[derive(Debug, Clone, PartialEq)] pub enum TemplatePart { String(String), Expression(String), } - -#[derive(Debug, Clone)] -pub struct Token { - pub token_type: TokenType, - pub column: usize, - pub line: usize, - pub length: usize, -} - -impl Token { - - #[inline] - pub fn new(token_type: TokenType, line: usize, column: usize, length: usize) -> Self { - Token { token_type, line, column, length } - } -} diff --git a/src/parser/asi.rs b/src/parser/asi.rs index aad2442..3b568a2 100644 --- a/src/parser/asi.rs +++ b/src/parser/asi.rs @@ -1,127 +1,82 @@ use super::prelude::*; -use crate::lexer::{Token, TokenType}; -use super::error::{ParserError, ParseResult}; +use crate::lexer::Token; +use super::error::ParseResult; use super::core::Parser; impl Parser { pub fn consume_semicolon(&mut self, message: &str) -> ParseResult { - // Case 1: Explicit semicolon - if self.match_token(&TokenType::Semicolon) { - return Ok(self.previous().unwrap().clone()); + + if self.match_token(&Token::Semicolon) { + return Ok(self.peek_previous().unwrap().clone()); } // Automatic Semicolon Insertion (ASI) rules + + if self.check(&Token::RightBrace) { + return Ok(self.peek_previous().unwrap().clone()); + } - // Case 2: Line terminator - if self.previous_line_terminator() { + if self.is_at_end() { + return Ok(self.peek_previous().unwrap().clone()); + } + + if self.previous_line_terminator() { + // Special case: restricted productions // These statements cannot be followed by a line terminator without a semicolon - if let Some(prev) = self.previous() { - match prev.token_type { 
+ if let Some(prev) = self.peek_previous() { + match prev { // Rule: No LineTerminator here after return/throw/yield/break/continue - TokenType::Return | - TokenType::Throw | - TokenType::Yield | - TokenType::Break | - TokenType::Continue => { + Token::Return | + Token::Throw | + Token::Yield | + Token::Break | + Token::Continue => { // Check if there's an expression after these keywords - // If not, ASI applies - if self.is_expression_start() { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); - return Err(ParserError::new(message, token.line, token.column)); + // If not, ASI applies + if !self.is_expression_start() { + return Err(parser_error_at_current!(self, message)); } - }, - // Rule: No ASI before postfix ++ or -- - _ if self.check(&TokenType::PlusPlus) || self.check(&TokenType::MinusMinus) => { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); - return Err(ParserError::new(message, token.line, token.column)); - }, + }, _ => {} } } - return Ok(self.previous().unwrap().clone()); - } - - // Case 3: Closing brace - if self.check(&TokenType::RightBrace) { - return Ok(self.previous().unwrap().clone()); - } - - // Case 4: End of input - if self.is_at_end() { - return Ok(self.previous().unwrap().clone()); - } - - // Case 5: The next token would cause a syntax error - // This is a complex case that requires looking ahead - // For example, in "{ 1 \n 2 }" we need to insert a semicolon after 1 - if self.would_cause_syntax_error() { - return Ok(self.previous().unwrap().clone()); + return Ok(self.peek_previous().unwrap().clone()); } - - // Otherwise, it's an error - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap_or(&binding)); - Err(ParserError::new(message, token.line, token.column)) + + // Otherwise, it's an error 
+ Err(parser_error_at_current!(self, message)) } - + + // Helper method to check if the current token would start an expression fn is_expression_start(&self) -> bool { - match self.peek_token_type() { - Some(TokenType::Identifier(_)) | - Some(TokenType::NumberLiteral(_)) | - Some(TokenType::StringLiteral(_)) | - Some(TokenType::TemplateLiteral(_)) | - Some(TokenType::RegExpLiteral(_, _)) | - Some(TokenType::True) | - Some(TokenType::False) | - Some(TokenType::Null) | - Some(TokenType::This) | - Some(TokenType::LeftParen) | - Some(TokenType::LeftBracket) | - Some(TokenType::LeftBrace) | - Some(TokenType::Function) | - Some(TokenType::New) | - Some(TokenType::Delete) | - Some(TokenType::Typeof) | - Some(TokenType::Void) | - Some(TokenType::Plus) | - Some(TokenType::Minus) | - Some(TokenType::Bang) | - Some(TokenType::Tilde) => true, + match self.peek() { + Some(Token::Identifier(_)) | + Some(Token::NumberLiteral(_)) | + Some(Token::StringLiteral(_)) | + Some(Token::TemplateLiteral(_)) | + Some(Token::RegExpLiteral(_, _)) | + Some(Token::True) | + Some(Token::False) | + Some(Token::Null) | + Some(Token::This) | + Some(Token::LeftParen) | + Some(Token::LeftBracket) | + Some(Token::LeftBrace) | + Some(Token::Function) | + Some(Token::New) | + Some(Token::Delete) | + Some(Token::Typeof) | + Some(Token::Void) | + Some(Token::Plus) | + Some(Token::Minus) | + Some(Token::Bang) | + Some(Token::Tilde) => true, _ => false } } - - // Helper method to check if continuing without a semicolon would cause a syntax error - fn would_cause_syntax_error(&self) -> bool { - // This is a simplified implementation - // A full implementation would need to look ahead and check for specific patterns - - // For example, if we have "a \n (" we need to insert a semicolon - // because "a(" would be parsed as a function call - if let Some(prev_token) = self.previous() { - if let Some(next_token) = self.peek_token() { - match (&prev_token.token_type, &next_token.token_type) { - // Cases where 
continuing would cause a syntax error - (_, TokenType::LeftParen) | - (_, TokenType::LeftBracket) | - (_, TokenType::Plus) | - (_, TokenType::Minus) | - (_, TokenType::Slash) | - (_, TokenType::Star) => true, - _ => false - } - } else { - false - } - } else { - false - } - } } \ No newline at end of file diff --git a/src/parser/classes.rs b/src/parser/classes.rs index ff5a555..55586ae 100644 --- a/src/parser/classes.rs +++ b/src/parser/classes.rs @@ -1,8 +1,7 @@ use super::prelude::*; - use crate::ast::*; -use crate::lexer::{TokenType, LexicalContext}; +use crate::lexer::{Token, LexicalContext}; use super::error::ParseResult; use super::core::Parser; use super::expressions::Precedence; @@ -13,7 +12,7 @@ impl Parser { self.advance(); // consume 'class' let id = self.expect_identifier("Expected class name")?; - let super_class = self.match_token(&TokenType::Extends) + let super_class = self.match_token(&Token::Extends) .then(|| self.parse_expression_with_precedence(Precedence::Call)) .transpose()?; @@ -26,12 +25,12 @@ impl Parser { self.advance(); // consume 'class' // Optional class name for expressions - let id = matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) + let id = matches!(self.peek(), Some(Token::Identifier(_))) .then(|| self.expect_identifier("Expected class name")) .transpose()?; // Optional extends clause - let super_class = self.match_token(&TokenType::Extends) + let super_class = self.match_token(&Token::Extends) .then(|| self.parse_expression_with_precedence(Precedence::Call).map(Box::new)) .transpose()?; @@ -41,7 +40,7 @@ impl Parser { } pub fn parse_class_body(&mut self) -> ParseResult> { - self.consume(&TokenType::LeftBrace, "Expected '{' before class body")?; + self.consume(&Token::LeftBrace, "Expected '{' before class body")?; // Classes are always in strict mode let prev_strict = self.state.in_strict_mode; @@ -49,39 +48,39 @@ impl Parser { let mut body = Vec::new(); - while !self.check(&TokenType::RightBrace) && 
!self.is_at_end() { + while !self.check(&Token::RightBrace) && !self.is_at_end() { // Skip empty class elements (semicolons) - if self.match_token(&TokenType::Semicolon) { + if self.match_token(&Token::Semicolon) { continue; } body.push(self.parse_class_member()?); } - self.consume(&TokenType::RightBrace, "Expected '}' after class body")?; + self.consume(&Token::RightBrace, "Expected '}' after class body")?; self.state.in_strict_mode = prev_strict; Ok(body) } pub fn parse_class_member(&mut self) -> ParseResult { - let is_static = self.match_token(&TokenType::Static); + let is_static = self.match_token(&Token::Static); // Handle static blocks (ES2022) - if is_static && self.check(&TokenType::LeftBrace) { + if is_static && self.check(&Token::LeftBrace) { return self.parse_static_block(); } // Parse method modifiers - let is_async = self.match_token(&TokenType::Async); - let is_generator = self.match_token(&TokenType::Star); + let is_async = self.match_token(&Token::Async); + let is_generator = self.match_token(&Token::Star); // Check for getter/setter let mut kind = MethodKind::Method; if !is_async && !is_generator { - if self.match_token(&TokenType::Get) { + if self.match_token(&Token::Get) { kind = MethodKind::Getter; - } else if self.match_token(&TokenType::Set) { + } else if self.match_token(&Token::Set) { kind = MethodKind::Setter; } } @@ -97,9 +96,9 @@ impl Parser { if name.as_ref() == "constructor" { let params = self.parse_function_params()?; - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + self.consume(&Token::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(false, false)?; - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + self.consume(&Token::RightBrace, "Expected '}' after function body")?; return Ok(ClassMember::Constructor { params, body }); } @@ -107,12 +106,12 @@ impl Parser { } // Method definition - if self.check(&TokenType::LeftParen) || is_generator || 
is_async { + if self.check(&Token::LeftParen) || is_generator || is_async { let params = self.parse_function_params()?; - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + self.consume(&Token::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_async, is_generator)?; - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + self.consume(&Token::RightBrace, "Expected '}' after function body")?; return Ok(ClassMember::Method { key, @@ -128,11 +127,11 @@ impl Parser { } // Class field - let value = self.match_token(&TokenType::Equal) + let value = self.match_token(&Token::Equal) .then(|| self.parse_expression()) .transpose()?; - self.consume(&TokenType::Semicolon, "Expected ';' after class field")?; + self.consume(&Token::Semicolon, "Expected ';' after class field")?; Ok(ClassMember::Property { key, @@ -142,15 +141,15 @@ impl Parser { } pub fn parse_static_block(&mut self) -> ParseResult { - self.consume(&TokenType::LeftBrace, "Expected '{' after 'static'")?; + self.consume(&Token::LeftBrace, "Expected '{' after 'static'")?; let mut body = Vec::new(); - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { + while !self.check(&Token::RightBrace) && !self.is_at_end() { body.push(self.parse_statement()?); } - self.consume(&TokenType::RightBrace, "Expected '}' after static block")?; + self.consume(&Token::RightBrace, "Expected '}' after static block")?; Ok(ClassMember::StaticBlock { body }) } diff --git a/src/parser/core.rs b/src/parser/core.rs index 4af784b..3605375 100644 --- a/src/parser/core.rs +++ b/src/parser/core.rs @@ -1,14 +1,14 @@ use super::prelude::*; use crate::ast::*; -use crate::lexer::{Token, TokenType, LexicalContext}; +use crate::lexer::{Token, LexicalContext}; use super::error::{ParserError, ParseResult}; use super::state::ParserState; use std::collections::HashSet; pub struct Parser { - pub tokens: Vec, + pub tokens: Vec<(Token, (usize, usize))>, pub 
current: usize, pub comments: Vec, pub state: ParserState, @@ -19,7 +19,7 @@ pub struct Parser { impl Parser { - pub fn new(tokens: Vec) -> Self { + pub fn new(tokens: Vec<(Token, (usize, usize))>) -> Self { Parser { tokens, current: 0, @@ -59,59 +59,57 @@ impl Parser { .collect() } - pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult - where - F: FnOnce(&mut Self) -> ParseResult, - { - let current_pos = self.current; - - // Only process tokens if the context has any keywords that can be identifiers - if context.has_keywords_as_identifiers() { - for token in self.tokens.iter_mut().skip(current_pos) { - // Work directly with the token type without extracting text first - if context.allows_token_as_identifier(&token.token_type) { - // Get the keyword text only when we know we need to convert it - if let Some(text) = token.token_type.keyword_text() { - token.token_type = TokenType::Identifier(text.to_string()); - } - } - } - } - - self.push_context(context); - let result = f(self); - self.pop_context(); - result + pub fn get_current_position(&self) -> (usize, usize) { + let item = if self.is_at_end() { + self.tokens.get(self.current - 1) + } else { + self.tokens.get(self.current) + }; + + item.map(|(_, pos)| pos).unwrap_or(&(0,0)).clone() } - /* + // TODO FIXME this is erroneous it takes All the tokens starting from now until end not just those ones to be processed within the context scope pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult where F: FnOnce(&mut Self) -> ParseResult, - { - let current_pos = self.current; - for token in self.tokens.iter_mut().skip(current_pos) { - // TODO might be improved to not need keyword_text invocation and do checks on tokens dirrectly - if let Some(text) = token.token_type.keyword_text() { - if context.allows_keyword_as_identifier(text) { - token.token_type = TokenType::Identifier(text.to_string()); + { + let current_pos = self.current; + + // Only process tokens if the context 
has any keywords that can be identifiers + if context.has_keywords_as_identifiers() { + // Iterate over tokens starting from the current position + for (token_type, _) in self.tokens.iter_mut().skip(current_pos) { + // Check if the token type can be treated as an identifier in the current context + if context.allows_token_as_identifier(token_type) { + // Get the keyword text only when a conversion is needed + if let Some(text) = token_type.keyword_text() { + // Modify the token type in-place + *token_type = Token::Identifier(text.to_string()); + } } } } + self.push_context(context); let result = f(self); self.pop_context(); result } - */ // Helper methods to check contexts - pub fn is_in_loop(&self) -> bool { + pub fn is_in_loop_body(&self) -> bool { self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::LoopBody)) } + + pub fn is_in_loop_parameters(&self) -> bool { + //self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::LoopParameters)) + matches!(self.context_stack.last(), Some(LexicalContext::LoopParameters)) + } pub fn is_in_switch(&self) -> bool { - self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::SwitchBody)) + //self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::SwitchBody)) + matches!(self.context_stack.last(), Some(LexicalContext::SwitchBody)) } pub fn is_in_function(&self) -> bool { @@ -147,40 +145,44 @@ impl Parser { // Token navigation methods pub fn is_at_end(&self) -> bool { - self.current >= self.tokens.len() || matches!(self.peek_token_type(), Some(TokenType::EOF)) + self.current >= self.tokens.len() || matches!(self.peek(), Some(Token::EOF)) } - pub fn peek_token(&self) -> Option<&Token> { - self.tokens.get(self.current) - } - - pub fn peek_token_type(&self) -> Option<&TokenType> { - self.peek_token().map(|t| &t.token_type) + pub fn peek_previous(&self) -> Option<&Token> { + if self.current > 0 { + self.tokens.get(self.current - 1).map(|(token_type, _)| token_type) + } else { + None + } } - 
pub fn previous(&self) -> Option<&Token> { - if self.current > 0 { - self.tokens.get(self.current - 1) + pub fn peek_next(&self, offset: usize) -> Option<&Token> { + if self.current + offset < self.tokens.len() { + self.tokens.get(self.current + offset).map(|(token_type, _)| token_type) } else { None } } + pub fn peek(&self) -> Option<&Token> { + self.tokens.get(self.current).map(|(token_type, _)| token_type) + } + pub fn advance(&mut self) -> Option<&Token> { if !self.is_at_end() { self.current += 1; } - self.previous() + self.peek_previous() } - pub fn check(&self, token_type: &TokenType) -> bool { - match self.peek_token_type() { + pub fn check(&self, token_type: &Token) -> bool { + match self.peek() { Some(t) => t == token_type, None => false, } } - pub fn match_token(&mut self, token_type: &TokenType) -> bool { + pub fn match_token(&mut self, token_type: &Token) -> bool { if self.check(token_type) { self.advance(); true @@ -189,7 +191,7 @@ impl Parser { } } - pub fn match_any(&mut self, token_types: &[TokenType]) -> bool { + pub fn match_any(&mut self, token_types: &[Token]) -> bool { for token_type in token_types { if self.check(token_type) { self.advance(); @@ -199,45 +201,32 @@ impl Parser { false } - pub fn consume(&mut self, token_type: &TokenType, message: &str) -> ParseResult { - thread_local! 
{ - static DUMMY_TOKEN: Token = Token::new(TokenType::EOF, 0, 0, 0); - } - - if token_type == &TokenType::Semicolon { - return self.consume_semicolon(message); + pub fn consume(&mut self, token_type: &Token, message: &str) -> ParseResult { + if token_type == &Token::Semicolon { + self.consume_semicolon(message)?; + return Ok(Token::Semicolon); } else if self.check(token_type) { return Ok(self.advance().unwrap().clone()); } - let token = if let Some(t) = self.peek_token() { - t - } else if let Some(t) = self.previous() { - t - } else { - return DUMMY_TOKEN.with(|token| { - Err(parser_error_at_current!(self, message)) - }); - }; - Err(parser_error_at_current!(self, message)) } pub fn previous_line_terminator(&self) -> bool { - if let Some(prev) = self.previous() { - if let Some(curr) = self.peek_token() { - return prev.line < curr.line; + if let Some((_, prev_pos)) = self.tokens.get(self.current.saturating_sub(1)) { + if let Some((_, curr_pos)) = self.tokens.get(self.current) { + return prev_pos.0 < curr_pos.0; } } false } pub fn expect_identifier(&mut self, message: &str) -> ParseResult> { - if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { + if let Some(Token::Identifier(name)) = self.peek().cloned() { self.advance(); Ok(name.into_boxed_str()) } else { - Err(parser_error_at_current_mut!(self, "Expected identifier")) + Err(parser_error_at_current_mut!(self, message)) } } @@ -278,8 +267,7 @@ impl Parser { let stmt = self.parse_statement()?; // Ensure we've consumed all tokens - if !self.is_at_end() && !matches!(self.peek_token_type(), Some(TokenType::EOF)) { - let token = self.peek_token().unwrap(); + if !self.is_at_end() && !matches!(self.peek(), Some(Token::EOF)) { return Err(parser_error_at_current!(self, "Unexpected token after statement")); } @@ -294,8 +282,7 @@ impl Parser { let expr = self.parse_expression()?; // Ensure we've consumed all tokens - if !self.is_at_end() && !matches!(self.peek_token_type(), Some(TokenType::EOF)) { - let 
token = self.peek_token().unwrap(); + if !self.is_at_end() && !matches!(self.peek(), Some(Token::EOF)) { return Err(parser_error_at_current!(self, "Unexpected token after expression")); } @@ -312,29 +299,13 @@ impl Parser { } // TODO delete - // Helper method to handle parsing of "enum" keyword which is reserved in strict mode - pub fn handle_reserved_word(&self, word: &str) -> ParseResult<()> { - if self.state.in_strict_mode { - let reserved_words = ["implements", "interface", "package", "private", "protected", "public", "enum", "eval", "arguments"]; - - if reserved_words.contains(&word) { - let token = self.previous().unwrap(); - return Err(parser_error_at_current!(self, "'{}' is a reserved word in strict mode", word)); - } - } - - Ok(()) - } - // Helper method to validate variable names pub fn validate_variable_name(&self, name: &str) -> ParseResult<()> { if self.state.in_strict_mode { if name == "eval" || name == "arguments" { - let token = self.previous().unwrap(); return Err(parser_error_at_current!(self, "'{}' cannot be used as a variable name in strict mode", name)); } } - Ok(()) } @@ -345,11 +316,11 @@ impl Parser { for param in params { if let Expression::Identifier(name) = param { if self.state.in_strict_mode && (name.as_ref() == "eval" || name.as_ref() == "arguments") { - return Err(parser_error_at_previous!(self, "'{}' cannot be used as a parameter name in strict mode", name)); + // TODO should be previous so backtrack one? 
+ return Err(parser_error_at_current!(self, "'{}' cannot be used as a parameter name in strict mode", name)); } - if !seen_params.insert(name.clone()) { - return Err(parser_error_at_previous!(self, "Duplicate parameter name '{}'", name)); + return Err(parser_error_at_current!(self, "Duplicate parameter name '{}'", name)); } } } @@ -360,14 +331,14 @@ impl Parser { // Helper method to handle octal literals in strict mode pub fn validate_octal_literal(&self, value: &str) -> ParseResult<()> { if self.state.in_strict_mode && value.starts_with('0') && !value.starts_with("0x") && !value.starts_with("0b") && !value.starts_with("0o") { - return Err(parser_error_at_previous!(self, "Octal literals are not allowed in strict mode")); + // TODO should be previous so backtrack one? + return Err(parser_error_at_current!(self, "Octal literals are not allowed in strict mode")); } - Ok(()) } // Helper method to parse a list of elements separated by commas - pub fn parse_comma_separated_list(&mut self, terminator: &TokenType, parser_fn: F) -> ParseResult> + pub fn parse_comma_separated_list(&mut self, terminator: &Token, parser_fn: F) -> ParseResult> where F: Fn(&mut Self) -> ParseResult, { @@ -377,7 +348,7 @@ impl Parser { loop { elements.push(parser_fn(self)?); - if !self.match_token(&TokenType::Comma) { + if !self.match_token(&Token::Comma) { break; } @@ -396,9 +367,9 @@ impl Parser { pub fn parse_arguments(&mut self) -> ParseResult> { let mut args = Vec::new(); - if !self.check(&TokenType::RightParen) { + if !self.check(&Token::RightParen) { loop { - if self.match_token(&TokenType::Ellipsis) { + if self.match_token(&Token::Ellipsis) { // Spread argument let expr = self.parse_expression()?; args.push(Argument::Spread(expr)); @@ -408,40 +379,40 @@ impl Parser { args.push(Argument::Expression(expr)); } - if !self.match_token(&TokenType::Comma) { + if !self.match_token(&Token::Comma) { break; } // Handle trailing comma - if self.check(&TokenType::RightParen) { + if 
self.check(&Token::RightParen) { break; } } } - - self.consume(&TokenType::RightParen, "Expected ')' after arguments")?; - + + self.consume(&Token::RightParen, "Expected ')' after arguments")?; + Ok(args) } // Property key parsing for object literals, class members, and destructuring patterns pub fn parse_property_key(&mut self) -> ParseResult { - if self.match_token(&TokenType::LeftBracket) { + if self.match_token(&Token::LeftBracket) { let expr = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property key")?; + self.consume(&Token::RightBracket, "Expected ']' after computed property key")?; Ok(PropertyKey::Computed(expr)) - } else if self.match_token(&TokenType::Hash) { + } else if self.match_token(&Token::Hash) { let name = self.expect_identifier("Expected private identifier name")?; Ok(PropertyKey::PrivateIdentifier(name)) - } else if let Some(TokenType::StringLiteral(_)) = self.peek_token_type() { - if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { + } else if let Some(Token::StringLiteral(_)) = self.peek() { + if let Token::StringLiteral(s) = self.advance().unwrap().clone() { Ok(PropertyKey::StringLiteral(s.into_boxed_str())) } else { unreachable!() } - } else if let Some(TokenType::NumberLiteral(_)) = self.peek_token_type() { - if let TokenType::NumberLiteral(n) = self.advance().unwrap().token_type { - Ok(PropertyKey::NumericLiteral(n)) + } else if let Some(Token::NumberLiteral(_)) = self.peek() { + if let Token::NumberLiteral(n) = self.advance().unwrap() { + Ok(PropertyKey::NumericLiteral(*n)) } else { unreachable!() } diff --git a/src/parser/error.rs b/src/parser/error.rs index 63609ad..d6d7ad1 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -1,4 +1,4 @@ -use crate::lexer::{LexerError, Token}; +use crate::lexer::{LexerError, TemplatePart, Token}; use super::core::Parser; use std::fmt; @@ -10,108 +10,200 @@ pub struct ParserError { pub source_line: Option, pub 
source_span: Option<(usize, usize)>, pub context_stack: Vec, - pub current_token: Option, + pub current_token: Token, } impl ParserError { - pub fn new(message: &str, line: usize, column: usize) -> Self { - ParserError { - message: message.to_string(), - line, - column, - source_line: None, - source_span: None, - context_stack: Vec::new(), - current_token: None, - } - } - - /// Create a parser error from a parser reference and token information - pub fn from_parser(parser: &Parser, message: &str, line: usize, column: usize, token_length: usize) -> Self { + pub fn new(parser: &Parser, message: &str) -> Self { + + let context_stack = parser.get_context_stack_info(); + + let token = parser.peek().unwrap_or_else(|| &Token::EOF); + + // Infer token length based on its type + let token_length = match token { + + Token::EOF => 0, + + Token::LeftParen + | Token::RightParen + | Token::LeftBrace + | Token::RightBrace + | Token::LeftBracket + | Token::RightBracket + | Token::Comma + | Token::Dot + | Token::Semicolon + | Token::Colon + | Token::Question + | Token::Hash + | Token::Plus + | Token::Minus + | Token::Star + | Token::Slash + | Token::Percent + | Token::Equal + | Token::Bang + | Token::Greater + | Token::Caret + | Token::Less + | Token::Pipe + | Token::Ampersand + | Token::Tilde => 1, + + Token::PlusPlus + | Token::PlusEqual + | Token::MinusMinus + | Token::MinusEqual + | Token::StarEqual + | Token::SlashEqual + | Token::PercentEqual + | Token::EqualEqual + | Token::BangEqual + | Token::GreaterEqual + | Token::GreaterGreater + | Token::LessEqual + | Token::LessLess + | Token::Arrow + | Token::StarStar + | Token::AmpersandEqual + | Token::AmpersandAmpersand + | Token::PipeEqual + | Token::PipePipe + | Token::CaretEqual + | Token::QuestionQuestion + | Token::If + | Token::In + | Token::Of + | Token::Do + | Token::As + | Token::QuestionDot => 2, + + Token::EqualEqualEqual + | Token::BangEqualEqual + | Token::GreaterGreaterEqual + | Token::GreaterGreaterGreater + | 
Token::LessLessEqual + | Token::AmpersandAmpersand + | Token::AmpersandAmpersandEqual + | Token::PipePipeEqual + | Token::Ellipsis + | Token::StarStarEqual + | Token::Var + | Token::Let + | Token::For + | Token::New + | Token::Try + | Token::Get + | Token::Set + | Token::QuestionQuestionEqual => 3, + + Token::Null + | Token::GreaterGreaterGreaterEqual + | Token::With + | Token::Else + | Token::Void + | Token::This + | Token::Case + | Token::Eval + | Token::Enum + | Token::From + | Token::True => 4, + + Token::Const + | Token::While + | Token::Break + | Token::Super + | Token::Await + | Token::Class + | Token::Throw + | Token::Catch + | Token::Yield + | Token::Async + | Token::False => 5, + + Token::Return + | Token::Export + | Token::Import + | Token::Switch + | Token::Typeof + | Token::Target + | Token::Public + | Token::Delete + | Token::Static => 6, + + Token::Extends + | Token::Default + | Token::Finally + | Token::Package + | Token::Private => 7, + + Token::Debugger + | Token::Continue + | Token::Function => 8, + + Token::Undefined + | Token::Interface + | Token::Protected + | Token::Arguments => 9, + + Token::Implements + | Token::InstanceOf => 10, + + Token::Constructor => 11, + + // Literals + Token::Identifier(name) => name.len(), + Token::StringLiteral(value) => value.len() + 2, // Account for quotation marks + Token::NumberLiteral(value) => value.to_string().len(), + Token::BigIntLiteral(value) => value.len() + 1, // Account for the trailing 'n' + Token::RegExpLiteral(pattern, flags) => pattern.len() + flags.len() + 2, // Account for the slashes + Token::TemplateLiteral(parts) => parts.iter().fold(2, |acc, part| { + acc + match part { + TemplatePart::String(s) => s.len(), + TemplatePart::Expression(e) => e.len(), + } + }), + + }; + + let (line, column) = parser.get_current_position(); + + let col = column - token_length; + let source = parser.get_source_text(); - let source_line = extract_source_line_with_context(&source, line, column, 60); - let 
span_end = column + token_length; + let source_line = extract_source_line_with_context(&source, line, col, 60); + let span_end = column; let (adjusted_column, adjusted_span_end) = if source_line.starts_with("...") { - let adjusted_col = column.min(60) + 3; + let adjusted_col = col.min(60) + 3; let adjusted_end = adjusted_col + token_length; (adjusted_col, adjusted_end) } else { - (column, span_end) + (col, span_end) }; - let context_stack = parser.get_context_stack_info(); - - let current_token = parser.peek_token().cloned(); - ParserError { message: message.to_string(), line, - column, + column: col, source_line: Some(source_line), source_span: Some((adjusted_column, adjusted_span_end)), context_stack, - current_token, + current_token: token.clone(), } } /// Create a parser error from the current token with an immutable reference pub fn at_current(parser: &Parser, message: &str) -> Self { - if let Some(token) = parser.peek_token() { - Self::from_parser( - parser, - message, - token.line, - token.column, - token.length - ) - } else if let Some(token) = parser.previous() { - Self::from_parser( - parser, - message, - token.line, - token.column, - token.length - ) - } else { - Self::new(message, 0, 0) - } + Self::new(parser, message) } /// Create a parser error from the current token with a mutable reference pub fn at_current_mut(parser: &mut Parser, message: &str) -> Self { - Self::at_current(&*parser, message) - } - - /// Create a parser error from the previous token with an immutable reference - pub fn at_previous(parser: &Parser, message: &str) -> Self { - if let Some(token) = parser.previous() { - Self::from_parser( - parser, - message, - token.line, - token.column, - token.length - ) - } else if let Some(token) = parser.peek_token() { - Self::from_parser( - parser, - message, - token.line, - token.column, - token.length - ) - } else { - // Fallback if no token is available - Self::new(message, 0, 0) - } - } - - /// Create a parser error from the previous token 
with a mutable reference - pub fn at_previous_mut(parser: &mut Parser, message: &str) -> Self { - Self::at_previous(&*parser, message) + Self::new(&*parser, message) } } @@ -153,33 +245,20 @@ impl fmt::Display for ParserError { } writeln!(f)?; - - // Print current token information if available - if let Some(token) = &self.current_token { - writeln!(f, "\nCurrent token: {:#?}", token.token_type)?; - } - - // Print context stack information if available - if !self.context_stack.is_empty() { - writeln!(f, "\nLexical context stack (newest first):")?; - for (i, context) in self.context_stack.iter().enumerate() { - writeln!(f, " {}: {}", i, context)?; - } - } } else { writeln!(f, "at line {}, column {}", self.line, self.column)?; - - // Print current token information if available - if let Some(token) = &self.current_token { - writeln!(f, "\nCurrent token: {:#?}", token.token_type)?; - } - - // Print context stack information if available - if !self.context_stack.is_empty() { - writeln!(f, "\nLexical context stack (newest first):")?; - for (i, context) in self.context_stack.iter().enumerate() { - writeln!(f, " {}: {}", i, context)?; - } + } + + // Print current token information if available + if !matches!(self.current_token, Token::EOF) { + writeln!(f, "\nCurrent token: {:#?}", self.current_token)?; + } + + // Print context stack information if available + if !self.context_stack.is_empty() { + writeln!(f, "\nLexical context stack:")?; + for (i, context) in self.context_stack.iter().enumerate() { + writeln!(f, " {}: {}", i, context)?; } } @@ -213,7 +292,7 @@ impl From for ParserError { source_line: None, source_span: None, context_stack: Vec::new(), - current_token: None, + current_token: Token::EOF, } } } @@ -276,16 +355,6 @@ macro_rules! parser_error_at_current { }; } -#[macro_export] -macro_rules! 
parser_error_at_previous { - ($self:expr, $message:expr) => { - $crate::parser::error::ParserError::at_previous($self, $message) - }; - ($self:expr, $fmt:expr, $($arg:tt)*) => { - $crate::parser::error::ParserError::at_previous($self, &format!($fmt, $($arg)*)) - }; -} - #[macro_export] macro_rules! parser_error_at_current_mut { ($self:expr, $message:expr) => { @@ -295,13 +364,3 @@ macro_rules! parser_error_at_current_mut { $crate::parser::error::ParserError::at_current_mut($self, &format!($fmt, $($arg)*)) }; } - -#[macro_export] -macro_rules! parser_error_at_previous_mut { - ($self:expr, $message:expr) => { - $crate::parser::error::ParserError::at_previous_mut($self, $message) - }; - ($self:expr, $fmt:expr, $($arg:tt)*) => { - $crate::parser::error::ParserError::at_previous_mut($self, &format!($fmt, $($arg)*)) - }; -} diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index cadc38c..d477f66 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -1,7 +1,7 @@ use super::prelude::*; use crate::ast::*; -use crate::lexer::{Token, TokenType, TemplatePart, LexicalContext}; +use crate::lexer::{Token, TemplatePart, LexicalContext}; use super::error::ParseResult; use super::core::Parser; @@ -32,60 +32,33 @@ pub enum Precedence { impl Parser { pub fn parse_expression(&mut self) -> ParseResult { - -// match self.parse_expression_with_precedence(Precedence::Comma) { -// Ok(expr) => { -// -// match expr { -// Expression::ArrowFunction { params: _, body: _, is_async: _ } if self.check(&TokenType::RightParen) => { -// self.advance(); -// //println!("Parsed Arrow Function!!!"); -// }, -// _ => {}, -// }; -// -// println!("Parsed expression: {:#?}", expr); -// Ok(expr) -// }, -// err => err -// } - - //let expr =; - - //if - //if let Expression::ArrowFunction = &expr { - // println!("Consumed arrow function"); - //} - - //expr - self.parse_expression_with_precedence(Precedence::Comma) } pub fn parse_expression_with_precedence(&mut self, precedence: 
Precedence) -> ParseResult { // Parse prefix expressions - let mut expr = match self.peek_token_type() { + let mut expr = match self.peek() { // Unary prefix operators - Some(TokenType::Bang) | - Some(TokenType::Tilde) | - Some(TokenType::Plus) | - Some(TokenType::Minus) | - Some(TokenType::PlusPlus) | - Some(TokenType::MinusMinus) | - Some(TokenType::Typeof) | - Some(TokenType::Void) | - Some(TokenType::Delete) => { + Some(Token::Bang) | + Some(Token::Tilde) | + Some(Token::Plus) | + Some(Token::Minus) | + Some(Token::PlusPlus) | + Some(Token::MinusMinus) | + Some(Token::Typeof) | + Some(Token::Void) | + Some(Token::Delete) => { self.advance(); - let operator = match self.previous().unwrap().token_type { - TokenType::Bang => UnaryOperator::Not, - TokenType::Tilde => UnaryOperator::BitwiseNot, - TokenType::Plus => UnaryOperator::Plus, - TokenType::Minus => UnaryOperator::Minus, - TokenType::PlusPlus => UnaryOperator::Increment, - TokenType::MinusMinus => UnaryOperator::Decrement, - TokenType::Typeof => UnaryOperator::Typeof, - TokenType::Void => UnaryOperator::Void, - TokenType::Delete => UnaryOperator::Delete, + let operator = match self.peek_previous().unwrap() { + Token::Bang => UnaryOperator::Not, + Token::Tilde => UnaryOperator::BitwiseNot, + Token::Plus => UnaryOperator::Plus, + Token::Minus => UnaryOperator::Minus, + Token::PlusPlus => UnaryOperator::Increment, + Token::MinusMinus => UnaryOperator::Decrement, + Token::Typeof => UnaryOperator::Typeof, + Token::Void => UnaryOperator::Void, + Token::Delete => UnaryOperator::Delete, _ => unreachable!(), }; @@ -98,22 +71,22 @@ impl Parser { } }, // Await expression - Some(TokenType::Await) if self.allows_await() => { + Some(Token::Await) if self.allows_await() => { self.advance(); let argument = self.parse_expression_with_precedence(Precedence::Prefix)?; Expression::Await(Box::new(argument)) }, // Yield expression - Some(TokenType::Yield) if self.allows_yield() => { + Some(Token::Yield) if self.allows_yield() => 
{ self.advance(); - let delegate = self.match_token(&TokenType::Star); + let delegate = self.match_token(&Token::Star); // Yield can be used without an argument - let argument = if self.check(&TokenType::Semicolon) || - self.check(&TokenType::RightBrace) || - self.check(&TokenType::Comma) || - self.check(&TokenType::RightParen) || - self.check(&TokenType::Colon) || + let argument = if self.check(&Token::Semicolon) || + self.check(&Token::RightBrace) || + self.check(&Token::Comma) || + self.check(&Token::RightParen) || + self.check(&Token::Colon) || self.is_at_end() { None } else { @@ -126,48 +99,48 @@ impl Parser { } }, // Primary expressions - Some(TokenType::This) => { + Some(Token::This) => { self.advance(); Expression::This }, - Some(TokenType::Arguments) => { + Some(Token::Arguments) => { self.advance(); Expression::Identifier("arguments".into()) }, - Some(TokenType::Super) => { + Some(Token::Super) => { self.advance(); Expression::Super }, - Some(TokenType::Null) => { + Some(Token::Null) => { self.advance(); Expression::Literal(Literal::Null) }, - Some(TokenType::Undefined) => { + Some(Token::Undefined) => { self.advance(); Expression::Literal(Literal::Undefined) }, - Some(TokenType::True) => { + Some(Token::True) => { self.advance(); Expression::Literal(Literal::Boolean(true)) }, - Some(TokenType::False) => { + Some(Token::False) => { self.advance(); Expression::Literal(Literal::Boolean(false)) }, - Some(TokenType::NumberLiteral(n)) => { + Some(Token::NumberLiteral(n)) => { let value = *n; self.advance(); Expression::Literal(Literal::Number(value)) }, - Some(TokenType::StringLiteral(_)) => { - if let TokenType::StringLiteral(s) = &self.advance().unwrap().token_type { + Some(Token::StringLiteral(_)) => { + if let Token::StringLiteral(s) = &self.advance().unwrap() { Expression::Literal(Literal::String(s.clone().into_boxed_str())) } else { unreachable!() } }, - Some(TokenType::RegExpLiteral(_, _)) => { - if let TokenType::RegExpLiteral(pattern, flags) = 
self.advance().unwrap().token_type.clone() { + Some(Token::RegExpLiteral(_, _)) => { + if let Token::RegExpLiteral(pattern, flags) = self.advance().unwrap().clone() { Expression::Literal(Literal::RegExp { pattern: pattern.into_boxed_str(), flags: flags.into_boxed_str(), @@ -176,18 +149,15 @@ impl Parser { unreachable!() } }, - Some(TokenType::BigIntLiteral(_)) => { - if let TokenType::BigIntLiteral(s) = self.advance().unwrap().token_type.clone() { + Some(Token::BigIntLiteral(_)) => { + if let Token::BigIntLiteral(s) = self.advance().unwrap().clone() { Expression::Literal(Literal::BigInt(s.into_boxed_str())) } else { unreachable!() } }, - Some(TokenType::TemplateLiteral(_)) => { - if let TokenType::TemplateLiteral(parts) = self.advance().unwrap().token_type.clone() { - let token_line = self.previous().unwrap().line; - let token_column = self.previous().unwrap().column; - let token_length = self.previous().unwrap().length; + Some(Token::TemplateLiteral(_)) => { + if let Token::TemplateLiteral(parts) = self.advance().unwrap().clone() { let mut quasis = Vec::new(); let mut expressions = Vec::new(); @@ -247,43 +217,35 @@ impl Parser { } }, // TODO everything but Identifier hoists matches below, need a better approach to var as = e.class; scenarios - Some(TokenType::Identifier(_)) => { - //Some(TokenType::As) | - //Some(TokenType::Target) | - //Some(TokenType::Class) | - //Some(TokenType::Get) | - //Some(TokenType::Set) | - //Some(TokenType::From) => { - + Some(Token::Identifier(_)) => { let name = self.expect_identifier("Expected identifier in expression")?; - if self.check(&TokenType::Arrow) { + if self.check(&Token::Arrow) { let param = Expression::Identifier(name); self.advance(); return self.parse_arrow_function_body(vec![param], false); } Expression::Identifier(name) }, - Some(TokenType::LeftParen) => { - //println!("In ( {:#?}", self.peek_token_type()); + Some(Token::LeftParen) => { + //println!("In ( {:#?}", self.peek()); self.advance(); // Consume the '(' // 
Handle empty parameter list: () => ... - if self.match_token(&TokenType::RightParen) { - return if self.match_token(&TokenType::Arrow) { + if self.match_token(&Token::RightParen) { + return if self.match_token(&Token::Arrow) { self.parse_arrow_function_body(vec![], false) } else { - let token = self.previous().unwrap(); Err(parser_error_at_current!(self, "Unexpected empty parentheses '()'")) }; } - //println!("Here 1 current token {:#?}", self.peek_token_type()); + //println!("Here 1 current token {:#?}", self.peek()); let mut expr = self.parse_expression()?; //println!("Here 2"); // Handle single-parameter or nested parentheses: (x) => ..., ((expr)) - if self.match_token(&TokenType::RightParen) { - if self.match_token(&TokenType::Arrow) { + if self.match_token(&Token::RightParen) { + if self.match_token(&Token::Arrow) { let params = match expr { //Expression::Identifier(_) => vec![expr], //Expression::Sequence(seq) => seq, @@ -292,80 +254,80 @@ impl Parser { }; return self.parse_arrow_function_body(params, false); } - } else if self.check(&TokenType::Comma) { + } else if self.check(&Token::Comma) { - //println!("Some comma {:#?}", self.peek_token_type()); + //println!("Some comma {:#?}", self.peek()); // Handle comma-separated parameters: (a, b, c) let mut params = vec![expr]; - while self.match_token(&TokenType::Comma) { - //println!("Current token {:#?}", self.peek_token_type()); + while self.match_token(&Token::Comma) { + //println!("Current token {:#?}", self.peek()); params.push(self.parse_expression_with_precedence(Precedence::Assignment)?); } - self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; - return if self.match_token(&TokenType::Arrow) { + self.consume(&Token::RightParen, "Expected ')' after parameters")?; + return if self.match_token(&Token::Arrow) { self.parse_arrow_function_body(params, false) } else { Ok(Expression::Sequence(params)) }; } else { - self.consume(&TokenType::RightParen, "Expected ')' after expression")?; + 
self.consume(&Token::RightParen, "Expected ')' after expression")?; } // Handle expressions after ')': ., [ or ( - if self.match_token(&TokenType::Dot) { + if self.match_token(&Token::Dot) { expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - } else if self.check(&TokenType::LeftBracket) || self.check(&TokenType::LeftParen) { + } else if self.check(&Token::LeftBracket) || self.check(&Token::LeftParen) { expr = self.parse_expression_with_precedence(Precedence::Call)?; } expr }, - Some(TokenType::LeftBracket) => { + Some(Token::LeftBracket) => { //println!("I am here"); self.advance(); let mut elements = Vec::new(); - while !self.check(&TokenType::RightBracket) && !self.is_at_end() { - if self.match_token(&TokenType::Comma) { + while !self.check(&Token::RightBracket) && !self.is_at_end() { + if self.match_token(&Token::Comma) { elements.push(ArrayElement::Hole); } else { - if self.match_token(&TokenType::Ellipsis) { + if self.match_token(&Token::Ellipsis) { let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; elements.push(ArrayElement::Spread(expr)); } else { let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; elements.push(ArrayElement::Expression(expr)); } - if !self.check(&TokenType::RightBracket) { - self.consume(&TokenType::Comma, "Expected ',' after array element")?; + if !self.check(&Token::RightBracket) { + self.consume(&Token::Comma, "Expected ',' after array element")?; } } } - self.consume(&TokenType::RightBracket, "Expected ']' after array elements")?; + self.consume(&Token::RightBracket, "Expected ']' after array elements")?; Expression::Array(elements) }, - Some(TokenType::LeftBrace) => { + Some(Token::LeftBrace) => { self.advance(); let mut properties = Vec::new(); - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - if self.match_token(&TokenType::Ellipsis) { + while !self.check(&Token::RightBrace) && !self.is_at_end() { + if self.match_token(&Token::Ellipsis) { let 
expr = self.parse_expression_with_precedence(Precedence::Assignment)?; properties.push(ObjectProperty::Spread(expr)); } else { - let is_async = self.match_token(&TokenType::Async); - let is_generator = self.match_token(&TokenType::Star); + let is_async = self.match_token(&Token::Async); + let is_generator = self.match_token(&Token::Star); let mut kind = PropertyKind::Init; if !is_async && !is_generator { - if self.check(&TokenType::Get) || self.check(&TokenType::Set) { - let is_property_name = if let Some(next_token) = self.tokens.get(self.current + 1) { - matches!(next_token.token_type, TokenType::Colon) + if self.check(&Token::Get) || self.check(&Token::Set) { + let is_property_name = if let Some(next_token) = self.peek_next(1) { + matches!(next_token, Token::Colon) } else { false }; if !is_property_name { - if self.match_token(&TokenType::Get) { + if self.match_token(&Token::Get) { kind = PropertyKind::Get; - } else if self.match_token(&TokenType::Set) { + } else if self.match_token(&Token::Set) { kind = PropertyKind::Set; } } @@ -380,7 +342,7 @@ impl Parser { let computed = matches!(key, PropertyKey::Computed(_)); // Method definition - if self.check(&TokenType::LeftParen) || is_generator || is_async { + if self.check(&Token::LeftParen) || is_generator || is_async { let method_kind = match kind { PropertyKind::Get => MethodKind::Getter, PropertyKind::Set => MethodKind::Setter, @@ -389,9 +351,9 @@ impl Parser { let params = self.parse_function_params()?; - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + self.consume(&Token::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_async, is_generator)?; - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + self.consume(&Token::RightBrace, "Expected '}' after function body")?; properties.push(ObjectProperty::Method { key, @@ -407,7 +369,7 @@ impl Parser { } else { // Regular property let shorthand = !computed && - 
!self.check(&TokenType::Colon) && + !self.check(&Token::Colon) && matches!(key, PropertyKey::Identifier(_)); let value = if shorthand { @@ -417,7 +379,7 @@ impl Parser { unreachable!() } } else { - self.consume(&TokenType::Colon, "Expected ':' after property name")?; + self.consume(&Token::Colon, "Expected ':' after property name")?; self.parse_expression_with_precedence(Precedence::Assignment)? }; @@ -431,13 +393,13 @@ impl Parser { } } - if !self.check(&TokenType::RightBrace) { - //println!("Now have token {:#?}", self.peek_token()); + if !self.check(&Token::RightBrace) { + //println!("Now have token {:#?}", self.peek()); - self.consume(&TokenType::Comma, "Expected ',' after property")?; + self.consume(&Token::Comma, "Expected ',' after property")?; // Allow trailing comma - if self.check(&TokenType::RightBrace) { + if self.check(&Token::RightBrace) { break; } } else { @@ -445,19 +407,19 @@ impl Parser { } } - self.consume(&TokenType::RightBrace, "Expected '}' after object literal")?; + self.consume(&Token::RightBrace, "Expected '}' after object literal")?; Expression::Object(properties) }, - Some(TokenType::Function) => self.parse_function_expression()?, - Some(TokenType::Class) => self.parse_class_expression()?, - Some(TokenType::New) => { + Some(Token::Function) => self.parse_function_expression()?, + Some(Token::Class) => self.parse_class_expression()?, + Some(Token::New) => { self.advance(); // consume 'new' // Handle new.target meta property - if self.match_token(&TokenType::Dot) { - if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { + if self.match_token(&Token::Dot) { + if let Some(Token::Identifier(name)) = self.peek().cloned() { if name == "target" { self.advance(); // consume 'target' Expression::MetaProperty { @@ -465,155 +427,110 @@ impl Parser { property: "target".into(), } } else { - let token = self.peek_token().unwrap(); return Err(parser_error_at_current!(self, "Expected 'target' after 'new.'")); } } else { - let token 
= self.peek_token().unwrap(); return Err(parser_error_at_current!(self, "Expected 'target' after 'new.'")); } } else { // Regular new expression let callee = self.parse_expression_with_precedence(Precedence::Call)?; - // Optional arguments - let arguments = if self.match_token(&TokenType::LeftParen) { - let mut args = Vec::new(); - - if !self.check(&TokenType::RightParen) { - loop { - if self.match_token(&TokenType::Ellipsis) { - // Spread argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Spread(expr)); - } else { - // Regular argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Expression(expr)); - } - - if !self.match_token(&TokenType::Comma) { - break; - } - - // Handle trailing comma - if self.check(&TokenType::RightParen) { - break; - } - } - } - - self.consume(&TokenType::RightParen, "Expected ')' after arguments 1")?; - args - } else { - Vec::new() - }; - - Expression::New { - callee: Box::new(callee), - arguments, - } + Expression::New(Box::new(callee)) } }, - Some(TokenType::Import) => { + Some(Token::Import) => { self.advance(); // consume 'import' - self.consume(&TokenType::LeftParen, "Expected '(' after 'import'")?; + self.consume(&Token::LeftParen, "Expected '(' after 'import'")?; let source = self.parse_expression_with_precedence(Precedence::Assignment)?; - self.consume(&TokenType::RightParen, "Expected ')' after import source")?; + self.consume(&Token::RightParen, "Expected ')' after import source")?; Expression::Import(Box::new(source)) }, - Some(TokenType::Hash) => { + Some(Token::Hash) => { self.advance(); // consume '#' let name = self.expect_identifier("Expected private identifier name")?; Expression::PrivateName(name) }, - Some(TokenType::Async) if self.is_async_function() => self.parse_async_function_expression()?, + Some(Token::Async) if self.is_async_function() => self.parse_async_function_expression()?, _ => { - - // TODO 
trailing comma gets there - - let token = self.peek_token().unwrap_or_else(|| self.previous().unwrap()); - return Err(parser_error_at_current!(self, "Unexpected token in expression: {:?}", token.token_type)); + return Err(parser_error_at_current!(self, "Unexpected token in expression")); } }; // Parse infix and postfix expressions based on precedence while !self.is_at_end() { - let current_precedence = match self.peek_token_type() { - Some(TokenType::Comma) => Precedence::Comma, - Some(TokenType::Question) => { - if self.tokens.get(self.current + 1).map_or(false, |t| matches!(t.token_type, TokenType::Dot)) { - Precedence::Call - } else { - Precedence::Conditional + let current_precedence = match self.peek() { + Some(Token::Comma) => Precedence::Comma, + Some(Token::Question) => { + match self.peek_next(1) { + Some(Token::Dot) => Precedence::Call, + _ => Precedence::Conditional, } }, - Some(TokenType::Equal) | - Some(TokenType::PlusEqual) | - Some(TokenType::MinusEqual) | - Some(TokenType::StarEqual) | - Some(TokenType::SlashEqual) | - Some(TokenType::PercentEqual) | - Some(TokenType::StarStarEqual) | - Some(TokenType::AmpersandEqual) | - Some(TokenType::PipeEqual) | - Some(TokenType::CaretEqual) | - Some(TokenType::LessLessEqual) | - Some(TokenType::GreaterGreaterEqual) | - Some(TokenType::GreaterGreaterGreaterEqual) | - Some(TokenType::AmpersandAmpersandEqual) | - Some(TokenType::PipePipeEqual) | - Some(TokenType::QuestionQuestionEqual) => Precedence::Assignment, - Some(TokenType::PipePipe) | - Some(TokenType::QuestionQuestion) => Precedence::LogicalOr, - Some(TokenType::AmpersandAmpersand) => Precedence::LogicalAnd, - Some(TokenType::Pipe) => Precedence::BitwiseOr, - Some(TokenType::Caret) => Precedence::BitwiseXor, - Some(TokenType::Ampersand) => Precedence::BitwiseAnd, - Some(TokenType::EqualEqual) | - Some(TokenType::BangEqual) | - Some(TokenType::EqualEqualEqual) | - Some(TokenType::BangEqualEqual) => Precedence::Equality, - Some(TokenType::Less) | - 
Some(TokenType::LessEqual) | - Some(TokenType::Greater) | - Some(TokenType::GreaterEqual) | - Some(TokenType::In) | - Some(TokenType::InstanceOf) => Precedence::Relational, - Some(TokenType::LessLess) | - Some(TokenType::GreaterGreater) | - Some(TokenType::GreaterGreaterGreater) => Precedence::Shift, - Some(TokenType::Plus) | - Some(TokenType::Minus) => Precedence::Additive, - Some(TokenType::Star) | - Some(TokenType::Slash) | - Some(TokenType::Percent) => Precedence::Multiplicative, - Some(TokenType::StarStar) => Precedence::Exponentiation, - Some(TokenType::PlusPlus) | - Some(TokenType::MinusMinus) if !self.previous_line_terminator() => Precedence::Postfix, - Some(TokenType::Dot) | - Some(TokenType::LeftBracket) | - Some(TokenType::LeftParen) | - Some(TokenType::QuestionDot) => Precedence::Call, + Some(Token::Equal) | + Some(Token::PlusEqual) | + Some(Token::MinusEqual) | + Some(Token::StarEqual) | + Some(Token::SlashEqual) | + Some(Token::PercentEqual) | + Some(Token::StarStarEqual) | + Some(Token::AmpersandEqual) | + Some(Token::PipeEqual) | + Some(Token::CaretEqual) | + Some(Token::LessLessEqual) | + Some(Token::GreaterGreaterEqual) | + Some(Token::GreaterGreaterGreaterEqual) | + Some(Token::AmpersandAmpersandEqual) | + Some(Token::PipePipeEqual) | + Some(Token::QuestionQuestionEqual) => Precedence::Assignment, + Some(Token::PipePipe) | + Some(Token::QuestionQuestion) => Precedence::LogicalOr, + Some(Token::AmpersandAmpersand) => Precedence::LogicalAnd, + Some(Token::Pipe) => Precedence::BitwiseOr, + Some(Token::Caret) => Precedence::BitwiseXor, + Some(Token::Ampersand) => Precedence::BitwiseAnd, + Some(Token::EqualEqual) | + Some(Token::BangEqual) | + Some(Token::EqualEqualEqual) | + Some(Token::BangEqualEqual) => Precedence::Equality, + Some(Token::Less) | + Some(Token::LessEqual) | + Some(Token::Greater) | + Some(Token::GreaterEqual) | + Some(Token::In) | + Some(Token::InstanceOf) => Precedence::Relational, + Some(Token::LessLess) | + 
Some(Token::GreaterGreater) | + Some(Token::GreaterGreaterGreater) => Precedence::Shift, + Some(Token::Plus) | + Some(Token::Minus) => Precedence::Additive, + Some(Token::Star) | + Some(Token::Slash) | + Some(Token::Percent) => Precedence::Multiplicative, + Some(Token::StarStar) => Precedence::Exponentiation, + Some(Token::PlusPlus) | + Some(Token::MinusMinus) => Precedence::Postfix, + Some(Token::Dot) | + Some(Token::LeftBracket) | + Some(Token::LeftParen) | + Some(Token::QuestionDot) => Precedence::Call, _ => Precedence::None, }; - if precedence > current_precedence { + if current_precedence == Precedence::None || precedence > current_precedence { break; } - // Handle postfix operators if current_precedence == Precedence::Postfix { - if self.match_any(&[TokenType::PlusPlus, TokenType::MinusMinus]) { + if self.match_any(&[Token::PlusPlus, Token::MinusMinus]) { if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. }) { - let token = self.previous().unwrap(); return Err(parser_error_at_current!(self, "Invalid left-hand side in postfix operation")); } - let operator = match self.previous().unwrap().token_type { - TokenType::PlusPlus => UnaryOperator::Increment, - TokenType::MinusMinus => UnaryOperator::Decrement, + let operator = match self.peek_previous().unwrap() { + Token::PlusPlus => UnaryOperator::Increment, + Token::MinusMinus => UnaryOperator::Decrement, _ => unreachable!(), }; @@ -631,7 +548,7 @@ impl Parser { Precedence::Comma => { self.advance(); // consume comma - if !self.check(&TokenType::RightParen) { + if !self.check(&Token::RightParen) { let right = self.parse_expression_with_precedence(Precedence::Assignment)?; if let Expression::Sequence(ref mut seq) = expr { seq.push(right); @@ -642,37 +559,37 @@ impl Parser { }, Precedence::Assignment => { // Match assignment operator - let op = if self.match_token(&TokenType::Equal) { + let op = if self.match_token(&Token::Equal) { AssignmentOperator::Assign - } else if 
self.match_token(&TokenType::PlusEqual) { + } else if self.match_token(&Token::PlusEqual) { AssignmentOperator::AddAssign - } else if self.match_token(&TokenType::MinusEqual) { + } else if self.match_token(&Token::MinusEqual) { AssignmentOperator::SubtractAssign - } else if self.match_token(&TokenType::StarEqual) { + } else if self.match_token(&Token::StarEqual) { AssignmentOperator::MultiplyAssign - } else if self.match_token(&TokenType::SlashEqual) { + } else if self.match_token(&Token::SlashEqual) { AssignmentOperator::DivideAssign - } else if self.match_token(&TokenType::PercentEqual) { + } else if self.match_token(&Token::PercentEqual) { AssignmentOperator::ModuloAssign - } else if self.match_token(&TokenType::StarStarEqual) { + } else if self.match_token(&Token::StarStarEqual) { AssignmentOperator::ExponentAssign - } else if self.match_token(&TokenType::AmpersandEqual) { + } else if self.match_token(&Token::AmpersandEqual) { AssignmentOperator::BitwiseAndAssign - } else if self.match_token(&TokenType::PipeEqual) { + } else if self.match_token(&Token::PipeEqual) { AssignmentOperator::BitwiseOrAssign - } else if self.match_token(&TokenType::CaretEqual) { + } else if self.match_token(&Token::CaretEqual) { AssignmentOperator::BitwiseXorAssign - } else if self.match_token(&TokenType::LessLessEqual) { + } else if self.match_token(&Token::LessLessEqual) { AssignmentOperator::LeftShiftAssign - } else if self.match_token(&TokenType::GreaterGreaterEqual) { + } else if self.match_token(&Token::GreaterGreaterEqual) { AssignmentOperator::RightShiftAssign - } else if self.match_token(&TokenType::GreaterGreaterGreaterEqual) { + } else if self.match_token(&Token::GreaterGreaterGreaterEqual) { AssignmentOperator::UnsignedRightShiftAssign - } else if self.match_token(&TokenType::AmpersandAmpersandEqual) { + } else if self.match_token(&Token::AmpersandAmpersandEqual) { AssignmentOperator::LogicalAndAssign - } else if self.match_token(&TokenType::PipePipeEqual) { + } else if 
self.match_token(&Token::PipePipeEqual) { AssignmentOperator::LogicalOrAssign - } else if self.match_token(&TokenType::QuestionQuestionEqual) { + } else if self.match_token(&Token::QuestionQuestionEqual) { AssignmentOperator::NullishAssign } else { break; // No assignment operator found @@ -680,8 +597,6 @@ impl Parser { // Validate left-hand side if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. } | Expression::Array(_) | Expression::Object(_)) { - let binding = Token::new(TokenType::EOF, 0, 0, 0); - let token = self.previous().unwrap_or(&binding); return Err(parser_error_at_current!(self, "Invalid left-hand side in assignment")); } @@ -697,21 +612,21 @@ impl Parser { self.advance(); // consume ? // Check if this is part of optional chaining - if self.check(&TokenType::Dot) { + if self.check(&Token::Dot) { // This is optional chaining self.advance(); // consume . // Now handle the optional chaining - if self.match_token(&TokenType::LeftBracket) { + if self.match_token(&Token::LeftBracket) { let property = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property")?; + self.consume(&Token::RightBracket, "Expected ']' after computed property")?; expr = Expression::Member { object: Box::new(expr), property: Box::new(property), computed: true, optional: true, }; - } else if self.match_token(&TokenType::LeftParen) { + } else if self.match_token(&Token::LeftParen) { let arguments = self.parse_arguments()?; expr = Expression::Call { callee: Box::new(expr), @@ -730,7 +645,7 @@ impl Parser { } else { // This is a ternary operator let consequent = self.parse_expression_with_precedence(Precedence::Assignment)?; - self.consume(&TokenType::Colon, "Expected ':' in conditional expression")?; + self.consume(&Token::Colon, "Expected ':' in conditional expression")?; let alternate = self.parse_expression_with_precedence(Precedence::Assignment)?; expr = Expression::Conditional { @@ -741,9 +656,9 @@ impl Parser { } }, 
Precedence::LogicalOr => { - let operator = if self.match_token(&TokenType::PipePipe) { + let operator = if self.match_token(&Token::PipePipe) { LogicalOperator::Or - } else if self.match_token(&TokenType::QuestionQuestion) { + } else if self.match_token(&Token::QuestionQuestion) { LogicalOperator::NullishCoalescing } else { break; @@ -787,33 +702,32 @@ impl Parser { Precedence::Additive | Precedence::Multiplicative => { self.advance(); - let token_type = self.previous().unwrap().token_type.clone(); + let token_type = self.peek_previous().unwrap().clone(); let operator = match token_type { - TokenType::Plus => BinaryOperator::Add, - TokenType::Minus => BinaryOperator::Subtract, - TokenType::Star => BinaryOperator::Multiply, - TokenType::Slash => BinaryOperator::Divide, - TokenType::Percent => BinaryOperator::Modulo, - TokenType::StarStar => BinaryOperator::Exponent, - TokenType::Pipe => BinaryOperator::BitwiseOr, - TokenType::Ampersand => BinaryOperator::BitwiseAnd, - TokenType::Caret => BinaryOperator::BitwiseXor, - TokenType::LessLess => BinaryOperator::LeftShift, - TokenType::GreaterGreater => BinaryOperator::RightShift, - TokenType::GreaterGreaterGreater => BinaryOperator::UnsignedRightShift, - TokenType::EqualEqual => BinaryOperator::Equal, - TokenType::BangEqual => BinaryOperator::NotEqual, - TokenType::EqualEqualEqual => BinaryOperator::StrictEqual, - TokenType::BangEqualEqual => BinaryOperator::StrictNotEqual, - TokenType::Less => BinaryOperator::LessThan, - TokenType::LessEqual => BinaryOperator::LessThanEqual, - TokenType::Greater => BinaryOperator::GreaterThan, - TokenType::GreaterEqual => BinaryOperator::GreaterThanEqual, - TokenType::In => BinaryOperator::In, - TokenType::InstanceOf => BinaryOperator::InstanceOf, + Token::Plus => BinaryOperator::Add, + Token::Minus => BinaryOperator::Subtract, + Token::Star => BinaryOperator::Multiply, + Token::Slash => BinaryOperator::Divide, + Token::Percent => BinaryOperator::Modulo, + Token::StarStar => 
BinaryOperator::Exponent, + Token::Pipe => BinaryOperator::BitwiseOr, + Token::Ampersand => BinaryOperator::BitwiseAnd, + Token::Caret => BinaryOperator::BitwiseXor, + Token::LessLess => BinaryOperator::LeftShift, + Token::GreaterGreater => BinaryOperator::RightShift, + Token::GreaterGreaterGreater => BinaryOperator::UnsignedRightShift, + Token::EqualEqual => BinaryOperator::Equal, + Token::BangEqual => BinaryOperator::NotEqual, + Token::EqualEqualEqual => BinaryOperator::StrictEqual, + Token::BangEqualEqual => BinaryOperator::StrictNotEqual, + Token::Less => BinaryOperator::LessThan, + Token::LessEqual => BinaryOperator::LessThanEqual, + Token::Greater => BinaryOperator::GreaterThan, + Token::GreaterEqual => BinaryOperator::GreaterThanEqual, + Token::In => BinaryOperator::In, + Token::InstanceOf => BinaryOperator::InstanceOf, _ => { - let token = self.previous().unwrap(); return Err(parser_error_at_current!(self, "Unexpected token: {:?}", token_type)); } }; @@ -852,12 +766,12 @@ impl Parser { }, Precedence::Call => { - //println!("In call {:#?}", self.peek_token_type()); + //println!("In call {:#?}", self.peek()); - if self.match_token(&TokenType::Dot) { + if self.match_token(&Token::Dot) { let property = self.with_context(LexicalContext::MemberAccess, |parser| { - let property = if let Some(TokenType::Identifier(name)) = parser.peek_token_type().cloned() { + if let Some(Token::Identifier(name)) = parser.peek().cloned() { parser.advance(); return Ok(name.into_boxed_str()) } else { @@ -871,22 +785,22 @@ impl Parser { computed: false, optional: false, }; - } else if self.match_token(&TokenType::LeftBracket) { + } else if self.match_token(&Token::LeftBracket) { //println!("This case"); // Member access with bracket notation let property = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property")?; + self.consume(&Token::RightBracket, "Expected ']' after computed property")?; expr = Expression::Member { object: 
Box::new(expr), property: Box::new(property), computed: true, optional: false, }; - } else if self.match_token(&TokenType::QuestionDot) { + } else if self.match_token(&Token::QuestionDot) { // Optional chaining - if self.match_token(&TokenType::LeftBracket) { + if self.match_token(&Token::LeftBracket) { let property = self.parse_expression()?; - self.consume(&TokenType::RightBracket, "Expected ']' after computed property")?; + self.consume(&Token::RightBracket, "Expected ']' after computed property")?; expr = Expression::Member { object: Box::new(expr), property: Box::new(property), @@ -902,13 +816,14 @@ impl Parser { optional: true, }; } - } else if self.match_token(&TokenType::LeftParen) { + } else if self.match_token(&Token::LeftParen) { + // Function call let mut args = Vec::new(); - if !self.check(&TokenType::RightParen) { + if !self.check(&Token::RightParen) { loop { - if self.match_token(&TokenType::Ellipsis) { + if self.match_token(&Token::Ellipsis) { // Spread argument let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; args.push(Argument::Spread(expr)); @@ -918,18 +833,18 @@ impl Parser { args.push(Argument::Expression(expr)); } - if !self.match_token(&TokenType::Comma) { + if !self.match_token(&Token::Comma) { break; } // Handle trailing comma - if self.check(&TokenType::RightParen) { + if self.check(&Token::RightParen) { break; } } } - self.consume(&TokenType::RightParen, "Expected ')' after arguments 2")?; + self.consume(&Token::RightParen, "Expected ')' after arguments 2")?; expr = Expression::Call { callee: Box::new(expr), @@ -947,16 +862,4 @@ impl Parser { Ok(expr) } - // TODO remove need for this flatten in place when processing comma -// fn flatten_sequence(&self, exprs: Vec) -> Vec { -// let mut flattened = Vec::new(); -// for expr in exprs { -// match expr { -// Expression::Sequence(seq) => flattened.extend(self.flatten_sequence(seq)), -// _ => flattened.push(expr), -// } -// } -// flattened -// } - } diff --git 
a/src/parser/functions.rs b/src/parser/functions.rs index c7041cb..a9ab036 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -1,7 +1,7 @@ use super::prelude::*; use crate::ast::*; -use crate::lexer::{TokenType, LexicalContext}; +use crate::lexer::{Token, LexicalContext}; use super::error::ParseResult; use super::core::Parser; @@ -10,14 +10,14 @@ impl Parser { pub fn parse_function_declaration(&mut self) -> ParseResult { self.advance(); // consume 'function' - let is_generator = self.match_token(&TokenType::Star); + let is_generator = self.match_token(&Token::Star); let id = self.expect_identifier("Expected function name")?; let params = self.parse_function_params()?; - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + self.consume(&Token::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_generator, false)?; - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + self.consume(&Token::RightBrace, "Expected '}' after function body")?; Ok(FunctionDeclaration { id, @@ -31,10 +31,10 @@ impl Parser { pub fn parse_function_expression(&mut self) -> ParseResult { self.advance(); // consume 'function' - let is_generator = self.match_token(&TokenType::Star); + let is_generator = self.match_token(&Token::Star); // Optional function name for function expressions - let id = if matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) { + let id = if matches!(self.peek(), Some(Token::Identifier(_))) { Some(self.expect_identifier("Expected function name")?) 
} else { None @@ -42,9 +42,9 @@ impl Parser { let params = self.parse_function_params()?; - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + self.consume(&Token::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_generator, false)?; - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + self.consume(&Token::RightBrace, "Expected '}' after function body")?; Ok(Expression::Function { id, @@ -59,10 +59,10 @@ impl Parser { self.advance(); // consume 'async' self.advance(); // consume 'function' - let is_generator = self.match_token(&TokenType::Star); + let is_generator = self.match_token(&Token::Star); // Optional function name for function expressions - let id = if matches!(self.peek_token_type(), Some(TokenType::Identifier(_))) { + let id = if matches!(self.peek(), Some(Token::Identifier(_))) { Some(self.expect_identifier("Expected function name")?) } else { None @@ -70,9 +70,9 @@ impl Parser { let params = self.parse_function_params()?; - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + self.consume(&Token::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_generator, true)?; - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + self.consume(&Token::RightBrace, "Expected '}' after function body")?; Ok(Expression::Function { id, @@ -86,11 +86,9 @@ impl Parser { pub fn parse_arrow_function_body(&mut self, params: Vec, is_async: bool) -> ParseResult { // Create a new function body context with appropriate yield/await flags - let body = if self.check(&TokenType::LeftBrace) { + let body = if self.check(&Token::LeftBrace) { // Block body - let body = self.parse_function_body(false, is_async)?; - ArrowFunctionBody::Block(body) } else { // Expression body @@ -114,16 +112,16 @@ impl Parser { pub fn parse_async_function_declaration(&mut self) -> ParseResult { self.advance(); // consume 'async' - 
self.consume(&TokenType::Function, "Expected 'function' after 'async'")?; + self.consume(&Token::Function, "Expected 'function' after 'async'")?; - let is_generator = self.match_token(&TokenType::Star); + let is_generator = self.match_token(&Token::Star); let id = self.expect_identifier("Expected function name")?; let params = self.parse_function_params()?; - self.consume(&TokenType::LeftBrace, "Expected '{' before function body")?; + self.consume(&Token::LeftBrace, "Expected '{' before function body")?; let body = self.parse_function_body(is_generator, true)?; - self.consume(&TokenType::RightBrace, "Expected '}' after function body")?; + self.consume(&Token::RightBrace, "Expected '}' after function body")?; Ok(FunctionDeclaration { id, @@ -135,7 +133,7 @@ impl Parser { } pub fn parse_function_params(&mut self) -> ParseResult> { - self.consume(&TokenType::LeftParen, "Expected '(' after function name")?; + self.consume(&Token::LeftParen, "Expected '(' after function name")?; // Create parameter name context with current strict mode let param_context = LexicalContext::ParameterName { @@ -145,9 +143,9 @@ impl Parser { self.with_context(param_context, |parser| { let mut params = Vec::new(); - if !parser.check(&TokenType::RightParen) { + if !parser.check(&Token::RightParen) { loop { - if parser.match_token(&TokenType::Ellipsis) { + if parser.match_token(&Token::Ellipsis) { // Rest parameter let arg = parser.parse_pattern()?; params.push(Expression::Spread(Box::new(arg))); @@ -155,17 +153,17 @@ impl Parser { } else { params.push(parser.parse_pattern()?); } - if !parser.match_token(&TokenType::Comma) { + if !parser.match_token(&Token::Comma) { break; } // Handle trailing comma - if parser.check(&TokenType::RightParen) { + if parser.check(&Token::RightParen) { break; } } } - parser.consume(&TokenType::RightParen, "Expected ')' after function parameters")?; + parser.consume(&Token::RightParen, "Expected ')' after function parameters")?; Ok(params) }) @@ -175,7 +173,7 @@ 
impl Parser { let function_body_context = LexicalContext::FunctionBody { allow_yield: is_generator, allow_await: is_async }; self.with_context(function_body_context, |parser| { let mut body = Vec::new(); - while !parser.check(&TokenType::RightBrace) && !parser.is_at_end() { + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { body.push(parser.parse_statement()?); } Ok(body) @@ -184,9 +182,9 @@ impl Parser { // Helper method to check if we're looking at an async function pub fn is_async_function(&self) -> bool { - if let Some(TokenType::Async) = self.peek_token_type() { - if let Some(next_token) = self.tokens.get(self.current + 1) { - return matches!(next_token.token_type, TokenType::Function); + if let Some(Token::Async) = self.peek() { + if let Some(next_token) = self.peek_next(1) { + return matches!(next_token, Token::Function); } } false diff --git a/src/parser/modules.rs b/src/parser/modules.rs index 18cf5fa..73da792 100644 --- a/src/parser/modules.rs +++ b/src/parser/modules.rs @@ -2,7 +2,7 @@ use super::prelude::*; use crate::ast::*; -use crate::lexer::{Token, TokenType, LexicalContext}; +use crate::lexer::{Token, LexicalContext}; use super::error::ParseResult; use super::core::Parser; @@ -49,7 +49,7 @@ impl Parser { let start_token = self.advance().unwrap(); // consume 'import' // Handle import() expression vs import statement - if self.check(&TokenType::LeftParen) { + if self.check(&Token::LeftParen) { // This is an import() expression, not an import statement // Rewind and parse as expression statement self.current -= 1; @@ -60,10 +60,10 @@ impl Parser { let mut source: Option> = None; // Handle different import forms - if matches!(self.peek_token_type(), Some(TokenType::StringLiteral(_))) { + if matches!(self.peek(), Some(Token::StringLiteral(_))) { // import "module-name"; (side-effect import) source = self.parse_module_source()?; - } else if self.match_token(&TokenType::Star) { + } else if self.match_token(&Token::Star) { // import * as 
name from "module-name"; (namespace import) specifiers.push(self.parse_namespace_import()?); source = self.parse_from_clause()?; @@ -72,20 +72,20 @@ impl Parser { // or just { named1, named2 } from "module-name"; // Check for default import - if !self.check(&TokenType::LeftBrace) && !self.check(&TokenType::From) { + if !self.check(&Token::LeftBrace) && !self.check(&Token::From) { specifiers.push(self.parse_default_import()?); // Optional comma before named imports - if self.match_token(&TokenType::Comma) && !self.check(&TokenType::From) { + if self.match_token(&Token::Comma) && !self.check(&Token::From) { // Continue to named imports - } else if !self.check(&TokenType::From) { + } else if !self.check(&Token::From) { // If no comma and not 'from', it's an error return Err(parser_error_at_current!(self, "Expected ',' or 'from' after default import")); } } // Named imports - if self.match_token(&TokenType::LeftBrace) { + if self.match_token(&Token::LeftBrace) { let named_imports = self.parse_named_imports()?; specifiers.extend(named_imports); } @@ -99,13 +99,13 @@ impl Parser { } // Parse import assertions if present - let assertions = if self.match_token(&TokenType::With) { + let assertions = if self.match_token(&Token::With) { self.parse_import_assertions()? 
} else { Vec::new() }; - self.consume(&TokenType::Semicolon, "Expected ';' after import statement")?; + self.consume(&Token::Semicolon, "Expected ';' after import statement")?; if let Some(src) = source { Ok(Statement::Import { @@ -120,7 +120,7 @@ impl Parser { // Helper method to parse a module source string pub fn parse_module_source(&mut self) -> ParseResult>> { - if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { + if let Token::StringLiteral(s) = self.advance().unwrap().clone() { Ok(Some(s.into_boxed_str())) } else { Err(parser_error_at_current!(self, "Expected string literal for module source")) @@ -129,13 +129,13 @@ impl Parser { // Helper method to parse the 'from "module-name"' part pub fn parse_from_clause(&mut self) -> ParseResult>> { - self.consume(&TokenType::From, "Expected 'from' after import specifiers")?; + self.consume(&Token::From, "Expected 'from' after import specifiers")?; self.parse_module_source() } // Helper method to parse namespace import: * as name pub fn parse_namespace_import(&mut self) -> ParseResult { - self.consume(&TokenType::As, "Expected 'as' after '*'")?; + self.consume(&Token::As, "Expected 'as' after '*'")?; // Use ImportExport context for parsing the identifier let local = self.with_context(LexicalContext::ImportExport, |parser| { parser.expect_identifier("Expected namespace import name") @@ -159,11 +159,11 @@ impl Parser { self.with_context(LexicalContext::ImportExport, |parser| { let mut specifiers = Vec::new(); - if !parser.check(&TokenType::RightBrace) { + if !parser.check(&Token::RightBrace) { loop { let imported = parser.expect_identifier("Expected imported name")?; - let local = if parser.match_token(&TokenType::As) { + let local = if parser.match_token(&Token::As) { parser.expect_identifier("Expected local name after 'as'")? 
} else { imported.clone() @@ -174,37 +174,37 @@ impl Parser { local, }); - if !parser.match_token(&TokenType::Comma) { + if !parser.match_token(&Token::Comma) { break; } // Handle trailing comma - if parser.check(&TokenType::RightBrace) { + if parser.check(&Token::RightBrace) { break; } } } - parser.consume(&TokenType::RightBrace, "Expected '}' after named imports")?; + parser.consume(&Token::RightBrace, "Expected '}' after named imports")?; Ok(specifiers) }) } pub fn parse_import_assertions(&mut self) -> ParseResult> { - self.consume(&TokenType::LeftBrace, "Expected '{' after 'with'")?; + self.consume(&Token::LeftBrace, "Expected '{' after 'with'")?; let mut assertions = Vec::new(); - if !self.check(&TokenType::RightBrace) { + if !self.check(&Token::RightBrace) { loop { // Use ImportExport context for parsing assertion keys let key = self.with_context(LexicalContext::ImportExport, |parser| { parser.expect_identifier("Expected assertion key") })?; - self.consume(&TokenType::Colon, "Expected ':' after assertion key")?; + self.consume(&Token::Colon, "Expected ':' after assertion key")?; - let value = if let TokenType::StringLiteral(s) = self.advance().unwrap().token_type.clone() { + let value = if let Token::StringLiteral(s) = self.advance().unwrap().clone() { s.into_boxed_str() } else { return Err(parser_error_at_current!(self, "Expected string literal for assertion value")); @@ -212,18 +212,18 @@ impl Parser { assertions.push(ImportAssertion { key, value }); - if !self.match_token(&TokenType::Comma) { + if !self.match_token(&Token::Comma) { break; } // Handle trailing comma - if self.check(&TokenType::RightBrace) { + if self.check(&Token::RightBrace) { break; } } } - self.consume(&TokenType::RightBrace, "Expected '}' after import assertions")?; + self.consume(&Token::RightBrace, "Expected '}' after import assertions")?; Ok(assertions) } @@ -232,12 +232,12 @@ impl Parser { let start_token = self.advance().unwrap().clone(); // consume 'export' // Handle export * from 
"module" or export * as name from "module" - if self.match_token(&TokenType::Star) { + if self.match_token(&Token::Star) { return self.parse_export_all(&start_token); } // Handle export default ... - if self.match_token(&TokenType::Default) { + if self.match_token(&Token::Default) { return self.parse_export_default(&start_token); } @@ -247,7 +247,7 @@ impl Parser { } // Handle export { ... } [from "..."] - if self.match_token(&TokenType::LeftBrace) { + if self.match_token(&Token::LeftBrace) { return self.parse_export_named_specifiers(&start_token); } @@ -258,7 +258,7 @@ impl Parser { // Helper method for export * from "module" or export * as name from "module" pub fn parse_export_all(&mut self, start_token: &Token) -> ParseResult { // Use ImportExport context for parsing the exported name - let exported = if self.match_token(&TokenType::As) { + let exported = if self.match_token(&Token::As) { Some(self.with_context(LexicalContext::ImportExport, |parser| { parser.expect_identifier("Expected exported name after 'as'") })?) @@ -266,34 +266,34 @@ impl Parser { None }; - if !self.match_token(&TokenType::From) { + if !self.match_token(&Token::From) { return Err(parser_error_at_current!(self, "Expected 'from' after export *")); } let source = self.parse_module_source()? .ok_or_else(|| super::error::ParserError::at_current(self, "Expected string literal for module source"))?; - self.consume(&TokenType::Semicolon, "Expected ';' after export statement")?; + self.consume(&Token::Semicolon, "Expected ';' after export statement")?; Ok(Statement::Export(ExportDeclaration::All { source, exported })) } // Helper method for export default ... 
pub fn parse_export_default(&mut self, start_token: &Token) -> ParseResult { - let declaration = if self.check(&TokenType::Function) { + let declaration = if self.check(&Token::Function) { let func_decl = self.parse_function_declaration()?; ExportDefaultDeclaration::Function(func_decl) - } else if self.check(&TokenType::Class) { + } else if self.check(&Token::Class) { let class_decl = self.parse_class_declaration()?; ExportDefaultDeclaration::Class(class_decl) - } else if self.check(&TokenType::Async) && self.is_async_function() { + } else if self.check(&Token::Async) && self.is_async_function() { // Handle async function let func_decl = self.parse_async_function_declaration()?; ExportDefaultDeclaration::Function(func_decl) } else { // export default expression; let expr = self.parse_expression()?; - self.consume(&TokenType::Semicolon, "Expected ';' after export default expression")?; + self.consume(&Token::Semicolon, "Expected ';' after export default expression")?; ExportDefaultDeclaration::Expression(expr) }; @@ -302,13 +302,13 @@ impl Parser { // Helper method for export declaration pub fn parse_export_declaration(&mut self, start_token: &Token) -> ParseResult { - let declaration = if self.check(&TokenType::Function) { + let declaration = if self.check(&Token::Function) { Declaration::Function(self.parse_function_declaration()?) - } else if self.check(&TokenType::Class) { + } else if self.check(&Token::Class) { Declaration::Class(self.parse_class_declaration()?) - } else if self.check(&TokenType::Async) && self.is_async_function() { + } else if self.check(&Token::Async) && self.is_async_function() { Declaration::Function(self.parse_async_function_declaration()?) - } else if self.check(&TokenType::Var) || self.check(&TokenType::Let) || self.check(&TokenType::Const) { + } else if self.check(&Token::Var) || self.check(&Token::Let) || self.check(&Token::Const) { Declaration::Variable(self.parse_variable_declaration()?) 
} else { return Err(parser_error_at_current!(self, "Expected declaration in export statement")); @@ -326,13 +326,13 @@ impl Parser { let specifiers = self.parse_export_specifiers()?; // Optional from clause - let source = if self.match_token(&TokenType::From) { + let source = if self.match_token(&Token::From) { Some(self.parse_module_source()?.ok_or_else(|| super::error::ParserError::at_current(self, "Expected string literal for module source"))?) } else { None }; - self.consume(&TokenType::Semicolon, "Expected ';' after export statement")?; + self.consume(&Token::Semicolon, "Expected ';' after export statement")?; Ok(Statement::Export(ExportDeclaration::Named { declaration: None, @@ -346,39 +346,39 @@ impl Parser { self.with_context(LexicalContext::ImportExport, |parser| { let mut specifiers = Vec::new(); - if !parser.check(&TokenType::RightBrace) { + if !parser.check(&Token::RightBrace) { loop { let local = parser.expect_identifier("Expected exported identifier")?; - let exported = if parser.match_token(&TokenType::As) { + let exported = if parser.match_token(&Token::As) { parser.expect_identifier("Expected exported name after 'as'")? 
} else { local.clone() }; specifiers.push(ExportSpecifier { local, exported }); - if !parser.match_token(&TokenType::Comma) { + if !parser.match_token(&Token::Comma) { break; } // Handle trailing comma - if parser.check(&TokenType::RightBrace) { + if parser.check(&Token::RightBrace) { break; } } } - parser.consume(&TokenType::RightBrace, "Expected '}' after export specifiers")?; + parser.consume(&Token::RightBrace, "Expected '}' after export specifiers")?; Ok(specifiers) }) } // Helper method to check if the current token starts a declaration pub fn is_declaration_start(&self) -> bool { - self.check(&TokenType::Var) || - self.check(&TokenType::Let) || - self.check(&TokenType::Const) || - self.check(&TokenType::Function) || - self.check(&TokenType::Class) || - (self.check(&TokenType::Async) && self.is_async_function()) + self.check(&Token::Var) || + self.check(&Token::Let) || + self.check(&Token::Const) || + self.check(&Token::Function) || + self.check(&Token::Class) || + (self.check(&Token::Async) && self.is_async_function()) } } diff --git a/src/parser/patterns.rs b/src/parser/patterns.rs index 3e55617..3124105 100644 --- a/src/parser/patterns.rs +++ b/src/parser/patterns.rs @@ -2,34 +2,35 @@ use super::prelude::*; use crate::ast::*; -use crate::lexer::{TokenType, LexicalContext}; +use crate::lexer::{Token, LexicalContext}; use super::error::ParseResult; use super::core::Parser; impl Parser { pub fn parse_pattern(&mut self) -> ParseResult { - match self.peek_token_type() { + match self.peek() { // Identifier pattern - Some(TokenType::Identifier(_)) => { + Some(Token::Identifier(_)) => { let name = self.expect_identifier("Expected identifier in pattern")?; Ok(Expression::Identifier(name)) }, // Object pattern: { x, y } - Some(TokenType::LeftBrace) => { + Some(Token::LeftBrace) => { + println!("In pattern"); self.advance(); // consume '{' let mut properties = Vec::new(); - if !self.check(&TokenType::RightBrace) { + if !self.check(&Token::RightBrace) { loop { - if 
self.match_token(&TokenType::Ellipsis) { + if self.match_token(&Token::Ellipsis) { // Rest element let argument = self.parse_pattern()?; properties.push(ObjectProperty::Spread(argument)); // Rest element must be the last one - if !self.check(&TokenType::RightBrace) { + if !self.check(&Token::RightBrace) { return Err(parser_error_at_current!(self, "Rest element must be the last element in object pattern")); } break; @@ -40,13 +41,13 @@ impl Parser { })?; // Handle shorthand: { x } - let (value, computed, shorthand) = if !self.check(&TokenType::Colon) { + let (value, computed, shorthand) = if !self.check(&Token::Colon) { if let PropertyKey::Identifier(name) = &key { // Shorthand property: { x } let pattern = Expression::Identifier(name.clone()); // Check for default value: { x = 1 } - if self.match_token(&TokenType::Equal) { + if self.match_token(&Token::Equal) { let default = self.parse_expression()?; (Expression::Assignment { operator: AssignmentOperator::Assign, @@ -65,7 +66,7 @@ impl Parser { let pattern = self.parse_pattern()?; // Check for default value: { key: value = 1 } - if self.match_token(&TokenType::Equal) { + if self.match_token(&Token::Equal) { let default = self.parse_expression()?; (Expression::Assignment { operator: AssignmentOperator::Assign, @@ -86,42 +87,42 @@ impl Parser { }); } - if !self.match_token(&TokenType::Comma) { + if !self.match_token(&Token::Comma) { break; } // Handle trailing comma - if self.check(&TokenType::RightBrace) { + if self.check(&Token::RightBrace) { break; } } } - self.consume(&TokenType::RightBrace, "Expected '}' after object pattern")?; + self.consume(&Token::RightBrace, "Expected '}' after object pattern")?; Ok(Expression::Object(properties)) }, // Array pattern: [x, y, z = 1] - Some(TokenType::LeftBracket) => { + Some(Token::LeftBracket) => { self.advance(); // consume '[' let mut elements = Vec::new(); - while !self.check(&TokenType::RightBracket) && !self.is_at_end() { - if self.match_token(&TokenType::Comma) { + 
while !self.check(&Token::RightBracket) && !self.is_at_end() { + if self.match_token(&Token::Comma) { // Elision (hole) elements.push(ArrayElement::Hole); // TODO could use } else { - if self.match_token(&TokenType::Ellipsis) { + if self.match_token(&Token::Ellipsis) { // Rest element let argument = self.parse_pattern()?; elements.push(ArrayElement::Spread(Expression::Spread(Box::new(argument)))); // Rest element must be the last one - if !self.check(&TokenType::RightBracket) { - if self.match_token(&TokenType::Comma) { - if !self.check(&TokenType::RightBracket) { + if !self.check(&Token::RightBracket) { + if self.match_token(&Token::Comma) { + if !self.check(&Token::RightBracket) { return Err(parser_error_at_current!(self, "Rest element must be the last element in array pattern")); } } else { @@ -134,7 +135,7 @@ impl Parser { let pattern = self.parse_pattern()?; // Check for default value: [x = 1] - if self.match_token(&TokenType::Equal) { + if self.match_token(&Token::Equal) { let default = self.parse_expression()?; elements.push(ArrayElement::Expression(Expression::Assignment { operator: AssignmentOperator::Assign, @@ -146,13 +147,13 @@ impl Parser { } } - if !self.check(&TokenType::RightBracket) { - self.consume(&TokenType::Comma, "Expected ',' after array pattern element")?; + if !self.check(&Token::RightBracket) { + self.consume(&Token::Comma, "Expected ',' after array pattern element")?; } } } - self.consume(&TokenType::RightBracket, "Expected ']' after array pattern")?; + self.consume(&Token::RightBracket, "Expected ']' after array pattern")?; Ok(Expression::Array(elements)) }, diff --git a/src/parser/prelude.rs b/src/parser/prelude.rs index a38df44..60a5d88 100644 --- a/src/parser/prelude.rs +++ b/src/parser/prelude.rs @@ -1,6 +1 @@ -pub use crate::{ - parser_error_at_current, - parser_error_at_previous, - parser_error_at_current_mut, - parser_error_at_previous_mut, -}; \ No newline at end of file +pub use crate::{parser_error_at_current, 
parser_error_at_current_mut}; \ No newline at end of file diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 041ea3f..86ea20d 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -1,21 +1,19 @@ use super::prelude::*; - use crate::ast::*; -use crate::lexer::{Token, TokenType, LexicalContext}; -use super::error::{ParserError, ParseResult}; +use crate::lexer::{Token, LexicalContext}; +use super::error::ParseResult; use super::core::Parser; impl Parser { // Variable declarations pub fn parse_variable_declaration(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap_or_else(|| Token::new(TokenType::EOF, 0, 0, 0)); - let kind = match token.token_type { - TokenType::Var => VariableKind::Var, - TokenType::Let => VariableKind::Let, - TokenType::Const => VariableKind::Const, + let kind = match self.advance() { + Some(Token::Var) => VariableKind::Var, + Some(Token::Let) => VariableKind::Let, + Some(Token::Const) => VariableKind::Const, _ => unreachable!(), }; @@ -23,16 +21,17 @@ impl Parser { let mut declarations = vec![self.parse_variable_declarator()?]; // Parse additional declarators separated by commas - while self.match_token(&TokenType::Comma) { + while self.match_token(&Token::Comma) { declarations.push(self.parse_variable_declarator()?); } // Consume semicolon unless we're in a for-in/of loop context - let current_context = self.current_context(); - let is_in_loop_parameters = matches!(current_context, LexicalContext::LoopParameters); - - if !is_in_loop_parameters { - self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?; + //let current_context = self.current_context(); + //let is_in_loop_parameters = matches!(current_context, LexicalContext::LoopParameters); + + + if !self.is_in_loop_parameters() { + self.consume(&Token::Semicolon, "Expected ';' after variable declaration")?; } Ok(VariableDeclaration { declarations, kind }) @@ -40,47 +39,62 @@ impl Parser { pub fn 
parse_statement(&mut self) -> ParseResult { - match self.peek_token_type() { + match self.peek() { // Empty statement (just a semicolon) - Some(TokenType::Semicolon) => { + Some(Token::Semicolon) => { self.advance(); Ok(Statement::Empty) }, - // Block statement { ... } - Some(TokenType::LeftBrace) => self.parse_block(), + // Block statement or literal object expression { ... } + Some(Token::LeftBrace) => { + match self.peek_previous() { + Some(Token::RightParen) => self.parse_block(), + _ => match self.peek_next(1) { + Some(Token::RightBrace) | Some(Token::LeftBracket) | Some(Token::Ellipsis) => self.parse_expression_statement(), + Some(Token::Identifier(_)) | Some(Token::StringLiteral(_)) => { + if let Some(Token::Colon) = self.peek_next(2) { + self.parse_expression_statement() + } else { + self.parse_block() + } + } + _ => self.parse_block(), + }, + } + }, // Declaration statements - Some(TokenType::Var) | Some(TokenType::Let) | Some(TokenType::Const) => self.parse_variable_statement(), - Some(TokenType::Function) => self.parse_function_statement(), - Some(TokenType::Class) => self.parse_class_statement(), + Some(Token::Var) | Some(Token::Let) | Some(Token::Const) => self.parse_variable_statement(), + Some(Token::Function) => self.parse_function_statement(), + Some(Token::Class) => self.parse_class_statement(), // Control flow statements - Some(TokenType::If) => self.parse_if(), - Some(TokenType::Switch) => self.parse_switch(), - Some(TokenType::For) => self.parse_for(), - Some(TokenType::While) => self.parse_while(), - Some(TokenType::Do) => self.parse_do_while(), + Some(Token::If) => self.parse_if(), + Some(Token::Switch) => self.parse_switch(), + Some(Token::For) => self.parse_for(), + Some(Token::While) => self.parse_while(), + Some(Token::Do) => self.parse_do_while(), // Exception handling - Some(TokenType::Try) => self.parse_try(), - Some(TokenType::Throw) => self.parse_throw(), + Some(Token::Try) => self.parse_try(), + Some(Token::Throw) => 
self.parse_throw(), // Function control - Some(TokenType::Return) => self.parse_return(), - Some(TokenType::Break) => self.parse_break(), - Some(TokenType::Continue) => self.parse_continue(), + Some(Token::Return) => self.parse_return(), + Some(Token::Break) => self.parse_break(), + Some(Token::Continue) => self.parse_continue(), // Module statements - Some(TokenType::Import) => self.parse_import_statement(), - Some(TokenType::Export) => self.parse_export_statement(), + Some(Token::Import) => self.parse_import_statement(), + Some(Token::Export) => self.parse_export_statement(), // Other statements - Some(TokenType::With) => self.parse_with(), - Some(TokenType::Debugger) => self.parse_debugger(), + Some(Token::With) => self.parse_with(), + Some(Token::Debugger) => self.parse_debugger(), // Labeled statement - Some(TokenType::Identifier(_)) if self.is_label() => self.parse_labeled(), + Some(Token::Identifier(_)) if self.is_label() => self.parse_labeled(), // Default: expression statement _ => self.parse_expression_statement(), @@ -89,15 +103,15 @@ impl Parser { /// Parse a block statement: { statements... 
} fn parse_block(&mut self) -> ParseResult { - self.consume(&TokenType::LeftBrace, "Expected '{'")?; + self.consume(&Token::LeftBrace, "Expected '{'")?; let mut statements = Vec::new(); - while !self.check(&TokenType::RightBrace) && !self.is_at_end() { + while !self.check(&Token::RightBrace) && !self.is_at_end() { statements.push(self.parse_statement()?); } - self.consume(&TokenType::RightBrace, "Expected '}'")?; + self.consume(&Token::RightBrace, "Expected '}'")?; Ok(Statement::Block(statements)) } @@ -123,13 +137,13 @@ impl Parser { /// Parse if statement: if (condition) consequent else alternate fn parse_if(&mut self) -> ParseResult { self.advance(); // consume 'if' - self.consume(&TokenType::LeftParen, "Expected '(' after 'if'")?; + self.consume(&Token::LeftParen, "Expected '(' after 'if'")?; let test = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after if condition")?; + self.consume(&Token::RightParen, "Expected ')' after if condition")?; let consequent = Box::new(self.parse_statement()?); - let alternate = self.match_token(&TokenType::Else) + let alternate = self.match_token(&Token::Else) .then(|| self.parse_statement().map(Box::new)) .transpose()?; @@ -138,24 +152,24 @@ impl Parser { /// Parse a single case in a switch statement fn parse_switch_case(&mut self) -> ParseResult { - let test = if self.match_token(&TokenType::Case) { + let test = if self.match_token(&Token::Case) { // After 'case', we expect an expression Some(self.parse_expression()?) 
- } else if self.match_token(&TokenType::Default) { + } else if self.match_token(&Token::Default) { None } else { - println!("Current token {:#?}", self.peek_token()); + println!("Current token {:#?}", self.peek()); return Err(parser_error_at_current!(self, "Expected 'case' or 'default'")); }; - self.consume(&TokenType::Colon, "Expected ':' after case value")?; + self.consume(&Token::Colon, "Expected ':' after case value")?; let mut consequent = Vec::new(); // Parse statements until next case, default, or end of switch - while !self.check(&TokenType::Case) && - !self.check(&TokenType::Default) && - !self.check(&TokenType::RightBrace) && + while !self.check(&Token::Case) && + !self.check(&Token::Default) && + !self.check(&Token::RightBrace) && !self.is_at_end() { consequent.push(self.parse_statement()?); } @@ -170,12 +184,12 @@ impl Parser { let block = Box::new(self.parse_block()?); // Parse optional catch clause - let handler = self.match_token(&TokenType::Catch) + let handler = self.match_token(&Token::Catch) .then(|| self.parse_catch_clause()) .transpose()?; // Parse optional finally clause - let finalizer = self.match_token(&TokenType::Finally) + let finalizer = self.match_token(&Token::Finally) .then(|| self.parse_block().map(Box::new)) .transpose()?; @@ -190,12 +204,12 @@ impl Parser { /// Parse catch clause: catch ([param]) block fn parse_catch_clause(&mut self) -> ParseResult { // Optional catch parameter - let param = self.match_token(&TokenType::LeftParen) + let param = self.match_token(&Token::LeftParen) .then(|| { // Attempt to parse the parameter identifier - if let Some(TokenType::Identifier(name)) = self.peek_token_type().cloned() { + if let Some(Token::Identifier(name)) = self.peek().cloned() { self.advance(); // Consume the identifier - self.consume(&TokenType::RightParen, "Expected ')' after catch parameter")?; + self.consume(&Token::RightParen, "Expected ')' after catch parameter")?; Ok(Expression::Identifier(name.into_boxed_str())) } else { // 
If not an identifier, it's an error @@ -212,7 +226,7 @@ impl Parser { /// Parse throw statement: throw expression; fn parse_throw(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'throw' + self.advance(); // consume 'throw' // No line terminator allowed between throw and expression if self.previous_line_terminator() { @@ -220,7 +234,7 @@ impl Parser { } let expr = self.parse_expression()?; - self.consume(&TokenType::Semicolon, "Expected ';' after throw statement")?; + self.consume(&Token::Semicolon, "Expected ';' after throw statement")?; Ok(Statement::Throw(expr)) } @@ -234,11 +248,11 @@ impl Parser { return Err(parser_error_at_current!(self, "'with' statements are not allowed in strict mode")); } - self.consume(&TokenType::LeftParen, "Expected '(' after 'with'")?; + self.consume(&Token::LeftParen, "Expected '(' after 'with'")?; let object = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after with expression")?; + self.consume(&Token::RightParen, "Expected ')' after with expression")?; let body = Box::new(self.parse_statement()?); @@ -249,18 +263,16 @@ impl Parser { fn parse_debugger(&mut self) -> ParseResult { self.advance(); // consume 'debugger' - self.consume(&TokenType::Semicolon, "Expected ';' after debugger statement")?; + self.consume(&Token::Semicolon, "Expected ';' after debugger statement")?; Ok(Statement::Debugger) } /// Parse labeled statement: identifier: statement fn parse_labeled(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); let label = self.expect_identifier("Expected label name")?; - - self.consume(&TokenType::Colon, "Expected ':' after label")?; + self.consume(&Token::Colon, "Expected ':' after label")?; // Add label to the set of active labels let label_exists = !self.state.labels.insert(label.clone()); @@ -279,10 +291,9 @@ impl Parser { /// Check if the current token is a label fn is_label(&self) -> bool { - // Check if the current token is an 
identifier and the next token is a colon - if let Some(TokenType::Identifier(_)) = self.peek_token_type() { - if let Some(next_token) = self.tokens.get(self.current + 1) { - return matches!(next_token.token_type, TokenType::Colon); + if let Some(Token::Identifier(_)) = self.peek() { + if let Some(next_token) = self.peek_next(1) { + return matches!(next_token, Token::Colon); } } false @@ -302,13 +313,15 @@ impl Parser { let is_directive = if let Expression::Literal(Literal::String(_)) = &expr { // Only consider as directive if it's at the beginning of a function/program // and is a simple string literal (not an expression) - start_pos == 0 || self.previous().unwrap().token_type == TokenType::LeftBrace + start_pos == 0 || self.peek_previous().unwrap() == &Token::LeftBrace } else { false }; - self.consume(&TokenType::Semicolon, "Expected ';' after expression statement")?; - + if !self.is_in_loop_parameters() { + self.consume(&Token::Semicolon, "Expected ';' after expression statement")?; + } + // If this is a "use strict" directive, update parser state if is_directive { if let Expression::Literal(Literal::String(value)) = &expr { @@ -325,19 +338,19 @@ impl Parser { /// Parse switch statement: switch (discriminant) { case/default... 
} fn parse_switch(&mut self) -> ParseResult { self.advance(); // consume 'switch' - self.consume(&TokenType::LeftParen, "Expected '(' after 'switch'")?; + self.consume(&Token::LeftParen, "Expected '(' after 'switch'")?; let discriminant = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after switch expression")?; + self.consume(&Token::RightParen, "Expected ')' after switch expression")?; - self.consume(&TokenType::LeftBrace, "Expected '{' before switch cases")?; + self.consume(&Token::LeftBrace, "Expected '{' before switch cases")?; // Use SwitchBody context instead of state flag let cases = self.with_context(LexicalContext::SwitchBody, |parser| { let mut inner_cases = Vec::new(); let mut has_default = false; - while !parser.check(&TokenType::RightBrace) && !parser.is_at_end() { + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { let case = parser.parse_switch_case()?; // Validate only one default case @@ -354,7 +367,7 @@ impl Parser { Ok(inner_cases) })?; - self.consume(&TokenType::RightBrace, "Expected '}' after switch cases")?; + self.consume(&Token::RightBrace, "Expected '}' after switch cases")?; Ok(Statement::Switch { discriminant, cases }) } @@ -362,10 +375,10 @@ impl Parser { /// Parse while statement: while (test) statement fn parse_while(&mut self) -> ParseResult { self.advance(); // consume 'while' - self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; + self.consume(&Token::LeftParen, "Expected '(' after 'while'")?; let test = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; + self.consume(&Token::RightParen, "Expected ')' after while condition")?; // Use LoopBody context instead of state flag let body = self.with_context(LexicalContext::LoopBody, |parser| { @@ -384,28 +397,28 @@ impl Parser { parser.parse_statement().map(Box::new) })?; - self.consume(&TokenType::While, "Expected 'while' after do block")?; - 
self.consume(&TokenType::LeftParen, "Expected '(' after 'while'")?; + self.consume(&Token::While, "Expected 'while' after do block")?; + self.consume(&Token::LeftParen, "Expected '(' after 'while'")?; let test = self.parse_expression()?; - self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; - self.consume(&TokenType::Semicolon, "Expected ';' after do-while statement")?; + self.consume(&Token::RightParen, "Expected ')' after while condition")?; + self.consume(&Token::Semicolon, "Expected ';' after do-while statement")?; Ok(Statement::Loop(LoopStatement::DoWhile { body, test })) } /// Parse break statement: break [label]; fn parse_break(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'break' + self.advance(); // consume 'break' - if !self.is_in_loop() && !self.is_in_switch() { + if !self.is_in_loop_body() && !self.is_in_switch() { return Err(parser_error_at_current!(self, "'break' statement outside of loop or switch")); } // Optional label - let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { - if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { + let label = if !self.check(&Token::Semicolon) && !self.previous_line_terminator() { + if let Some(Token::Identifier(name)) = self.peek().cloned() { self.advance(); // Verify label exists @@ -422,23 +435,23 @@ impl Parser { None }; - self.consume(&TokenType::Semicolon, "Expected ';' after break statement")?; + self.consume(&Token::Semicolon, "Expected ';' after break statement")?; Ok(Statement::Break(label)) } /// Parse continue statement: continue [label]; fn parse_continue(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'continue' + self.advance(); // consume 'continue' // Check if we're in a loop using context method - if !self.is_in_loop() { + if !self.is_in_loop_body() { return Err(parser_error_at_current!(self, "'continue' statement outside 
of loop")); } // Optional label - let label = if !self.check(&TokenType::Semicolon) && !self.previous_line_terminator() { - if let Some(TokenType::Identifier(name)) = self.peek_token().map(|t| &t.token_type).cloned() { + let label = if !self.check(&Token::Semicolon) && !self.previous_line_terminator() { + if let Some(Token::Identifier(name)) = self.peek().cloned() { self.advance(); // Verify label exists @@ -455,14 +468,14 @@ impl Parser { None }; - self.consume(&TokenType::Semicolon, "Expected ';' after continue statement")?; + self.consume(&Token::Semicolon, "Expected ';' after continue statement")?; Ok(Statement::Continue(label)) } /// Parse return statement: return [expression]; fn parse_return(&mut self) -> ParseResult { - let token = self.advance().cloned().unwrap(); // consume 'return' + self.advance(); // consume 'return' // Check if we're in a function using context method if !self.is_in_function() { @@ -470,14 +483,14 @@ impl Parser { } // Return with no value if semicolon or end of block - let argument = (!self.check(&TokenType::Semicolon) && - !self.check(&TokenType::RightBrace) && + let argument = (!self.check(&Token::Semicolon) && + !self.check(&Token::RightBrace) && !self.is_at_end() && !self.previous_line_terminator()) .then(|| self.parse_expression()) .transpose()?; - self.consume(&TokenType::Semicolon, "Expected ';' after return statement")?; + self.consume(&Token::Semicolon, "Expected ';' after return statement")?; Ok(Statement::Return(argument)) } @@ -486,293 +499,244 @@ impl Parser { fn parse_for(&mut self) -> ParseResult { self.advance(); // consume 'for' + //println!("In for loop construct"); + // Check for for-await-of - let is_await = self.match_token(&TokenType::Await); + let is_await = self.match_token(&Token::Await); - self.consume(&TokenType::LeftParen, "Expected '(' after 'for'")?; + self.consume(&Token::LeftParen, "Expected '(' after 'for'")?; - // Parse initialization with LoopParameters context let result = 
self.with_context(LexicalContext::LoopParameters, |parser| { - // ... existing for loop parsing code ... - if parser.match_token(&TokenType::Semicolon) { - // No initialization - standard for loop with empty init - // Parse condition - let test = (!parser.check(&TokenType::Semicolon)) + if parser.check(&Token::Semicolon) { + parser.consume(&Token::Semicolon, "Expected ';' after for init")?; + + let test = (!parser.check(&Token::Semicolon)) .then(|| parser.parse_expression()) .transpose()?; - - parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update - let update = (!parser.check(&TokenType::RightParen)) + + parser.consume(&Token::Semicolon, "Expected ';' after for test")?; + + let update = (!parser.check(&Token::Semicolon)) .then(|| parser.parse_expression()) .transpose()?; - - parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body with LoopBody context + + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; let body = parser.with_context(LexicalContext::LoopBody, |p| { p.parse_statement().map(Box::new) })?; - + Ok(LoopStatement::For { init: None, test, update, body }) - } else if parser.check(&TokenType::Var) || parser.check(&TokenType::Let) || parser.check(&TokenType::Const) { - // Variable declaration initialization - let decl = parser.parse_variable_declaration()?; - - // Check for for-in or for-of - if parser.check(&TokenType::In) { - // for-in loop with variable declaration - parser.advance(); // consume 'in' - let right = parser.parse_expression()?; - parser.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::ForIn { - left: ForInOfLeft::Declaration(decl), - right, - body - }) - } else if parser.check(&TokenType::Of) { - // for-of loop with variable declaration - 
parser.advance(); // consume 'of' - let right = parser.parse_expression()?; - parser.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::ForOf { - left: ForInOfLeft::Declaration(decl), - right, - body, - is_await - }) - } else { - // Standard for loop with variable declaration - parser.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - - // Parse condition - let test = (!parser.check(&TokenType::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update - let update = (!parser.check(&TokenType::RightParen)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::For { - init: Some(ForInit::Variable(decl)), - test, - update, - body - }) + } else if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + + //println!("- with initialization"); + + let init = parser.parse_variable_declaration()?; + + match parser.advance() { + Some(&Token::In) => { + let right = parser.parse_expression()?; + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + Ok(LoopStatement::ForIn { + left: ForInit::Declaration(init), + right, + body + }) + }, + Some(&Token::Of) => { + let right = parser.parse_expression()?; + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; + let body = parser.with_context(LexicalContext::LoopBody, |p| { + 
p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::ForOf { + left: ForInit::Declaration(init), + right, + body, + is_await + }) + }, + _ => { + + //println!("- as classical loop"); + + let test = (!parser.check(&Token::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&Token::Semicolon, "Expected ';' after for test")?; + + let update = (!parser.check(&Token::RightParen)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::For { + init: Some(ForInit::Declaration(init)), + test, + update, + body + }) + }, } - } else if let Some(TokenType::Identifier(_)) = parser.peek_token_type() { - // ... existing identifier handling code ... - // First, check if the next tokens form a for-in or for-of loop - // Save current position to backtrack if needed - let start_pos = parser.current; - - // Parse the identifier - let token = parser.advance().unwrap().clone(); - let name = parser.expect_identifier("Expected label name")?; - let left = Expression::Identifier(name); + + } else if let Some(Token::Identifier(_)) = parser.peek() { - // Check what follows the identifier - if parser.check(&TokenType::In) { - // for-in loop with identifier - parser.advance(); // consume 'in' - let right = parser.parse_expression()?; - parser.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::ForIn { - left: ForInOfLeft::Pattern(left), - right, - body - }) - } else if parser.check(&TokenType::Of) { - // for-of loop with identifier - parser.advance(); // consume 'of' - let right = parser.parse_expression()?; - parser.consume(&TokenType::RightParen, "Expected ')' 
after for-of right-hand side")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::ForOf { - left: ForInOfLeft::Pattern(left), - right, - body, - is_await - }) - } else { - // Not a for-in or for-of loop, so it must be a standard for loop - // Reset position and parse the full initialization expression - parser.current = start_pos; - - // Parse the initialization expression - let init_expr = parser.parse_expression()?; - - parser.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - - // Parse condition - let test = (!parser.check(&TokenType::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update (which might be empty) - let update = (!parser.check(&TokenType::RightParen)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::For { - init: Some(ForInit::Expression(init_expr)), - test, - update, - body - }) + match parser.peek_next(1) { + Some(&Token::In) => { + let left = Expression::Identifier(parser.expect_identifier("Expected for init name")?); + + parser.advance(); // consume 'in' + + let right = parser.parse_expression()?; + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + Ok(LoopStatement::ForIn { + left: ForInit::Pattern(left), + right, + body + }) + }, + Some(&Token::Of) => { + let left = Expression::Identifier(parser.expect_identifier("Expected for init name")?); + + parser.advance(); // consume 'in' + + let right = 
parser.parse_expression()?; + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + Ok(LoopStatement::ForOf { + left: ForInit::Pattern(left), + right, + body, + is_await + }) + }, + _ => { + //println!("classical for loop without initialisation"); + + //println!("current token is {:#?}", parser.peek()); + + let init = (!parser.check(&Token::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&Token::Semicolon, "Expected ';' after for init")?; + + let test = (!parser.check(&Token::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&Token::Semicolon, "Expected ';' after for test")?; + + //println!("Before I am here on {:#?}", parser.peek()); + + let update = (!parser.check(&Token::RightParen)) + .then(|| parser.parse_expression()) + .transpose()?; + + + //println!("After I am here on {:#?}", parser.peek()); + + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::For { + init: init.map(|exp| ForInit::Pattern(exp)), + test, + update, + body + }) + }, } } else { - // For other expressions (including array/object literals and complex expressions) - // Parse the full initialization expression - let init_expr = parser.parse_expression()?; - - // Check if this is a for-in or for-of loop - if parser.check(&TokenType::In) { - // for-in loop with expression - parser.advance(); // consume 'in' - let right = parser.parse_expression()?; - parser.consume(&TokenType::RightParen, "Expected ')' after for-in right-hand side")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::ForIn { - left: ForInOfLeft::Pattern(init_expr), - right, - body 
- }) - } else if parser.check(&TokenType::Of) { - // for-of loop with expression - parser.advance(); // consume 'of' - let right = parser.parse_expression()?; - parser.consume(&TokenType::RightParen, "Expected ')' after for-of right-hand side")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::ForOf { - left: ForInOfLeft::Pattern(init_expr), - right, - body, - is_await - }) - } else { - // Standard for loop with expression initialization - parser.consume(&TokenType::Semicolon, "Expected ';' after for initialization")?; - - // Parse condition - let test = (!parser.check(&TokenType::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - - // Parse update (which might be empty) - let update = (!parser.check(&TokenType::RightParen)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - - // Parse body with LoopBody context - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::For { - init: Some(ForInit::Expression(init_expr)), - test, - update, - body - }) - } + //println!("What did happen? 
{:#?}", parser.peek()); + //Err(parser_error_at_current!(parser, "unknown for construct")) + + + let init = (!parser.check(&Token::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&Token::Semicolon, "Expected ';' after for init")?; + + let test = (!parser.check(&Token::Semicolon)) + .then(|| parser.parse_expression()) + .transpose()?; + + parser.consume(&Token::Semicolon, "Expected ';' after for test")?; + + //println!("Before I am here on {:#?}", parser.peek()); + + let update = (!parser.check(&Token::RightParen)) + .then(|| parser.parse_expression()) + .transpose()?; + + + //println!("After I am here on {:#?}", parser.peek()); + + parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; + let body = parser.with_context(LexicalContext::LoopBody, |p| { + p.parse_statement().map(Box::new) + })?; + + Ok(LoopStatement::For { + init: init.map(|exp| ForInit::Pattern(exp)), + test, + update, + body + }) } + })?; - + Ok(Statement::Loop(result)) - } + } /// Parse a variable declarator: pattern = initializer pub fn parse_variable_declarator(&mut self) -> ParseResult { // Get the current token position for error reporting - let start_token = self.peek_token().cloned(); + let is_const = matches!(self.peek(), Some(&Token::Const)); + + + //println!("1 Now at {:#?}", self.peek()); + // Parse the binding pattern (identifier, object pattern, or array pattern) let id = self.parse_pattern()?; // Check if this is a const declaration without an initializer - let is_const = if let Some(prev_token) = self.tokens.get(self.current - 2) { - matches!(prev_token.token_type, TokenType::Const) - } else { - false - }; + // TODO fix self.tokens direct access + //println!("2 Now at {:#?}", self.peek()); + // Parse optional initializer - let init = if self.match_token(&TokenType::Equal) { + let init = if self.match_token(&Token::Equal) { // Parse the initializer expression Some(self.parse_expression()?) 
} else { // Const declarations must have initializers if is_const { - if let Some(token) = start_token { - return Err(parser_error_at_current!(self, "Missing initializer in const declaration")); - } else { - return Err(parser_error_at_current!(self, "Missing initializer in const declaration")); - } + return Err(parser_error_at_current!(self, "Missing initializer in const declaration")); } None }; From f71bde380c2661b22a30b9383d3b7df65aeb7173 Mon Sep 17 00:00:00 2001 From: Jan Cajthaml Date: Sat, 10 May 2025 09:19:53 +0200 Subject: [PATCH 6/7] separate grammar definition from parser --- examples/tricky/index.js | 2 +- src/ast.rs | 961 +++++++++++++++++++++++-------------- src/grammar/array.rs | 45 ++ src/grammar/call.rs | 49 ++ src/grammar/class.rs | 223 +++++++++ src/grammar/declaration.rs | 497 +++++++++++++++++++ src/grammar/expression.rs | 739 ++++++++++++++++++++++++++++ src/grammar/function.rs | 242 ++++++++++ src/grammar/literal.rs | 66 +++ src/grammar/member.rs | 67 +++ src/grammar/mod.rs | 29 ++ src/grammar/module.rs | 51 ++ src/grammar/new.rs | 44 ++ src/grammar/object.rs | 166 +++++++ src/grammar/pattern.rs | 285 +++++++++++ src/grammar/statement.rs | 857 +++++++++++++++++++++++++++++++++ src/grammar/this.rs | 18 + src/lexer/context.rs | 41 +- src/lexer/lexer.rs | 156 +++--- src/lexer/token.rs | 4 +- src/main.rs | 8 +- src/parser/asi.rs | 139 +++--- src/parser/classes.rs | 156 ------ src/parser/combinator.rs | 6 + src/parser/context.rs | 74 +++ src/parser/core.rs | 424 ---------------- src/parser/error.rs | 50 +- src/parser/expressions.rs | 865 --------------------------------- src/parser/functions.rs | 192 -------- src/parser/mod.rs | 28 +- src/parser/modules.rs | 384 --------------- src/parser/parser.rs | 243 ++++++++++ src/parser/patterns.rs | 169 ------- src/parser/prelude.rs | 1 - src/parser/state.rs | 15 - src/parser/statements.rs | 836 -------------------------------- src/parser/stream.rs | 94 ++++ 37 files changed, 4597 insertions(+), 3629 
deletions(-) create mode 100644 src/grammar/array.rs create mode 100644 src/grammar/call.rs create mode 100644 src/grammar/class.rs create mode 100644 src/grammar/declaration.rs create mode 100644 src/grammar/expression.rs create mode 100644 src/grammar/function.rs create mode 100644 src/grammar/literal.rs create mode 100644 src/grammar/member.rs create mode 100644 src/grammar/mod.rs create mode 100644 src/grammar/module.rs create mode 100644 src/grammar/new.rs create mode 100644 src/grammar/object.rs create mode 100644 src/grammar/pattern.rs create mode 100644 src/grammar/statement.rs create mode 100644 src/grammar/this.rs delete mode 100644 src/parser/classes.rs create mode 100644 src/parser/combinator.rs create mode 100644 src/parser/context.rs delete mode 100644 src/parser/core.rs delete mode 100644 src/parser/expressions.rs delete mode 100644 src/parser/functions.rs delete mode 100644 src/parser/modules.rs create mode 100644 src/parser/parser.rs delete mode 100644 src/parser/patterns.rs delete mode 100644 src/parser/prelude.rs delete mode 100644 src/parser/state.rs delete mode 100644 src/parser/statements.rs create mode 100644 src/parser/stream.rs diff --git a/examples/tricky/index.js b/examples/tricky/index.js index bbb9a24..98329ac 100644 --- a/examples/tricky/index.js +++ b/examples/tricky/index.js @@ -24,7 +24,7 @@ //{ x: y => 3 }; -for (let i=0; i, - pub comments: Vec, + pub source_type: SourceType, } #[derive(Debug, Clone, PartialEq)] @@ -11,177 +20,260 @@ pub enum SourceType { Module, } -#[derive(Debug, Clone)] +/// Represents a JavaScript statement +#[derive(Debug, Clone, PartialEq)] pub enum Statement { - Empty, - Block(Vec), - Expression(Expression), - If { - test: Expression, - consequent: Box, - alternate: Option>, - }, - Loop(LoopStatement), + ExpressionStatement(ExpressionStatement), + BlockStatement(BlockStatement), + EmptyStatement, + DebuggerStatement, + WithStatement(WithStatement), + ReturnStatement(ReturnStatement), + 
LabeledStatement(LabeledStatement), + BreakStatement(BreakStatement), + ContinueStatement(ContinueStatement), + IfStatement(IfStatement), + SwitchStatement(SwitchStatement), + ThrowStatement(ThrowStatement), + TryStatement(TryStatement), + WhileStatement(WhileStatement), + DoWhileStatement(DoWhileStatement), + ForStatement(ForStatement), + ForInStatement(ForInStatement), + ForOfStatement(ForOfStatement), Declaration(Declaration), - Return(Option), - Labeled { - label: Box, - body: Box, - }, - Break(Option>), - Continue(Option>), - Try { - block: Box, - handler: Option, - finalizer: Option>, - }, - Throw(Expression), - Switch { - discriminant: Expression, - cases: Vec, - }, - Import { - specifiers: Vec, - source: Box, - assertions: Vec, - }, - Export(ExportDeclaration), - With { - object: Expression, - body: Box, - }, - Debugger, -} - -#[derive(Debug, Clone)] -pub enum LoopStatement { - While { - test: Expression, - body: Box, - }, - DoWhile { - body: Box, - test: Expression, - }, - For { - init: Option, - test: Option, - update: Option, - body: Box, - }, - ForIn { - left: ForInit, - right: Expression, - body: Box, - }, - ForOf { - left: ForInit, - right: Expression, - body: Box, - is_await: bool, - }, -} - -#[derive(Debug, Clone)] +} + +/// Represents a JavaScript expression +#[derive(Debug, Clone, PartialEq)] +pub enum Expression { + Identifier(Identifier), + Literal(Literal), + ThisExpression(ThisExpression), + ArrayExpression(ArrayExpression), + ObjectExpression(ObjectExpression), + FunctionExpression(FunctionExpression), + ArrowFunctionExpression(ArrowFunctionExpression), + ClassExpression(ClassExpression), + TaggedTemplateExpression(TaggedTemplateExpression), + MemberExpression(MemberExpression), + SuperExpression(SuperExpression), + MetaProperty(MetaProperty), + NewExpression(NewExpression), + CallExpression(CallExpression), + UpdateExpression(UpdateExpression), + AwaitExpression(AwaitExpression), + UnaryExpression(UnaryExpression), + 
BinaryExpression(BinaryExpression), + LogicalExpression(LogicalExpression), + ConditionalExpression(ConditionalExpression), + YieldExpression(YieldExpression), + AssignmentExpression(AssignmentExpression), + SequenceExpression(SequenceExpression), +} + +/// Represents a JavaScript declaration +#[derive(Debug, Clone, PartialEq)] pub enum Declaration { - Variable(VariableDeclaration), - Function(FunctionDeclaration), - Class(ClassDeclaration), + VariableDeclaration(VariableDeclaration), + FunctionDeclaration(FunctionDeclaration), + ClassDeclaration(ClassDeclaration), + ImportDeclaration(ImportDeclaration), + ExportNamedDeclaration(ExportNamedDeclaration), + ExportDefaultDeclaration(ExportDefaultDeclaration), + ExportAllDeclaration(ExportAllDeclaration), } -#[derive(Debug, Clone)] -pub struct VariableDeclaration { - pub declarations: Vec, - pub kind: VariableKind, +/// Represents a JavaScript pattern (destructuring) +#[derive(Debug, Clone, PartialEq)] +pub enum Pattern { + Identifier(Identifier), + ObjectPattern(ObjectPattern), + ArrayPattern(ArrayPattern), + RestElement(RestElement), + AssignmentPattern(AssignmentPattern), + MemberExpression(MemberExpression), } -#[derive(Debug, Clone)] -pub struct FunctionDeclaration { - pub id: Box, - pub params: Vec, - pub body: Vec, - pub is_async: bool, - pub is_generator: bool, +/// Represents an identifier +#[derive(Debug, Clone, PartialEq)] +pub struct Identifier { + + pub name: Box, } -#[derive(Debug, Clone)] -pub struct ClassDeclaration { - pub id: Box, - pub super_class: Option, - pub body: Vec, -} - -#[derive(Debug, Clone)] -pub enum ClassMember { - Constructor { - params: Vec, - body: Vec, - }, - Method { - key: PropertyKey, - value: MethodDefinition, - kind: MethodKind, - is_static: bool, - }, - Property { - key: PropertyKey, - value: Option, - is_static: bool, - }, - StaticBlock { - body: Vec, - }, -} - -#[derive(Debug, Clone)] -pub struct MethodDefinition { - pub params: Vec, +#[derive(Debug, Clone, PartialEq)] +pub 
struct PrivateIdentifier { + pub name: Box, +} + +/// Represents a literal value +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + StringLiteral(StringLiteral), + BooleanLiteral(BooleanLiteral), + NullLiteral(NullLiteral), + NumericLiteral(NumericLiteral), + BigIntLiteral(BigIntLiteral), + RegExpLiteral(RegExpLiteral), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StringLiteral { + pub value: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BooleanLiteral { + pub value: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NullLiteral { + +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NumericLiteral { + pub value: f64, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BigIntLiteral { + pub value: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct RegExpLiteral { + pub pattern: Box, + pub flags: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BlockStatement { pub body: Vec, - pub is_async: bool, - pub is_generator: bool, } -#[derive(Debug, Clone)] -pub enum PropertyKey { - Identifier(Box), - StringLiteral(Box), - NumericLiteral(f64), - Computed(Expression), - PrivateIdentifier(Box), +#[derive(Debug, Clone, PartialEq)] +pub struct ExpressionStatement { + pub expression: Box, } #[derive(Debug, Clone, PartialEq)] -pub enum MethodKind { - Method, - Getter, - Setter, +pub struct WithStatement { + pub object: Box, + pub body: Box, } -#[derive(Debug, Clone)] -pub enum ExportDeclaration { - Named { - declaration: Option>, - specifiers: Vec, - source: Option>, - }, - Default(Box), - All { - source: Box, - exported: Option>, - }, +#[derive(Debug, Clone, PartialEq)] +pub struct ReturnStatement { + pub argument: Option>, } -#[derive(Debug, Clone)] -pub enum ExportDefaultDeclaration { - Expression(Expression), - Function(FunctionDeclaration), - Class(ClassDeclaration), +#[derive(Debug, Clone, PartialEq)] +pub struct LabeledStatement { + pub label: Identifier, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub 
struct BreakStatement { + pub label: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ContinueStatement { + pub label: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct IfStatement { + pub test: Box, + pub consequent: Box, + pub alternate: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct SwitchStatement { + pub discriminant: Box, + pub cases: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct SwitchCase { + pub test: Option>, + pub consequent: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ThrowStatement { + pub argument: Box, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] +pub struct TryStatement { + pub block: BlockStatement, + pub handler: Option, + pub finalizer: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct CatchClause { + pub param: Option, + pub body: BlockStatement, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct WhileStatement { + pub test: Box, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct DoWhileStatement { + pub body: Box, + pub test: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ForStatement { + pub init: Option, + pub test: Option>, + pub update: Option>, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] pub enum ForInit { - Declaration(VariableDeclaration), - Pattern(Expression), + VariableDeclaration(VariableDeclaration), + Expression(Box), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ForInStatement { + pub left: ForInOf, + pub right: Box, + pub body: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ForOfStatement { + pub left: ForInOf, + pub right: Box, + pub body: Box, + pub await_token: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ForInOf { + VariableDeclaration(VariableDeclaration), + Pattern(Pattern), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VariableDeclaration { + pub declarations: Vec, + pub kind: VariableKind, } #[derive(Debug, Clone, PartialEq)] @@ -191,228 +283,223 @@ 
pub enum VariableKind { Const, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct VariableDeclarator { - pub id: Expression, // TODO maybe tighter - pub init: Option, + pub id: Pattern, + pub init: Option>, } -#[derive(Debug, Clone)] -pub struct CatchClause { - pub param: Option, - pub body: Box, +#[derive(Debug, Clone, PartialEq)] +pub struct ThisExpression { + } -#[derive(Debug, Clone)] -pub struct SwitchCase { - pub test: Option, - pub consequent: Vec, +#[derive(Debug, Clone, PartialEq)] +pub struct ArrayExpression { + pub elements: Vec>, } -#[derive(Debug, Clone)] -pub enum ImportSpecifier { - Named { - imported: Box, - local: Box, - }, - Default(Box), - Namespace(Box), +#[derive(Debug, Clone, PartialEq)] +pub struct ObjectExpression { + pub properties: Vec, } -#[derive(Debug, Clone)] -pub struct ImportAssertion { - pub key: Box, - pub value: Box, +#[derive(Debug, Clone, PartialEq)] +pub struct Property { + pub key: PropertyKey, + pub value: Box, + pub kind: PropertyKind, + pub method: bool, + pub shorthand: bool, + pub computed: bool, } -#[derive(Debug, Clone)] -pub struct ExportSpecifier { - pub local: Box, - pub exported: Box, +#[derive(Debug, Clone, PartialEq)] +pub enum PropertyKey { + Identifier(Identifier), + PrivateIdentifier(PrivateIdentifier), + Literal(Literal), + Expression(Box), } -#[derive(Debug, Clone)] -pub enum Expression { - Identifier(Box), - This, - Super, - Literal(Literal), - Array(Vec), - Object(Vec), - Function { - id: Option>, - params: Vec, - body: Vec, - is_async: bool, - is_generator: bool, - }, - ArrowFunction { - params: Vec, - body: ArrowFunctionBody, - is_async: bool, - }, - Class { - id: Option>, - super_class: Option>, - body: Vec, - }, - Unary { - operator: UnaryOperator, - argument: Box, - prefix: bool, - }, - Binary { - operator: BinaryOperator, - left: Box, - right: Box, - }, - Logical { - operator: LogicalOperator, - left: Box, - right: Box, - }, - Assignment { - operator: AssignmentOperator, - left: 
Box, - right: Box, - }, - Member { - object: Box, - property: Box, - computed: bool, - optional: bool, - }, - Call { - callee: Box, - arguments: Vec, - optional: bool, - }, - New(Box), - Conditional { - test: Box, - consequent: Box, - alternate: Box, - }, - TemplateLiteral { - quasis: Vec>, - expressions: Vec, - }, - TaggedTemplate { - tag: Box, - quasi: Box, - }, - Sequence(Vec), - Spread(Box), - Yield { - argument: Option>, - delegate: bool, - }, - Await(Box), - OptionalChain { - base: Box, - chain: Vec, - }, - Import(Box), - MetaProperty { - meta: Box, - property: Box, - }, - PrivateName(Box), - ChainExpression(Box), -} - -#[derive(Debug, Clone)] -pub enum ArrayElement { - Expression(Expression), - Spread(Expression), - Hole, -} - -#[derive(Debug, Clone)] -pub enum Argument { - Expression(Expression), - Spread(Expression), -} - -#[derive(Debug, Clone)] -pub enum OptionalChainElement { - Property { - name: Box, - computed: bool, - }, - Call { - arguments: Vec, - }, -} - -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] +pub enum PropertyKind { + Init, + Get, + Set, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct FunctionExpression { + pub id: Option, + pub params: Vec, + pub body: BlockStatement, + pub generator: bool, + pub async_function: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ArrowFunctionExpression { + pub params: Vec, + pub body: ArrowFunctionBody, + pub expression: bool, + pub async_function: bool, +} + +#[derive(Debug, Clone, PartialEq)] pub enum ArrowFunctionBody { - Block(Vec), + BlockStatement(BlockStatement), Expression(Box), } -#[derive(Debug, Clone)] -pub enum ObjectProperty { - Property { - key: PropertyKey, - value: Expression, - kind: PropertyKind, - computed: bool, - shorthand: bool, - }, - Method { - key: PropertyKey, - value: MethodDefinition, - kind: MethodKind, - computed: bool, - }, - Spread(Expression), +#[derive(Debug, Clone, PartialEq)] +pub struct FunctionDeclaration { + pub id: Option, + pub params: 
Vec, + pub body: BlockStatement, + pub generator: bool, + pub async_function: bool, } #[derive(Debug, Clone, PartialEq)] -pub enum PropertyKind { - Init, +pub struct ClassDeclaration { + pub id: Option, + pub super_class: Option>, + pub body: ClassBody, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ClassExpression { + pub id: Option, + pub super_class: Option>, + pub body: ClassBody, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ClassBody { + pub body: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ClassElement { + MethodDefinition(MethodDefinition), + StaticBlock(StaticBlock), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StaticBlock { + pub body: BlockStatement, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct MethodDefinition { + pub key: PropertyKey, + pub value: FunctionExpression, + pub kind: MethodKind, + pub computed: bool, + pub static_method: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum MethodKind { + Constructor, + Method, Get, Set, } -#[derive(Debug, Clone)] -pub enum Literal { - Number(f64), - String(Box), - Boolean(bool), - Null, - Undefined, - RegExp { - pattern: Box, - flags: Box, - }, - BigInt(Box), -} - -#[derive(Clone)] -pub struct Comment { - pub text: Box, - pub is_block: bool, - pub span: (u32, u32), +#[derive(Debug, Clone, PartialEq)] +pub struct TaggedTemplateExpression { + pub tag: Box, + pub quasi: TemplateLiteral, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateLiteral { + pub quasis: Vec, + pub expressions: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateElement { + pub value: TemplateElementValue, + pub tail: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateElementValue { + pub raw: Box, + pub cooked: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct MemberExpression { + pub object: Box, + pub property: MemberProperty, + pub computed: bool, + pub optional: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum MemberProperty { + 
Identifier(Identifier), + Expression(Box), + PrivateIdentifier(PrivateIdentifier), // TODO implement +} + +#[derive(Debug, Clone, PartialEq)] +pub struct SuperExpression { + +} + +#[derive(Debug, Clone, PartialEq)] +pub struct MetaProperty { + pub meta: Identifier, + pub property: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NewExpression { + pub callee: Box, + pub arguments: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct CallExpression { + pub callee: Box, + pub arguments: Vec, + pub optional: bool, } -impl std::fmt::Debug for Comment { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Comment") - .field("text", &self.text) - .field("is_block", &self.is_block) - .field("span", &self.span) - .finish() - } +#[derive(Debug, Clone, PartialEq)] +pub struct UpdateExpression { + pub operator: UpdateOperator, + pub argument: Box, + pub prefix: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum UpdateOperator { + Increment, + Decrement, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct AwaitExpression { + pub argument: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct UnaryExpression { + pub operator: UnaryOperator, + pub argument: Box, + pub prefix: bool, } #[derive(Debug, Clone, PartialEq)] pub enum UnaryOperator { Minus, Plus, - Increment, - Decrement, Not, BitwiseNot, Typeof, @@ -420,33 +507,46 @@ pub enum UnaryOperator { Delete, } +#[derive(Debug, Clone, PartialEq)] +pub struct BinaryExpression { + pub operator: BinaryOperator, + pub left: Box, + pub right: Box, +} + #[derive(Debug, Clone, PartialEq)] pub enum BinaryOperator { - Add, - Subtract, - Multiply, - Divide, - Modulo, - Exponent, Equal, - StrictEqual, NotEqual, + StrictEqual, StrictNotEqual, LessThan, - LessThanEqual, + LessThanOrEqual, GreaterThan, - GreaterThanEqual, - BitwiseAnd, - BitwiseOr, - BitwiseXor, + GreaterThanOrEqual, LeftShift, RightShift, UnsignedRightShift, + Addition, + Subtraction, + Multiplication, + 
Division, + Remainder, + Exponentiation, + BitwiseOr, + BitwiseXor, + BitwiseAnd, In, - Of, InstanceOf, } +#[derive(Debug, Clone, PartialEq)] +pub struct LogicalExpression { + pub operator: LogicalOperator, + pub left: Box, + pub right: Box, +} + #[derive(Debug, Clone, PartialEq)] pub enum LogicalOperator { And, @@ -454,22 +554,155 @@ pub enum LogicalOperator { NullishCoalescing, } +#[derive(Debug, Clone, PartialEq)] +pub struct ConditionalExpression { + pub test: Box, + pub consequent: Box, + pub alternate: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct YieldExpression { + + pub argument: Option>, + pub delegate: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct AssignmentExpression { + pub operator: AssignmentOperator, + pub left: AssignmentLeft, + pub right: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum AssignmentLeft { + Pattern(Pattern), + Expression(Box), +} + #[derive(Debug, Clone, PartialEq)] pub enum AssignmentOperator { Assign, - AddAssign, - SubtractAssign, + PlusAssign, + MinusAssign, MultiplyAssign, DivideAssign, - ModuloAssign, - ExponentAssign, - BitwiseAndAssign, - BitwiseOrAssign, - BitwiseXorAssign, + RemainderAssign, + ExponentiationAssign, LeftShiftAssign, RightShiftAssign, UnsignedRightShiftAssign, - LogicalAndAssign, + BitwiseOrAssign, + BitwiseXorAssign, + BitwiseAndAssign, LogicalOrAssign, - NullishAssign, + LogicalAndAssign, + NullishCoalescingAssign, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct SequenceExpression { + pub expressions: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ObjectPattern { + pub properties: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ObjectPatternProperty { + Property(ObjectProperty), + RestElement(RestElement), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ObjectProperty { + pub key: PropertyKey, + pub value: Pattern, + pub computed: bool, + pub shorthand: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ArrayPattern { + pub elements: 
Vec>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct RestElement { + pub argument: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct AssignmentPattern { + pub left: Box, + pub right: Box, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ImportDeclaration { + pub specifiers: Vec, + pub source: StringLiteral, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ImportSpecifier { + ImportSpecifier(NamedImportSpecifier), + ImportDefaultSpecifier(ImportDefaultSpecifier), + ImportNamespaceSpecifier(ImportNamespaceSpecifier), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NamedImportSpecifier { + pub imported: Identifier, + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ImportDefaultSpecifier { + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ImportNamespaceSpecifier { + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportNamedDeclaration { + pub declaration: Option>, + pub specifiers: Vec, + pub source: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportSpecifier { + pub exported: Identifier, + pub local: Identifier, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportDefaultDeclaration { + pub declaration: ExportDefaultDeclarationKind, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ExportDefaultDeclarationKind { + Declaration(Box), + Expression(Box), +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ExportAllDeclaration { + pub source: StringLiteral, + pub exported: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Comment { + pub text: String, + pub multiline: bool, + pub location: SourceLocation, } diff --git a/src/grammar/array.rs b/src/grammar/array.rs new file mode 100644 index 0000000..2134358 --- /dev/null +++ b/src/grammar/array.rs @@ -0,0 +1,45 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::expression::*; + +pub struct ArrayExpressionParser; + +impl ArrayExpressionParser { + pub 
fn new() -> Self { + Self + } +} + +impl ParserCombinator for ArrayExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBracket, "Expected '[' at the start of array expression")?; + + let mut elements = Vec::new(); + + while !parser.check(&Token::RightBracket) && !parser.is_at_end() { + if parser.consume(&Token::Comma) { + // Handle elision (hole in the array) + elements.push(None); + } else { + // Parse regular element + let element = ExpressionParser::new().parse(parser)?; + elements.push(Some(element)); + + // If there's no comma, we should be at the end + if !parser.consume(&Token::Comma) { + break; + } + + // If we see a right bracket after a comma, it's a trailing comma + if parser.check(&Token::RightBracket) { + break; + } + } + } + + parser.assert_consume(&Token::RightBracket, "Expected ']' at the end of array expression")?; + + Ok(ArrayExpression { elements }) + } +} diff --git a/src/grammar/call.rs b/src/grammar/call.rs new file mode 100644 index 0000000..be05177 --- /dev/null +++ b/src/grammar/call.rs @@ -0,0 +1,49 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::expression::*; + +pub struct CallExpressionParser; + +impl CallExpressionParser { + pub fn new() -> Self { + Self + } + + pub fn parse_with_callee(&self, parser: &mut Parser, callee: Expression) -> ParseResult { + + let optional = parser.consume(&Token::QuestionDot); + + if optional && !parser.check(&Token::LeftParen) { + return Err(parser.error_at_current("Expected '(' after optional chaining operator in function call")); + } + + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut arguments = Vec::new(); + + if !parser.check(&Token::RightParen) { + arguments.push(ExpressionParser::new().parse(parser)?); + + while parser.consume(&Token::Comma) && !parser.check(&Token::RightParen) { + arguments.push(ExpressionParser::new().parse(parser)?); + } + } + + 
parser.assert_consume(&Token::RightParen, "Expected ')' after function arguments")?; + + Ok(CallExpression { + callee: Box::new(callee), + arguments, + optional, + }) + } + +} + +impl ParserCombinator for CallExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let callee = ExpressionParser::new().parse(parser)?; + self.parse_with_callee(parser, callee) + } +} diff --git a/src/grammar/class.rs b/src/grammar/class.rs new file mode 100644 index 0000000..bfc233d --- /dev/null +++ b/src/grammar/class.rs @@ -0,0 +1,223 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::expression::*; +use super::pattern::*; +use super::statement::*; +use super::function::*; +use super::literal::*; + +pub struct ClassDeclarationParser; + +impl ClassDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ClassDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Class, "Expected 'class'")?; + + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierParser::new().parse(parser)?) + } else { + None + }; + + let super_class = if parser.consume(&Token::Extends) { + Some(Box::new(ExpressionParser::new().parse(parser)?)) + } else { + None + }; + + let body = ClassBodyParser::new().parse(parser)?; + + Ok(ClassDeclaration { + id, + super_class, + body, + }) + } +} + +pub struct ClassExpressionParser; + +impl ClassExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ClassExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Class, "Expected 'class'")?; + + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierParser::new().parse(parser)?) 
+ } else { + None + }; + + let super_class = if parser.consume(&Token::Extends) { + Some(Box::new(ExpressionParser::new().parse(parser)?)) + } else { + None + }; + + let body = ClassBodyParser::new().parse(parser)?; + + Ok(ClassExpression { + id, + super_class, + body, + }) + } +} + +pub struct SuperExpressionParser; + +impl SuperExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for SuperExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Super, "Expected 'super'")?; + + Ok(SuperExpression {}) + } +} + +pub struct ClassBodyParser; + +impl ClassBodyParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ClassBodyParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBrace, "Expected '{' after class declaration")?; + + let mut body = Vec::new(); + + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + // Check for static block + if parser.consume(&Token::Static) && parser.check(&Token::LeftBrace) { + let static_block = StaticBlockParser::new().parse(parser)?; + body.push(ClassElement::StaticBlock(static_block)); + continue; + } + + // Parse method definition + let method = MethodDefinitionParser::new().parse(parser)?; + body.push(ClassElement::MethodDefinition(method)); + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after class body")?; + + Ok(ClassBody { body }) + } +} + +pub struct MethodDefinitionParser; + +impl MethodDefinitionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for MethodDefinitionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for static modifier + let static_method = parser.consume(&Token::Static); + + // Check for method kind + let mut kind = MethodKind::Method; + if parser.consume(&Token::Get) { + kind = MethodKind::Get; + } else if parser.consume(&Token::Set) { + kind = MethodKind::Set; + } else if 
parser.consume(&Token::Constructor) { + kind = MethodKind::Constructor; + } + + // Check for async modifier + let async_method = parser.consume(&Token::Async); + + // Check for generator modifier + let generator = parser.consume(&Token::Star); + + // Parse the key + let (key, computed) = if parser.consume(&Token::LeftBracket) { + // Computed property name + let expr = ExpressionParser::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property name")?; + (PropertyKey::Expression(Box::new(expr)), true) + } else if parser.check(&Token::Hash) { + // Private field or method + parser.advance(); // Consume the '#' + if let Token::Identifier(name) = parser.peek() { + // Clone the name before advancing the parser + let name_clone = name.clone(); + + // Now advance the parser + parser.advance(); + + (PropertyKey::PrivateIdentifier(PrivateIdentifier { name: name_clone.into() }), false) + } else { + return Err(parser.error_at_current("Expected identifier after '#'")); + } + } else { + // Regular identifier or literal + match parser.peek() { + Token::StringLiteral(_) | + Token::NumberLiteral(_) => { + let literal = LiteralParser::new().parse(parser)?; + (PropertyKey::Literal(literal), false) + }, + _ => { + let ident = IdentifierParser::new().parse(parser)?; + (PropertyKey::Identifier(ident), false) + } + } + }; + + // Parse the function body + parser.assert_consume(&Token::LeftParen, "Expected '(' after method name")?; + + // Create a function expression for the method + let mut func_expr = FunctionExpressionParser::new().parse(parser)?; + func_expr.generator = generator; + func_expr.async_function = async_method; + + Ok(MethodDefinition { + key, + value: func_expr, + kind, + computed, + static_method, + }) + } +} + +pub struct StaticBlockParser; + +impl StaticBlockParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for StaticBlockParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let block = 
BlockStatementParser::new().parse(parser)?; + + Ok(StaticBlock { body: block }) + } +} diff --git a/src/grammar/declaration.rs b/src/grammar/declaration.rs new file mode 100644 index 0000000..bc09aac --- /dev/null +++ b/src/grammar/declaration.rs @@ -0,0 +1,497 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::literal::*; +use super::pattern::*; +use super::expression::*; +use super::statement::*; +use super::class::*; + +pub struct VariableDeclarationParser; + +impl VariableDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for VariableDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Parse variable kind + let kind = if parser.consume(&Token::Var) { + VariableKind::Var + } else if parser.consume(&Token::Let) { + VariableKind::Let + } else if parser.consume(&Token::Const) { + VariableKind::Const + } else { + return Err(parser.error_at_current("Expected variable declaration")); + }; + + let mut declarations = Vec::new(); + + // For let/const, track declared identifiers to enforce TDZ + let mut declared_identifiers = Vec::new(); + + // Parse first declarator + let declarator = VariableDeclaratorParser::new().parse(parser)?; + + // For let/const, collect identifiers for TDZ enforcement + if matches!(kind, VariableKind::Let | VariableKind::Const) { + collect_binding_identifiers(&declarator.id, &mut declared_identifiers); + } + + declarations.push(declarator); + + // Parse additional declarators + while parser.consume(&Token::Comma) { + let declarator = VariableDeclaratorParser::new().parse(parser)?; + + // For let/const, collect identifiers and check TDZ + if matches!(kind, VariableKind::Let | VariableKind::Const) { + // If there's an initializer, check that it doesn't reference any of the declared identifiers + if let Some(ref init) = declarator.init { + check_tdz_violation(init, &declared_identifiers, parser)?; + } + + // Add new identifiers to the list + 
collect_binding_identifiers(&declarator.id, &mut declared_identifiers); + } + + declarations.push(declarator); + } + + Ok(VariableDeclaration { + declarations, + kind, + }) + } +} + +/// Parser for variable declarators +pub struct VariableDeclaratorParser; + +impl VariableDeclaratorParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for VariableDeclaratorParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Parse the identifier or pattern + let id = PatternParser::new().parse(parser)?; + + // Parse the initializer if present + let init = if parser.consume(&Token::Equal) { + Some(Box::new(ExpressionParser::new().parse(parser)?)) + } else { + None + }; + + Ok(VariableDeclarator { + id, + init, + }) + } +} + +/// Parser for function declarations +pub struct FunctionDeclarationParser; + +impl FunctionDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for FunctionDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'function' keyword + parser.assert_consume(&Token::Function, "Expected 'function'")?; + + // Check if this is a generator function + let generator = parser.consume(&Token::Star); + + // Parse the function name + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierParser::new().parse(parser)?) 
+ } else { + None + }; + + // Parse the parameter list + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut params = Vec::new(); + + if !parser.check(&Token::RightParen) { + // Parse the first parameter + params.push(PatternParser::new().parse(parser)?); + + // Parse additional parameters + while parser.consume(&Token::Comma) { + params.push(PatternParser::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; + + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: false, allow_yield: generator }, |p| { + BlockStatementParser::new().parse(p) + })?; + + Ok(FunctionDeclaration { + id, + params, + body, + generator, + async_function: false, + }) + } +} + +/// Enum for export declarations +pub enum ExportDeclaration { + Named(ExportNamedDeclaration), + Default(ExportDefaultDeclaration), + All(ExportAllDeclaration), +} + +/// Parser for export declarations +pub struct ExportDeclarationParser; + +impl ExportDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ExportDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'export' keyword + parser.assert_consume(&Token::Export, "Expected 'export'")?; + + // Check for export type + if parser.consume(&Token::Default) { + // Export default declaration + let declaration = if parser.check(&Token::Function) || parser.check(&Token::Class) { + // Function or class declaration + if parser.check(&Token::Function) { + let func = FunctionDeclarationParser::new().parse(parser)?; + ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::FunctionDeclaration(func))) + } else { + let class = ClassDeclarationParser::new().parse(parser)?; + ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::ClassDeclaration(class))) + } + } else { + // Expression + let expr = ExpressionParser::new().parse(parser)?; + 
parser.consume_semicolon("Expected ';' after export default expression")?; + ExportDefaultDeclarationKind::Expression(Box::new(expr)) + }; + + Ok(ExportDeclaration::Default(ExportDefaultDeclaration { declaration })) + } else if parser.consume(&Token::Star) { + // Export all declaration + let exported = if parser.consume(&Token::As) { + Some(IdentifierParser::new().parse(parser)?) + } else { + None + }; + + parser.assert_consume(&Token::From, "Expected 'from' after export *")?; + + let source = match parser.peek() { + Token::StringLiteral(_) => { + let literal = LiteralParser::new().parse(parser)?; + match literal { + Literal::StringLiteral(str_lit) => str_lit, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + }, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + }; + + parser.consume_semicolon("Expected ';' after export * from declaration")?; + + Ok(ExportDeclaration::All(ExportAllDeclaration { + source, + exported, + })) + } else { + // Named export declaration + let declaration = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let var_decl = VariableDeclarationParser::new().parse(parser)?; + Some(Box::new(Declaration::VariableDeclaration(var_decl))) + } else if parser.check(&Token::Function) { + // Function declaration + let func_decl = FunctionDeclarationParser::new().parse(parser)?; + Some(Box::new(Declaration::FunctionDeclaration(func_decl))) + } else if parser.check(&Token::Class) { + // Class declaration + let class_decl = ClassDeclarationParser::new().parse(parser)?; + Some(Box::new(Declaration::ClassDeclaration(class_decl))) + } else { + None + }; + + // If there's no declaration, there must be export specifiers + let mut specifiers = Vec::new(); + + if declaration.is_none() { + // Parse export specifiers + parser.assert_consume(&Token::LeftBrace, "Expected '{' in named export declaration")?; + + if 
!parser.check(&Token::RightBrace) { + // Parse the first specifier + specifiers.push(self.parse_export_specifier(parser)?); + + // Parse additional specifiers + while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { + specifiers.push(self.parse_export_specifier(parser)?); + } + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after export specifiers")?; + } + + // Parse the 'from' clause if present + let source = if parser.consume(&Token::From) { + match parser.peek() { + Token::StringLiteral(_) => { + let literal = LiteralParser::new().parse(parser)?; + match literal { + Literal::StringLiteral(str_lit) => Some(str_lit), + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + }, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + } else { + None + }; + + // Consume the semicolon + parser.consume_semicolon("Expected ';' after export declaration")?; + + Ok(ExportDeclaration::Named(ExportNamedDeclaration { + declaration, + specifiers, + source, + })) + } + } +} + +impl ExportDeclarationParser { + fn parse_export_specifier(&self, parser: &mut Parser) -> ParseResult { + // Parse the local name + let local = IdentifierParser::new().parse(parser)?; + + // Parse the exported name if present + let exported = if parser.consume(&Token::As) { + IdentifierParser::new().parse(parser)? 
+ } else { + // If no 'as', the exported name is the same as the local name + Identifier { name: local.name.clone() } + }; + + Ok(ExportSpecifier { + local, + exported, + }) + } +} + +/// Parser for import declarations +pub struct ImportDeclarationParser; + +impl ImportDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ImportDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'import' keyword + parser.assert_consume(&Token::Import, "Expected 'import'")?; + + let mut specifiers = Vec::new(); + + // Check for import type + if matches!(parser.peek(), Token::StringLiteral(_)) { + // Import without specifiers (side-effect import) + // No specifiers to add + } else if parser.consume(&Token::Star) { + // Namespace import + parser.assert_consume(&Token::As, "Expected 'as' after '*' in import declaration")?; + let local = IdentifierParser::new().parse(parser)?; + + specifiers.push(ImportSpecifier::ImportNamespaceSpecifier(ImportNamespaceSpecifier { + local, + })); + + parser.assert_consume(&Token::From, "Expected 'from' after namespace import")?; + } else if parser.check(&Token::LeftBrace) { + // Named imports + parser.assert_consume(&Token::LeftBrace, "Expected '{' in named import")?; + + if !parser.check(&Token::RightBrace) { + // Parse the first specifier + specifiers.push(self.parse_import_specifier(parser)?); + + // Parse additional specifiers + while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { + specifiers.push(self.parse_import_specifier(parser)?); + } + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after import specifiers")?; + + parser.assert_consume(&Token::From, "Expected 'from' after named imports")?; + } else if matches!(parser.peek(), Token::Identifier(_)) { + // Default import + let local = IdentifierParser::new().parse(parser)?; + + specifiers.push(ImportSpecifier::ImportDefaultSpecifier(ImportDefaultSpecifier { + local, + })); + + // Check 
for additional named imports + if parser.consume(&Token::Comma) { + if parser.consume(&Token::Star) { + // Namespace import after default import + parser.assert_consume(&Token::As, "Expected 'as' after '*' in import declaration")?; + let local = IdentifierParser::new().parse(parser)?; + + specifiers.push(ImportSpecifier::ImportNamespaceSpecifier(ImportNamespaceSpecifier { + local, + })); + } else if parser.consume(&Token::LeftBrace) { + // Named imports after default import + if !parser.check(&Token::RightBrace) { + // Parse the first specifier + specifiers.push(self.parse_import_specifier(parser)?); + + // Parse additional specifiers + while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { + specifiers.push(self.parse_import_specifier(parser)?); + } + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' after import specifiers")?; + } + } + + parser.assert_consume(&Token::From, "Expected 'from' after import specifiers")?; + } + + // Parse the source + let source = match parser.peek() { + Token::StringLiteral(_) => { + let literal = LiteralParser::new().parse(parser)?; + match literal { + Literal::StringLiteral(str_lit) => str_lit, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + } + }, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + }; + + // Consume the semicolon + parser.consume_semicolon("Expected ';' after import declaration")?; + + Ok(ImportDeclaration { + specifiers, + source, + }) + } +} + +impl ImportDeclarationParser { + fn parse_import_specifier(&self, parser: &mut Parser) -> ParseResult { + // Parse the imported name + let imported = IdentifierParser::new().parse(parser)?; + + // Parse the local name if present + let local = if parser.consume(&Token::As) { + IdentifierParser::new().parse(parser)? 
+ } else { + // If no 'as', the local name is the same as the imported name + Identifier { name: imported.name.clone() } + }; + + Ok(ImportSpecifier::ImportSpecifier(NamedImportSpecifier { + imported, + local, + })) + } +} + + + + +// Helper function to collect identifiers from a binding pattern +fn collect_binding_identifiers(pattern: &Pattern, identifiers: &mut Vec>) { + match pattern { + Pattern::Identifier(ident) => { + identifiers.push(ident.name.clone()); + }, + Pattern::ObjectPattern(obj_pattern) => { + for prop in &obj_pattern.properties { + match prop { + ObjectPatternProperty::Property(prop) => { + collect_binding_identifiers(&prop.value, identifiers); + }, + ObjectPatternProperty::RestElement(rest) => { + collect_binding_identifiers(&rest.argument, identifiers); + }, + } + } + }, + Pattern::ArrayPattern(arr_pattern) => { + for elem in arr_pattern.elements.iter().flatten() { + collect_binding_identifiers(elem, identifiers); + } + }, + Pattern::RestElement(rest) => { + collect_binding_identifiers(&rest.argument, identifiers); + }, + Pattern::AssignmentPattern(assign) => { + collect_binding_identifiers(&assign.left, identifiers); + }, + _ => {} + } +} + +// Helper function to check for TDZ violations in initializers +fn check_tdz_violation(expr: &Expression, declared_identifiers: &[Box], parser: &mut Parser) -> ParseResult<()> { + match expr { + Expression::Identifier(ident) => { + if declared_identifiers.contains(&ident.name) { + return Err(parser.error_at_current(&format!( + "Cannot access '{}' before initialization (temporal dead zone violation)", + ident.name + ))); + } + }, + Expression::MemberExpression(member) => { + check_tdz_violation(&member.object, declared_identifiers, parser)?; + if let MemberProperty::Expression(ref expr) = member.property { + check_tdz_violation(expr, declared_identifiers, parser)?; + } + }, + Expression::CallExpression(call) => { + check_tdz_violation(&call.callee, declared_identifiers, parser)?; + for arg in &call.arguments 
{ + check_tdz_violation(arg, declared_identifiers, parser)?; + } + }, + // Add checks for other expression types as needed + _ => {} + } + Ok(()) +} \ No newline at end of file diff --git a/src/grammar/expression.rs b/src/grammar/expression.rs new file mode 100644 index 0000000..15716db --- /dev/null +++ b/src/grammar/expression.rs @@ -0,0 +1,739 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::function::*; +use super::call::*; +use super::class::*; +use super::array::*; +use super::member::*; +use super::pattern::*; +use super::literal::*; +use super::object::*; +use super::this::*; +use super::new::*; + + +/// Parser for JavaScript expressions +pub struct ExpressionParser; + +impl ExpressionParser { + pub fn new() -> Self { + Self + } + + /// Parse an expression with the given precedence level + pub fn parse_with_precedence(&self, parser: &mut Parser, precedence: Precedence) -> ParseResult { + // Parse the prefix expression + let mut left = self.parse_prefix(parser)?; + + // Continue parsing infix expressions as long as they have higher precedence + while !parser.is_at_end() && precedence < self.get_precedence(parser) { + left = self.parse_infix(parser, left)?; + } + + Ok(left) + } + + /// Parse a prefix expression + fn parse_prefix(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + Token::Identifier(_) => { + // Check if this is a single-parameter arrow function + let pos = parser.save_position(); + let ident = IdentifierParser::new().parse(parser)?; + + if parser.check(&Token::Arrow) { + // This is an arrow function with a single parameter + parser.restore_position(pos); + return ArrowFunctionExpressionParser::new().parse(parser).map(Expression::ArrowFunctionExpression); + } + + Ok(Expression::Identifier(ident)) + }, + Token::StringLiteral(_) | + Token::NumberLiteral(_) | + Token::BigIntLiteral(_) | + Token::RegExpLiteral(_, _) | + Token::True | + Token::False | + Token::Null => { + 
LiteralParser::new().parse(parser).map(Expression::Literal)
            },
            // `this`
            Token::This => {
                ThisExpressionParser::new().parse(parser).map(Expression::ThisExpression)
            },
            // Array literal: `[a, b]`
            Token::LeftBracket => {
                ArrayExpressionParser::new().parse(parser).map(Expression::ArrayExpression)
            },
            // Object literal: `{ a: 1 }`
            Token::LeftBrace => {
                ObjectExpressionParser::new().parse(parser).map(Expression::ObjectExpression)
            },
            Token::Function => {
                FunctionExpressionParser::new().parse(parser).map(Expression::FunctionExpression)
            },
            Token::Class => {
                ClassExpressionParser::new().parse(parser).map(Expression::ClassExpression)
            },
            Token::New => {
                NewExpressionParser::new().parse(parser).map(Expression::NewExpression)
            },
            Token::Super => {
                SuperExpressionParser::new().parse(parser).map(Expression::SuperExpression)
            },
            // Either a parenthesized expression or a parenthesized arrow parameter list;
            // `parse_grouped_expression` decides which.
            Token::LeftParen => {
                self.parse_grouped_expression(parser)
            },
            Token::PlusPlus |
            Token::MinusMinus => {
                self.parse_prefix_update_expression(parser)
            },
            Token::Plus |
            Token::Minus |
            Token::Bang |
            Token::Tilde |
            Token::Typeof |
            Token::Void |
            Token::Delete => {
                self.parse_unary_expression(parser)
            },
            Token::Await => {
                if parser.allows_await() {
                    AwaitExpressionParser::new().parse(parser).map(Expression::AwaitExpression)
                } else {
                    Err(parser.error_at_current("'await' expression is only allowed within async functions"))
                }
            },
            Token::Yield => {
                if parser.allows_yield() {
                    YieldExpressionParser::new().parse(parser).map(Expression::YieldExpression)
                } else {
                    Err(parser.error_at_current("'yield' expression is only allowed within generator functions"))
                }
            },
            Token::Async => {
                // `async` can start three different things, so look ahead before committing.
                if parser.peek_next(1) == &Token::Function {
                    // `async function ...`: the async function parser expects `function`
                    // immediately after `async`, so it must only be used for this form.
                    AsyncFunctionExpressionParser::new().parse(parser).map(Expression::FunctionExpression)
                } else if self.is_arrow_function_ahead(parser) {
                    // `async (a, b) => ...` or `async x => ...`: the lookahead skips the
                    // `async` keyword and restores the position; the arrow function parser
                    // then consumes `async` itself and records it on the node.
                    ArrowFunctionExpressionParser::new().parse(parser).map(Expression::ArrowFunctionExpression)
                } else {
                    // Otherwise, it's just an identifier named "async"
IdentifierParser::new().parse(parser).map(Expression::Identifier) + } + }, + _ => Err(parser.error_at_current("Expected an expression")), + } + } + + /// Parse an infix expression + fn parse_infix(&self, parser: &mut Parser, left: Expression) -> ParseResult { + match parser.peek() { + Token::LeftParen => { + CallExpressionParser::new().parse_with_callee(parser, left).map(Expression::CallExpression) + }, + Token::Dot | + Token::LeftBracket => { + MemberExpressionParser::new().parse_with_object(parser, left).map(Expression::MemberExpression) + }, + Token::QuestionDot => { + // Optional chaining + parser.advance(); // Consume the '?.' + + // Check if this is an optional function call + if parser.check(&Token::LeftParen) { + // Optional function call: obj?.(args) + let call_expr = CallExpressionParser::new().parse_with_callee(parser, left)?; + Ok(Expression::CallExpression(call_expr)) + } else { + // Optional property access: obj?.prop + let member_expr = MemberExpressionParser::new().parse_with_object(parser, left)?; + Ok(Expression::MemberExpression(member_expr)) + } + }, + Token::PlusPlus | + Token::MinusMinus => { + self.parse_postfix_update_expression(parser, left) + }, + Token::Plus | + Token::Minus | + Token::Star | + Token::Slash | + Token::Percent | + Token::StarStar | + Token::LessLess | + Token::GreaterGreater | + Token::GreaterGreaterGreater | + Token::Ampersand | + Token::Pipe | + Token::Caret | + Token::EqualEqual | + Token::BangEqual | + Token::EqualEqualEqual | + Token::BangEqualEqual | + Token::Less | + Token::LessEqual | + Token::Greater | + Token::GreaterEqual | + Token::In | + Token::InstanceOf => { + self.parse_binary_expression(parser, left) + }, + Token::AmpersandAmpersand | + Token::PipePipe | + Token::QuestionQuestion => { + self.parse_logical_expression(parser, left) + }, + Token::Question => { + self.parse_conditional_expression(parser, left) + }, + Token::Equal | + Token::PlusEqual | + Token::MinusEqual | + Token::StarEqual | + 
Token::SlashEqual | + Token::PercentEqual | + Token::StarStarEqual | + Token::LessLessEqual | + Token::GreaterGreaterEqual | + Token::GreaterGreaterGreaterEqual | + Token::AmpersandEqual | + Token::PipeEqual | + Token::CaretEqual | + Token::AmpersandAmpersandEqual | + Token::PipePipeEqual | + Token::QuestionQuestionEqual => { + self.parse_assignment_expression(parser, left) + }, + Token::Comma => { + self.parse_sequence_expression(parser, left) + }, + Token::Arrow => { + // This should be handled by the arrow function parser + Err(parser.error_at_current("Unexpected arrow function")) + }, + _ => Ok(left), + } + } + + fn parse_grouped_expression(&self, parser: &mut Parser) -> ParseResult { + // Save position in case we need to backtrack for arrow functions + let pos = parser.save_position(); + + // Check if this might be an arrow function with parameters + if self.is_arrow_function_ahead(parser) { + parser.restore_position(pos); + return ArrowFunctionExpressionParser::new().parse(parser).map(Expression::ArrowFunctionExpression); + } + + // Consume the opening parenthesis + parser.assert_consume(&Token::LeftParen, "Expected '(' at the start of grouped expression")?; + + // Check for empty parentheses + if parser.consume(&Token::RightParen) { + return Err(parser.error_at_current("Empty parentheses are not a valid expression")); + } + + // Parse the expression inside the parentheses + let expr = self.parse_with_precedence(parser, Precedence::Lowest)?; + + // Check for trailing comma (which is not allowed in grouped expressions) + if parser.consume(&Token::Comma) { + return Err(parser.error_at_current("Unexpected trailing comma in grouped expression")); + } + + // Consume the closing parenthesis + parser.assert_consume(&Token::RightParen, "Expected ')' after expression")?; + + Ok(expr) + } + + // Helper method to check if an arrow function is ahead + fn is_arrow_function_ahead(&self, parser: &mut Parser) -> bool { + // Save position + let pos = parser.save_position(); + 
+ // Skip the async keyword if present + if parser.check(&Token::Async) { + parser.advance(); + } + + // Check for single parameter without parentheses + if matches!(parser.peek(), Token::Identifier(_)) { + let id_pos = parser.save_position(); + parser.advance(); // Skip identifier + + if parser.check(&Token::Arrow) { + parser.restore_position(pos); + return true; + } + + parser.restore_position(id_pos); + } + + // Check for parameters in parentheses + if !parser.consume(&Token::LeftParen) { + parser.restore_position(pos); + return false; + } + + // Empty parameter list + if parser.consume(&Token::RightParen) { + let is_arrow = parser.check(&Token::Arrow); + parser.restore_position(pos); + return is_arrow; + } + + // Skip parameters and watch for trailing comma + let mut depth = 1; + let mut had_comma = false; + + while depth > 0 && !parser.is_at_end() { + match parser.peek() { + Token::LeftParen => { + depth += 1; + parser.advance(); + }, + Token::RightParen => { + depth -= 1; + if depth == 0 { + // Check if we just saw a comma before this right paren + if had_comma { + // This is a trailing comma in parameter list + parser.advance(); // Consume the right paren + let is_arrow = parser.check(&Token::Arrow); + parser.restore_position(pos); + return is_arrow; + } + } + parser.advance(); + }, + Token::Comma => { + had_comma = true; + parser.advance(); + }, + _ => { + had_comma = false; + parser.advance(); + } + } + } + + // Check if the next token is an arrow + let is_arrow = parser.check(&Token::Arrow); + + // Restore position + parser.restore_position(pos); + + is_arrow + } + + /// Get the precedence of the current token + fn get_precedence(&self, parser: &mut Parser) -> Precedence { + match parser.peek() { + Token::Comma => Precedence::Comma, + + Token::Equal | + Token::PlusEqual | + Token::MinusEqual | + Token::StarEqual | + Token::SlashEqual | + Token::PercentEqual | + Token::StarStarEqual | + Token::LessLessEqual | + Token::GreaterGreaterEqual | + 
Token::GreaterGreaterGreaterEqual | + Token::AmpersandEqual | + Token::PipeEqual | + Token::CaretEqual | + Token::AmpersandAmpersandEqual | + Token::PipePipeEqual | + Token::QuestionQuestionEqual => Precedence::Assignment, + + Token::Question => Precedence::Conditional, + + Token::QuestionQuestion => Precedence::NullishCoalescing, + Token::PipePipe => Precedence::LogicalOr, + Token::AmpersandAmpersand => Precedence::LogicalAnd, + + Token::Pipe => Precedence::BitwiseOr, + Token::Caret => Precedence::BitwiseXor, + Token::Ampersand => Precedence::BitwiseAnd, + + Token::EqualEqual | + Token::BangEqual | + Token::EqualEqualEqual | + Token::BangEqualEqual => Precedence::Equality, + + Token::Less | + Token::LessEqual | + Token::Greater | + Token::GreaterEqual | + Token::In | + Token::InstanceOf => Precedence::Relational, + + Token::LessLess | + Token::GreaterGreater | + Token::GreaterGreaterGreater => Precedence::Shift, + + Token::Plus | + Token::Minus => Precedence::Additive, + + Token::Star | + Token::Slash | + Token::Percent => Precedence::Multiplicative, + + Token::StarStar => Precedence::Exponentiation, + + Token::PlusPlus | + Token::MinusMinus => Precedence::Postfix, + + Token::Dot | + Token::QuestionDot | + Token::LeftBracket | + Token::LeftParen => Precedence::Call, + + _ => Precedence::Lowest, + } + } + + /// Parse a prefix update expression (++x, --x) + fn parse_prefix_update_expression(&self, parser: &mut Parser) -> ParseResult { + // Parse the operator + let operator = match parser.peek() { + Token::PlusPlus => { + parser.advance(); + UpdateOperator::Increment + }, + Token::MinusMinus => { + parser.advance(); + UpdateOperator::Decrement + }, + _ => return Err(parser.error_at_current("Expected '++' or '--'")), + }; + + // Parse the argument + let argument = self.parse_with_precedence(parser, Precedence::Prefix)?; + + // Check that the argument is a valid left-hand side expression + if !self.is_valid_lhs_expression(&argument) { + return 
Err(parser.error_at_current("Invalid left-hand side in prefix operation")); + } + + Ok(Expression::UpdateExpression(UpdateExpression { + operator, + argument: Box::new(argument), + prefix: true, + })) + } + + /// Parse a postfix update expression (x++, x--) + fn parse_postfix_update_expression(&self, parser: &mut Parser, left: Expression) -> ParseResult { + // Check that the left expression is a valid left-hand side expression + if !self.is_valid_lhs_expression(&left) { + return Err(parser.error_at_current("Invalid left-hand side in postfix operation")); + } + + // Parse the operator + let operator = match parser.peek() { + Token::PlusPlus => { + parser.advance(); + UpdateOperator::Increment + }, + Token::MinusMinus => { + parser.advance(); + UpdateOperator::Decrement + }, + _ => return Err(parser.error_at_current("Expected '++' or '--'")), + }; + + Ok(Expression::UpdateExpression(UpdateExpression { + operator, + argument: Box::new(left), + prefix: false, + })) + } + + /// Parse a unary expression + fn parse_unary_expression(&self, parser: &mut Parser) -> ParseResult { + // Parse the operator + let operator = match parser.peek() { + Token::Plus => { + parser.advance(); + UnaryOperator::Plus + }, + Token::Minus => { + parser.advance(); + UnaryOperator::Minus + }, + Token::Bang => { + parser.advance(); + UnaryOperator::Not + }, + Token::Tilde => { + parser.advance(); + UnaryOperator::BitwiseNot + }, + Token::Typeof => { + parser.advance(); + UnaryOperator::Typeof + }, + Token::Void => { + parser.advance(); + UnaryOperator::Void + }, + Token::Delete => { + parser.advance(); + UnaryOperator::Delete + }, + _ => return Err(parser.error_at_current("Expected a unary operator")), + }; + + // Parse the argument + let argument = self.parse_with_precedence(parser, Precedence::Prefix)?; + + Ok(Expression::UnaryExpression(UnaryExpression { + operator, + argument: Box::new(argument), + prefix: true, + })) + } + + /// Parse a binary expression + fn parse_binary_expression(&self, 
parser: &mut Parser, left: Expression) -> ParseResult<Expression> {
        let (operator, precedence) = self.get_binary_operator(parser)?;

        parser.advance();

        // `parse_with_precedence(p)` keeps consuming infix operators only while they bind
        // strictly tighter than `p`. Passing the operator's own level therefore already
        // yields left associativity (`a - b - c` => `(a - b) - c`) while still letting
        // tighter operators attach to the right operand (`a + b * c` => `a + (b * c)`).
        // `**` is right-associative, so its right operand must also accept another `**`:
        // use the level directly below it.
        let right_floor = if precedence == Precedence::Exponentiation {
            Precedence::Multiplicative
        } else {
            precedence
        };

        let right = self.parse_with_precedence(parser, right_floor)?;

        Ok(Expression::BinaryExpression(BinaryExpression {
            operator,
            left: Box::new(left),
            right: Box::new(right),
        }))
    }

    /// Parse a logical expression (`&&`, `||`, `??`) whose left operand is `left`.
    ///
    /// The current token must be the logical operator. The right operand is parsed at the
    /// operator's own precedence, which makes the operator left-associative.
    fn parse_logical_expression(&self, parser: &mut Parser, left: Expression) -> ParseResult<Expression> {
        let (operator, precedence) = self.get_logical_operator(parser)?;

        parser.advance();

        let right = self.parse_with_precedence(parser, precedence)?;

        Ok(Expression::LogicalExpression(LogicalExpression {
            operator,
            left: Box::new(left),
            right: Box::new(right),
        }))
    }

    /// Parse a conditional expression (`test ? consequent : alternate`).
    ///
    /// The current token must be `?`. Both branches are parsed at assignment level:
    /// they may contain assignments, arrow functions and nested conditionals, but a
    /// top-level comma is left for the enclosing sequence expression.
    fn parse_conditional_expression(&self, parser: &mut Parser, test: Expression) -> ParseResult<Expression> {
        // Consume the question mark
        parser.assert_consume(&Token::Question, "Expected '?' in conditional expression")?;

        // Parse the consequent. `Precedence::Comma` as the floor admits every operator
        // that binds tighter than ',', i.e. a full assignment expression.
        let consequent = self.parse_with_precedence(parser, Precedence::Comma)?;

        // Consume the colon
        parser.assert_consume(&Token::Colon, "Expected ':' in conditional expression")?;

        // Parse the alternate. Using `Precedence::Assignment` here would stop in front of
        // '=' and turn `a ? b : c = d` into an invalid assignment to the whole conditional.
        let alternate = self.parse_with_precedence(parser, Precedence::Comma)?;

        Ok(Expression::ConditionalExpression(ConditionalExpression {
            test: Box::new(test),
            consequent: Box::new(consequent),
            alternate: Box::new(alternate),
        }))
    }

    /// Parse an assignment expression whose target is `left`.
    ///
    /// The current token must be an assignment operator. Assignment is right-associative,
    /// so `a = b = c` is parsed as `a = (b = c)`.
    fn parse_assignment_expression(&self, parser: &mut Parser, left: Expression) -> ParseResult<Expression> {
        // Get the operator
        let operator = self.get_assignment_operator(parser)?;

        // Consume the operator token
        parser.advance();

        // Parse the right side. The floor is one level below assignment so that a chained
        // assignment operator is consumed by the right operand instead of being applied
        // to the already-built assignment on the left.
        let right = self.parse_with_precedence(parser, Precedence::Comma)?;

        // Convert the left expression to a valid assignment target
        let left = match self.to_assignment_target(left) {
Ok(target) => target, + Err(_) => return Err(parser.error_at_current("Invalid left-hand side in assignment")), + }; + + Ok(Expression::AssignmentExpression(AssignmentExpression { + operator, + left, + right: Box::new(right), + })) + } + + /// Parse a sequence expression (comma-separated expressions) + fn parse_sequence_expression(&self, parser: &mut Parser, first: Expression) -> ParseResult { + let mut expressions = vec![first]; + + // Consume the comma + parser.assert_consume(&Token::Comma, "Expected ',' in sequence expression")?; + + // Parse the next expression + let next = self.parse_with_precedence(parser, Precedence::Lowest)?; + expressions.push(next); + + // Parse any additional expressions + while parser.consume(&Token::Comma) { + let expr = self.parse_with_precedence(parser, Precedence::Lowest)?; + expressions.push(expr); + } + + Ok(Expression::SequenceExpression(SequenceExpression { expressions })) + } + + /// Helper method to get a binary operator and its precedence + fn get_binary_operator(&self, parser: &mut Parser) -> ParseResult<(BinaryOperator, Precedence)> { + match parser.peek() { + Token::Plus => Ok((BinaryOperator::Addition, Precedence::Additive)), + Token::Minus => Ok((BinaryOperator::Subtraction, Precedence::Additive)), + Token::Star => Ok((BinaryOperator::Multiplication, Precedence::Multiplicative)), + Token::Slash => Ok((BinaryOperator::Division, Precedence::Multiplicative)), + Token::Percent => Ok((BinaryOperator::Remainder, Precedence::Multiplicative)), + Token::StarStar => Ok((BinaryOperator::Exponentiation, Precedence::Exponentiation)), + Token::LessLess => Ok((BinaryOperator::LeftShift, Precedence::Shift)), + Token::GreaterGreater => Ok((BinaryOperator::RightShift, Precedence::Shift)), + Token::GreaterGreaterGreater => Ok((BinaryOperator::UnsignedRightShift, Precedence::Shift)), + Token::Ampersand => Ok((BinaryOperator::BitwiseAnd, Precedence::BitwiseAnd)), + Token::Pipe => Ok((BinaryOperator::BitwiseOr, Precedence::BitwiseOr)), + 
Token::Caret => Ok((BinaryOperator::BitwiseXor, Precedence::BitwiseXor)), + Token::EqualEqual => Ok((BinaryOperator::Equal, Precedence::Equality)), + Token::BangEqual => Ok((BinaryOperator::NotEqual, Precedence::Equality)), + Token::EqualEqualEqual => Ok((BinaryOperator::StrictEqual, Precedence::Equality)), + Token::BangEqualEqual => Ok((BinaryOperator::StrictNotEqual, Precedence::Equality)), + Token::Less => Ok((BinaryOperator::LessThan, Precedence::Relational)), + Token::LessEqual => Ok((BinaryOperator::LessThanOrEqual, Precedence::Relational)), + Token::Greater => Ok((BinaryOperator::GreaterThan, Precedence::Relational)), + Token::GreaterEqual => Ok((BinaryOperator::GreaterThanOrEqual, Precedence::Relational)), + Token::In => Ok((BinaryOperator::In, Precedence::Relational)), + Token::InstanceOf => Ok((BinaryOperator::InstanceOf, Precedence::Relational)), + _ => Err(parser.error_at_current("Expected a binary operator")), + } + } + + /// Helper method to get a logical operator and its precedence + fn get_logical_operator(&self, parser: &mut Parser) -> ParseResult<(LogicalOperator, Precedence)> { + match parser.peek() { + Token::AmpersandAmpersand => Ok((LogicalOperator::And, Precedence::LogicalAnd)), + Token::PipePipe => Ok((LogicalOperator::Or, Precedence::LogicalOr)), + Token::QuestionQuestion => Ok((LogicalOperator::NullishCoalescing, Precedence::NullishCoalescing)), + _ => Err(parser.error_at_current("Expected a logical operator")), + } + } + + /// Helper method to get an assignment operator + fn get_assignment_operator(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + Token::Equal => Ok(AssignmentOperator::Assign), + Token::PlusEqual => Ok(AssignmentOperator::PlusAssign), + Token::MinusEqual => Ok(AssignmentOperator::MinusAssign), + Token::StarEqual => Ok(AssignmentOperator::MultiplyAssign), + Token::SlashEqual => Ok(AssignmentOperator::DivideAssign), + Token::PercentEqual => Ok(AssignmentOperator::RemainderAssign), + Token::StarStarEqual 
=> Ok(AssignmentOperator::ExponentiationAssign), + Token::LessLessEqual => Ok(AssignmentOperator::LeftShiftAssign), + Token::GreaterGreaterEqual => Ok(AssignmentOperator::RightShiftAssign), + Token::GreaterGreaterGreaterEqual => Ok(AssignmentOperator::UnsignedRightShiftAssign), + Token::AmpersandEqual => Ok(AssignmentOperator::BitwiseAndAssign), + Token::PipeEqual => Ok(AssignmentOperator::BitwiseOrAssign), + Token::CaretEqual => Ok(AssignmentOperator::BitwiseXorAssign), + Token::AmpersandAmpersandEqual => Ok(AssignmentOperator::LogicalAndAssign), + Token::PipePipeEqual => Ok(AssignmentOperator::LogicalOrAssign), + Token::QuestionQuestionEqual => Ok(AssignmentOperator::NullishCoalescingAssign), + _ => Err(parser.error_at_current("Expected an assignment operator")), + } + } + + /// Helper method to convert an expression to an assignment target + fn to_assignment_target(&self, expr: Expression) -> Result { + match expr { + Expression::Identifier(_) => { + // Convert to pattern + Ok(AssignmentLeft::Pattern(Pattern::Identifier(match expr { + Expression::Identifier(ident) => ident, + _ => unreachable!(), + }))) + }, + Expression::MemberExpression(member) => { + // Member expressions are valid assignment targets + Ok(AssignmentLeft::Expression(Box::new(Expression::MemberExpression(member)))) + }, + _ => Err(()), + } + } + + /// Helper method to check if an expression is a valid left-hand side expression + fn is_valid_lhs_expression(&self, expr: &Expression) -> bool { + match expr { + Expression::Identifier(_) | + Expression::MemberExpression(_) => true, + _ => false, + } + } +} + +impl ParserCombinator for ExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + self.parse_with_precedence(parser, Precedence::Lowest) + } +} + +/// Precedence levels for expression parsing +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum Precedence { + Lowest, + Comma, // , + Assignment, // = += -= etc. 
+ Conditional, // ?: + NullishCoalescing, // ?? + LogicalOr, // || + LogicalAnd, // && + BitwiseOr, // | + BitwiseXor, // ^ + BitwiseAnd, // & + Equality, // == != === !== + Relational, // < > <= >= in instanceof + Shift, // << >> >>> + Additive, // + - + Multiplicative, // * / % + Exponentiation, // ** + Prefix, // ! ~ + - typeof void delete ++x --x + Postfix, // x++ x-- + Call, // . [] () +} + +impl Precedence { + /// Get the next higher precedence level + pub fn next(&self) -> Self { + match self { + Precedence::Lowest => Precedence::Comma, + Precedence::Comma => Precedence::Assignment, + Precedence::Assignment => Precedence::Conditional, + Precedence::Conditional => Precedence::NullishCoalescing, + Precedence::NullishCoalescing => Precedence::LogicalOr, + Precedence::LogicalOr => Precedence::LogicalAnd, + Precedence::LogicalAnd => Precedence::BitwiseOr, + Precedence::BitwiseOr => Precedence::BitwiseXor, + Precedence::BitwiseXor => Precedence::BitwiseAnd, + Precedence::BitwiseAnd => Precedence::Equality, + Precedence::Equality => Precedence::Relational, + Precedence::Relational => Precedence::Shift, + Precedence::Shift => Precedence::Additive, + Precedence::Additive => Precedence::Multiplicative, + Precedence::Multiplicative => Precedence::Exponentiation, + Precedence::Exponentiation => Precedence::Prefix, + Precedence::Prefix => Precedence::Postfix, + Precedence::Postfix => Precedence::Call, + Precedence::Call => Precedence::Call, // Can't go higher + } + } +} diff --git a/src/grammar/function.rs b/src/grammar/function.rs new file mode 100644 index 0000000..c4d9462 --- /dev/null +++ b/src/grammar/function.rs @@ -0,0 +1,242 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::pattern::*; +use super::expression::*; +use super::statement::*; + +/// Parser for function expressions +pub struct FunctionExpressionParser; + +impl FunctionExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for 
FunctionExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'function' keyword + parser.assert_consume(&Token::Function, "Expected 'function'")?; + + // Check if this is a generator function + let generator = parser.consume(&Token::Star); + + // Parse the function name if present + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierParser::new().parse(parser)?) + } else { + None + }; + + // Parse the parameter list + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut params = Vec::new(); + + if !parser.check(&Token::RightParen) { + // Parse the first parameter + params.push(PatternParser::new().parse(parser)?); + + // Parse additional parameters + while parser.consume(&Token::Comma) { + params.push(PatternParser::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; + + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: false, allow_yield: generator }, |p| { + BlockStatementParser::new().parse(p) + })?; + + Ok(FunctionExpression { + id, + params, + body, + generator, + async_function: false, + }) + } +} + +/// Parser for async function expressions +pub struct AsyncFunctionExpressionParser; + +impl AsyncFunctionExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for AsyncFunctionExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'async' keyword + parser.assert_consume(&Token::Async, "Expected 'async'")?; + + // Consume the 'function' keyword + parser.assert_consume(&Token::Function, "Expected 'function' after 'async'")?; + + // Check if this is a generator function + let generator = parser.consume(&Token::Star); + + // Parse the function name if present + let id = if matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierParser::new().parse(parser)?) 
+ } else { + None + }; + + // Parse the parameter list + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut params = Vec::new(); + + if !parser.check(&Token::RightParen) { + // Parse the first parameter + params.push(PatternParser::new().parse(parser)?); + + // Parse additional parameters + while parser.consume(&Token::Comma) { + params.push(PatternParser::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; + + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: true, allow_yield: generator }, |p| { + BlockStatementParser::new().parse(p) + })?; + + Ok(FunctionExpression { + id, + params, + body, + generator, + async_function: true, + }) + } +} + +/// Parser for arrow function expressions +pub struct ArrowFunctionExpressionParser; + +impl ArrowFunctionExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ArrowFunctionExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for async arrow function + let async_function = parser.consume(&Token::Async); + + // Parse the parameters + let mut params = Vec::new(); + + if parser.consume(&Token::LeftParen) { + // Multiple parameters in parentheses + if !parser.check(&Token::RightParen) { + // Parse the first parameter + params.push(PatternParser::new().parse(parser)?); + + // Parse additional parameters + while parser.consume(&Token::Comma) { + // Check for trailing comma (right paren immediately after comma) + if parser.check(&Token::RightParen) { + break; // This is a trailing comma, so stop parsing parameters + } + + // Parse the next parameter + params.push(PatternParser::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after arrow function parameters")?; + } else { + // Single parameter without parentheses + params.push(PatternParser::new().parse(parser)?); + } + + // Consume the arrow + 
parser.assert_consume(&Token::Arrow, "Expected '=>' after arrow function parameters")?;

        // Remember the body form before parsing it: a '{' right after '=>' always starts a
        // block body, anything else is a concise (expression) body.
        let block = parser.check(&Token::LeftBrace);

        let body = parser.with_context(LexicalContext::FunctionBody { allow_await: async_function, allow_yield: false }, |p| {
            if p.check(&Token::LeftBrace) {
                let block = BlockStatementParser::new().parse(p)?;
                Ok(ArrowFunctionBody::BlockStatement(block))
            } else {
                // A concise body is a single assignment-level expression. Stopping in front
                // of ',' keeps `[x => 1, y => 2]` and `f(x => 1, 2)` from folding the
                // following elements/arguments into the arrow body.
                let expr = ExpressionParser::new().parse_with_precedence(p, Precedence::Comma)?;
                Ok(ArrowFunctionBody::Expression(Box::new(expr)))
            }
        })?;

        Ok(ArrowFunctionExpression {
            params,
            body,
            expression: !block,
            async_function,
        })
    }
}


/// Parser for await expressions
pub struct AwaitExpressionParser;

impl AwaitExpressionParser {
    pub fn new() -> Self {
        Self
    }
}

impl ParserCombinator<AwaitExpression> for AwaitExpressionParser {
    /// Parse `await <operand>`. The current token must be `await`.
    fn parse(&self, parser: &mut Parser) -> ParseResult<AwaitExpression> {
        // Consume the 'await' keyword
        parser.assert_consume(&Token::Await, "Expected 'await'")?;

        // `await` binds like a unary operator: only postfix, member and call operators
        // attach to its operand, so `await a + b` is `(await a) + b`.
        let argument = Box::new(
            ExpressionParser::new().parse_with_precedence(parser, Precedence::Prefix)?,
        );

        Ok(AwaitExpression {
            argument,
        })
    }
}

/// Parser for yield expressions
pub struct YieldExpressionParser;

impl YieldExpressionParser {
    pub fn new() -> Self {
        Self
    }
}

impl ParserCombinator<YieldExpression> for YieldExpressionParser {
    /// Parse `yield`, `yield <operand>` or `yield* <operand>`. The current token must be
    /// `yield`.
    fn parse(&self, parser: &mut Parser) -> ParseResult<YieldExpression> {
        // Consume the 'yield' keyword
        parser.assert_consume(&Token::Yield, "Expected 'yield'")?;

        // Check for delegate (yield*)
        let delegate = parser.consume(&Token::Star);

        // The operand is optional: it is absent when `yield` is directly followed by a
        // token that ends the expression, by the end of input, or when
        // `previous_line_terminator()` reports true.
        let argument = if parser.check(&Token::Semicolon) || parser.is_at_end() ||
                          parser.check(&Token::RightBrace) || parser.check(&Token::Comma) ||
                          parser.check(&Token::RightParen) || parser.check(&Token::RightBracket) ||
                          parser.check(&Token::Colon) || parser.previous_line_terminator() {
            None
        } else {
            // The operand is an assignment-level expression; a following ',' belongs to
            // the enclosing sequence (`yield a, b` is `(yield a), b`).
            Some(Box::new(
                ExpressionParser::new().parse_with_precedence(parser, Precedence::Comma)?,
            ))
        };

        Ok(YieldExpression {
            argument,
            delegate,
        })
} +} diff --git a/src/grammar/literal.rs b/src/grammar/literal.rs new file mode 100644 index 0000000..caeb232 --- /dev/null +++ b/src/grammar/literal.rs @@ -0,0 +1,66 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; + +pub struct LiteralParser; + +impl LiteralParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for LiteralParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + Token::StringLiteral(value) => { + let value_clone = value.clone(); + parser.advance(); + + return Ok(Literal::StringLiteral(StringLiteral { + value: value_clone.into_boxed_str(), + })); + }, + Token::NumberLiteral(value) => { + let value_copy = *value; + parser.advance(); + return Ok(Literal::NumericLiteral(NumericLiteral { + value: value_copy, + })); + }, + Token::BigIntLiteral(value) => { + let value_clone = value.clone(); + parser.advance(); + return Ok(Literal::BigIntLiteral(BigIntLiteral { + value: value_clone.into_boxed_str(), + })); + }, + Token::RegExpLiteral(pattern, flags) => { + let pattern_clone = pattern.clone(); + let flags_clone = flags.clone(); + parser.advance(); + return Ok(Literal::RegExpLiteral(RegExpLiteral { + pattern: pattern_clone.into_boxed_str(), + flags: flags_clone.into_boxed_str(), + })); + }, + Token::True => { + parser.advance(); + return Ok(Literal::BooleanLiteral(BooleanLiteral { + value: true, + })); + }, + Token::False => { + parser.advance(); + return Ok(Literal::BooleanLiteral(BooleanLiteral { + value: false, + })); + }, + Token::Null => { + parser.advance(); + return Ok(Literal::NullLiteral(NullLiteral {})); + }, + _ => return Err(parser.error_at_current("Expected a literal")), + }; + } +} diff --git a/src/grammar/member.rs b/src/grammar/member.rs new file mode 100644 index 0000000..15b9265 --- /dev/null +++ b/src/grammar/member.rs @@ -0,0 +1,67 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::expression::*; +use super::declaration::*; +use 
super::pattern::*; +use super::call::*; + +pub struct MemberExpressionParser; + +impl MemberExpressionParser { + pub fn new() -> Self { + Self + } + + /// Parse a member expression with a given object + pub fn parse_with_object(&self, parser: &mut Parser, object: Expression) -> ParseResult { + // Check if this is an optional chain + // Note: In many cases, the QuestionDot has already been consumed by the caller + let optional = parser.consume(&Token::QuestionDot); + + // Parse the property access + let (property, computed) = if parser.consume(&Token::LeftBracket) { + // Computed property access: obj[expr] or obj?.[expr] + let expr = ExpressionParser::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property")?; + (MemberProperty::Expression(Box::new(expr)), true) + } else if parser.check(&Token::LeftParen) && optional { + // Optional function call: obj?.(args) + // Let the call expression parser handle it + return Err(parser.error_at_current("Expected property name after optional chaining operator")); + } else if optional || parser.consume(&Token::Dot) { + // Static property access: obj.prop or obj?.prop + // At this point, we should be directly at the identifier + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierParser::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else { + return Err(parser.error_at_current("Expected identifier after '.' or '?.'")); + } + } else { + // If we're here, we're expecting a direct property access without a dot + // This happens when the caller has already consumed the QuestionDot + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierParser::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else { + return Err(parser.error_at_current("Expected '.' 
or '[' in member expression")); + } + }; + + Ok(MemberExpression { + object: Box::new(object), + property, + computed, + optional, + }) + } + +} + +impl ParserCombinator for MemberExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let object = ExpressionParser::new().parse(parser)?; + self.parse_with_object(parser, object) + } +} diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs new file mode 100644 index 0000000..cb13d5d --- /dev/null +++ b/src/grammar/mod.rs @@ -0,0 +1,29 @@ +mod expression; +mod member; +mod pattern; +mod statement; +mod declaration; +mod literal; +mod function; +mod class; +mod array; +mod object; +mod call; +mod module; +mod this; +mod new; + +pub use self::expression::*; +pub use self::member::*; +pub use self::pattern::*; +pub use self::statement::*; +pub use self::declaration::*; +pub use self::literal::*; +pub use self::class::*; +pub use self::array::*; +pub use self::function::*; +pub use self::object::*; +pub use self::call::*; +pub use self::module::*; +pub use self::this::*; +pub use self::new::*; \ No newline at end of file diff --git a/src/grammar/module.rs b/src/grammar/module.rs new file mode 100644 index 0000000..4b799e1 --- /dev/null +++ b/src/grammar/module.rs @@ -0,0 +1,51 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::statement::*; + +pub struct ModuleParser; + +impl ModuleParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ModuleParser { + + fn parse(&self, parser: &mut Parser) -> ParseResult { + + let body = parser.with_context(LexicalContext::ModuleBody { allow_await: true }, |p| { + let mut result = Vec::new(); + while !p.is_at_end() { + let statement = StatementParser::new().parse(p)?; + result.push(statement); + } + Ok(result) + })?; + + Ok(Program { source_type: SourceType::Module, body }) + } +} + +pub struct ScriptParser; + +impl ScriptParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ScriptParser { + fn 
parse(&self, parser: &mut Parser) -> ParseResult { + let mut body = Vec::new(); + + while !parser.is_at_end() { + let statement = StatementParser::new().parse(parser)?; + body.push(statement); + } + + Ok(Program { source_type: SourceType::Script, body }) + } +} + diff --git a/src/grammar/new.rs b/src/grammar/new.rs new file mode 100644 index 0000000..6f049cb --- /dev/null +++ b/src/grammar/new.rs @@ -0,0 +1,44 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::expression::*; + +pub struct NewExpressionParser; + +impl NewExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for NewExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::New, "Expected 'new'")?; + + // Parse the callee with appropriate precedence + // Use a higher precedence than Call to ensure we don't consume too much + let callee = Box::new(ExpressionParser::new().parse_with_precedence(parser, Precedence::Call)?); + + let mut arguments = Vec::new(); + + // Only parse arguments if there are parentheses + if parser.check(&Token::LeftParen) { + parser.assert_consume(&Token::LeftParen, "Expected '(' after new expression")?; + + if !parser.check(&Token::RightParen) { + arguments.push(ExpressionParser::new().parse(parser)?); + + while parser.consume(&Token::Comma) && !parser.check(&Token::RightParen) { + arguments.push(ExpressionParser::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after new expression arguments")?; + } + + Ok(NewExpression { + callee, + arguments, + }) + } +} diff --git a/src/grammar/object.rs b/src/grammar/object.rs new file mode 100644 index 0000000..5f40332 --- /dev/null +++ b/src/grammar/object.rs @@ -0,0 +1,166 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::expression::*; +use super::function::*; +use super::literal::*; + +/// Parser for object expressions +pub struct ObjectExpressionParser; + 
+impl ObjectExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ObjectExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + + parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of object expression")?; + + let mut properties = Vec::new(); + + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + + let property = PropertyParser::new().parse(parser)?; + properties.push(property.clone()); + + if parser.consume(&Token::Comma) { + if parser.check(&Token::RightBrace) { + break; + } + } else { + break; + } + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' at the end of object expression")?; + + Ok(ObjectExpression { properties }) + } +} + +/// Parser for object properties +pub struct PropertyParser; + +impl PropertyParser { + pub fn new() -> Self { + Self + } + + fn parse_property_value(&self, parser: &mut Parser) -> ParseResult { + ExpressionParser::new().parse_with_precedence(parser, Precedence::Assignment) + } + +} + + +/// Parser for object properties +impl ParserCombinator for PropertyParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for special property types (getter, setter, async, generator) + let mut method = false; + let mut kind = PropertyKind::Init; + + // Parse property modifiers + if parser.consume(&Token::Get) { + kind = PropertyKind::Get; + } else if parser.consume(&Token::Set) { + kind = PropertyKind::Set; + } else if parser.consume(&Token::Async) { + method = true; + } else if parser.consume(&Token::Star) { + method = true; + } + + // Parse the property key (computed or not) + let computed = parser.consume(&Token::LeftBracket); + + let key = if computed { + // Computed property key: [expr] + let expr = ExpressionParser::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property key")?; + PropertyKey::Expression(Box::new(expr)) + } else { + // Regular property key: identifier, 
string, or number + match parser.peek() { + Token::StringLiteral(_) | + Token::NumberLiteral(_) => { + // String or number literal as key + let literal = LiteralParser::new().parse(parser)?; + PropertyKey::Literal(literal) + }, + Token::Identifier(_) => { + // Identifier as key + let name = match parser.peek() { + Token::Identifier(ident) => ident.clone(), + _ => unreachable!() + }; + parser.advance(); // Consume the identifier + PropertyKey::Identifier(Identifier { name: name.into() }) + }, + _ => return Err(parser.error_at_current("Expected property name")) + } + }; + + // Check if this is a method (has parentheses after the key) + if parser.check(&Token::LeftParen) { + method = true; + } + + // Check if this is a shorthand property (no colon after key) + let shorthand = !computed && !method && !parser.check(&Token::Colon) && + matches!(kind, PropertyKind::Init); + + // Parse the property value + let value = if shorthand { + // Shorthand property: { x } is equivalent to { x: x } + match &key { + PropertyKey::Identifier(ident) => { + Box::new(Expression::Identifier(Identifier { name: ident.name.clone() })) + }, + _ => return Err(parser.error_at_current("Invalid shorthand property")) + } + } else if method { + // Method definition: { method() { ... 
} } + parser.assert_consume(&Token::LeftParen, "Expected '(' after method name")?; + let func_expr = FunctionExpressionParser::new().parse(parser)?; + Box::new(Expression::FunctionExpression(func_expr)) + } else { + // Regular property: { key: value } + parser.assert_consume(&Token::Colon, "Expected ':' after property key")?; + + // Check for arrow function + let pos = parser.save_position(); + if matches!(parser.peek(), Token::Identifier(_)) && + parser.peek_next(1) == &Token::Arrow { + // This might be an arrow function + if let Ok(arrow_func) = ArrowFunctionExpressionParser::new().parse(parser) { + return Ok(Property { + key, + value: Box::new(Expression::ArrowFunctionExpression(arrow_func)), + kind, + method, + shorthand, + computed, + }); + } + parser.restore_position(pos); + } + + // Parse the value as an expression + let expr = self.parse_property_value(parser)?; + Box::new(expr) + }; + + Ok(Property { + key, + value, + kind, + method, + shorthand, + computed, + }) + } +} diff --git a/src/grammar/pattern.rs b/src/grammar/pattern.rs new file mode 100644 index 0000000..28ff5da --- /dev/null +++ b/src/grammar/pattern.rs @@ -0,0 +1,285 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::literal::*; +use super::expression::*; +use super::member::*; + +/// Parser for JavaScript patterns (destructuring) +pub struct PatternParser; + +impl PatternParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for PatternParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Try to parse as object pattern + if parser.check(&Token::LeftBrace) { + return ObjectPatternParser::new().parse(parser).map(Pattern::ObjectPattern); + } + + // Try to parse as array pattern + if parser.check(&Token::LeftBracket) { + return ArrayPatternParser::new().parse(parser).map(Pattern::ArrayPattern); + } + + // Try to parse as rest element + if parser.check(&Token::Ellipsis) { + return 
RestElementParser::new().parse(parser).map(Pattern::RestElement); + } + + // Try to parse as identifier or assignment pattern + // First parse an identifier + let pos = parser.save_position(); + + if let Ok(ident) = IdentifierParser::new().parse(parser) { + // Check if this is an assignment pattern + if parser.check(&Token::Equal) { + // Consume the equals sign + parser.assert_consume(&Token::Equal, "Expected '=' in assignment pattern")?; + + // Parse the right side (must be a valid expression) + let right = ExpressionParser::new().parse(parser)?; + + return Ok(Pattern::AssignmentPattern(AssignmentPattern { + left: Box::new(Pattern::Identifier(ident)), + right: Box::new(right), + })); + } + + // If not an assignment, return the identifier + return Ok(Pattern::Identifier(ident)); + } + + // Restore position after failed identifier attempt + parser.restore_position(pos); + + // Try to parse as member expression (only valid in some contexts) + let result = MemberExpressionParser::new().parse(parser); + if result.is_ok() { + return result.map(Pattern::MemberExpression); + } + + // If all attempts failed, return an error + Err(parser.error_at_current("Expected a valid pattern")) + } +} + +/// Parser for object patterns +pub struct ObjectPatternParser; + +impl ObjectPatternParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ObjectPatternParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the opening brace + parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of object pattern")?; + + let mut properties = Vec::new(); + + // Parse properties until we hit the closing brace + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + // Check for rest element + if parser.check(&Token::Ellipsis) { + let rest = RestElementParser::new().parse(parser)?; + properties.push(ObjectPatternProperty::RestElement(rest)); + + // After rest element, we can only have a closing brace + if 
!parser.check(&Token::RightBrace) { + // Consume comma if present + if parser.consume(&Token::Comma) { + return Err(parser.error_at_current("Rest element must be the last element in an object pattern")); + } + } + break; + } else { + // Parse regular property + let property = ObjectPropertyParser::new().parse(parser)?; + properties.push(ObjectPatternProperty::Property(property)); + + // If there's no comma, we should be at the end + if !parser.consume(&Token::Comma) { + break; + } + } + } + + // Consume the closing brace + parser.assert_consume(&Token::RightBrace, "Expected '}' at the end of object pattern")?; + + Ok(ObjectPattern { + properties, + }) + } +} + +/// Parser for object pattern properties +pub struct ObjectPropertyParser; + +impl ObjectPropertyParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ObjectPropertyParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check if we have a computed property + let computed = parser.consume(&Token::LeftBracket); + + // Parse the key + let key = if computed { + // Parse expression inside brackets + let expr = ExpressionParser::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property key")?; + PropertyKey::Expression(Box::new(expr)) + } else { + // Parse identifier or literal + match parser.peek() { + Token::StringLiteral(_) | + Token::NumberLiteral(_) => { + let literal = LiteralParser::new().parse(parser)?; + PropertyKey::Literal(literal) + }, + _ => { + let ident = IdentifierParser::new().parse(parser)?; + PropertyKey::Identifier(ident) + } + } + }; + + // Check if this is a shorthand property + let shorthand = !computed && !parser.check(&Token::Colon); + + // Parse the value if not shorthand + let value = if shorthand { + // For shorthand, the value is the same as the key + match &key { + PropertyKey::Identifier(ident) => { + // Create a new identifier with the same name + let name = ident.name.clone(); + 
Pattern::Identifier(Identifier { name }) + }, + _ => return Err(parser.error_at_current("Invalid shorthand property in object pattern")), + } + } else { + // Consume the colon + parser.assert_consume(&Token::Colon, "Expected ':' after property key in object pattern")?; + + // Parse the pattern + PatternParser::new().parse(parser)? + }; + + Ok(ObjectProperty { + key, + value, + computed, + shorthand, + }) + } +} + +/// Parser for array patterns +pub struct ArrayPatternParser; + +impl ArrayPatternParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ArrayPatternParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBracket, "Expected '[' at start of array pattern")?; + + let mut elements = Vec::new(); + + while !parser.check(&Token::RightBracket) && !parser.is_at_end() { + if parser.consume(&Token::Comma) { + // Handle elision (hole in the pattern) + elements.push(None); + } else if parser.consume(&Token::Ellipsis) { + // Handle rest element + let argument = Box::new(PatternParser::new().parse(parser)?); + elements.push(Some(Pattern::RestElement(RestElement { argument }))); + + // Rest element must be the last one + if parser.consume(&Token::Comma) && !parser.check(&Token::RightBracket) { + return Err(parser.error_at_current("Rest element must be the last element in array pattern")); + } + break; + } else { + // Parse regular element + let element = PatternParser::new().parse(parser)?; + elements.push(Some(element)); + + // If there's no comma, we should be at the end + if !parser.consume(&Token::Comma) { + break; + } + } + } + + parser.assert_consume(&Token::RightBracket, "Expected ']' at end of array pattern")?; + + Ok(ArrayPattern { elements }) + } +} + + +/// Parser for rest elements +pub struct RestElementParser; + +impl RestElementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for RestElementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume 
the ellipsis + parser.assert_consume(&Token::Ellipsis, "Expected '...' for rest element")?; + + // Parse the argument pattern + let argument = PatternParser::new().parse(parser)?; + + Ok(RestElement { + argument: Box::new(argument), + }) + } +} + +/// Parser for identifiers +pub struct IdentifierParser; + +impl IdentifierParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for IdentifierParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + + let name = match parser.peek() { + Token::Identifier(name) => { + let name = name.clone().into_boxed_str(); + parser.advance(); + Ok(name) + }, + _ => Err(parser.error_at_current("Expected an identifier")), + }?; + + Ok(Identifier { + name, + }) + } +} diff --git a/src/grammar/statement.rs b/src/grammar/statement.rs new file mode 100644 index 0000000..ad158e0 --- /dev/null +++ b/src/grammar/statement.rs @@ -0,0 +1,857 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use super::expression::*; +use super::declaration::*; +use super::pattern::*; +use super::class::*; +use super::object::*; + +/// Parser for JavaScript statements +pub struct StatementParser; + +impl StatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for StatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + match parser.peek() { + // Special case for object literals at the start of a statement + Token::LeftBrace => { + // Try to parse as object expression first + let pos = parser.save_position(); + + // Attempt to parse as an object literal + match ObjectExpressionParser::new().parse(parser) { + Ok(obj_expr) => { + + // Successfully parsed as object expression + // Consume the semicolon if present + parser.consume(&Token::Semicolon); + + return Ok(Statement::ExpressionStatement(ExpressionStatement { + expression: Box::new(Expression::ObjectExpression(obj_expr)), + })); + }, + Err(_) => { + // Failed to parse as object expression, restore position and try as 
block statement + parser.restore_position(pos); + return BlockStatementParser::new().parse(parser).map(Statement::BlockStatement); + } + } + }, + Token::Var | + Token::Let | + Token::Const => { + VariableDeclarationParser::new().parse(parser).map(|decl| + Statement::Declaration(Declaration::VariableDeclaration(decl)) + ) + }, + Token::Function => { + // Check if this is a function declaration (has an identifier) + let pos = parser.save_position(); + parser.advance(); // Skip 'function' + + // Check for generator function + let _is_generator = parser.consume(&Token::Star); + + // If the next token is an identifier, this is a function declaration + if let Token::Identifier(_) = parser.peek() { + parser.restore_position(pos); + FunctionDeclarationParser::new().parse(parser).map(|decl| Statement::Declaration(Declaration::FunctionDeclaration(decl))) + } else { + // Otherwise, it's a function expression statement + parser.restore_position(pos); + ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + } + }, + Token::Class => { + // Check if this is a class declaration (has an identifier) + let pos = parser.save_position(); + parser.advance(); // Skip 'class' + + // If the next token is an identifier, this is a class declaration + if let Token::Identifier(_) = parser.peek() { + parser.restore_position(pos); + ClassDeclarationParser::new().parse(parser).map(|decl| Statement::Declaration(Declaration::ClassDeclaration(decl))) + } else { + // Otherwise, it's a class expression statement + parser.restore_position(pos); + ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + } + }, + Token::Import => { + ImportDeclarationParser::new().parse(parser).map(|decl| + Statement::Declaration(Declaration::ImportDeclaration(decl)) + ) + }, + Token::Export => { + ExportDeclarationParser::new().parse(parser).map(|decl| { + match decl { + ExportDeclaration::Named(named) => 
Statement::Declaration(Declaration::ExportNamedDeclaration(named)), + ExportDeclaration::Default(default) => Statement::Declaration(Declaration::ExportDefaultDeclaration(default)), + ExportDeclaration::All(all) => Statement::Declaration(Declaration::ExportAllDeclaration(all)), + } + }) + }, + Token::If => { + IfStatementParser::new().parse(parser).map(Statement::IfStatement) + }, + Token::Switch => { + SwitchStatementParser::new().parse(parser).map(Statement::SwitchStatement) + }, + Token::For => { + // Try to parse as for statement + let pos = parser.save_position(); + match ForStatementParser::new().parse(parser) { + Ok(stmt) => Ok(Statement::ForStatement(stmt)), + Err(_) => { + // Try to parse as for-in statement + parser.restore_position(pos); + match ForInStatementParser::new().parse(parser) { + Ok(stmt) => Ok(Statement::ForInStatement(stmt)), + Err(_) => { + // Try to parse as for-of statement + parser.restore_position(pos); + ForOfStatementParser::new().parse(parser).map(Statement::ForOfStatement) + } + } + } + } + }, + Token::While => { + WhileStatementParser::new().parse(parser).map(Statement::WhileStatement) + }, + Token::Do => { + DoWhileStatementParser::new().parse(parser).map(Statement::DoWhileStatement) + }, + Token::Try => { + TryStatementParser::new().parse(parser).map(Statement::TryStatement) + }, + Token::With => { + WithStatementParser::new().parse(parser).map(Statement::WithStatement) + }, + Token::Throw => { + ThrowStatementParser::new().parse(parser).map(Statement::ThrowStatement) + }, + Token::Return => { + ReturnStatementParser::new().parse(parser).map(Statement::ReturnStatement) + }, + Token::Break => { + BreakStatementParser::new().parse(parser).map(Statement::BreakStatement) + }, + Token::Continue => { + ContinueStatementParser::new().parse(parser).map(Statement::ContinueStatement) + }, + Token::Debugger => { + parser.advance(); // Consume 'debugger' + parser.consume(&Token::Semicolon); // Optional semicolon + 
Ok(Statement::DebuggerStatement) + }, + Token::Semicolon => { + parser.advance(); // Consume ';' + Ok(Statement::EmptyStatement) + }, + // Check for labeled statements (identifier followed by colon) + Token::Identifier(_) => { + let pos = parser.save_position(); + let ident = IdentifierParser::new().parse(parser)?; + + if parser.consume(&Token::Colon) { + // This is a labeled statement + let body = Box::new(self.parse(parser)?); + Ok(Statement::LabeledStatement(LabeledStatement { label: ident, body })) + } else { + // Not a labeled statement, restore position and parse as expression statement + parser.restore_position(pos); + ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + } + }, + // Default to expression statement + _ => { + ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + } + } + } +} + +/// Parser for block statements +pub struct BlockStatementParser; + +impl BlockStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for BlockStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of block statement")?; + + let mut body = Vec::new(); + + while !parser.check(&Token::RightBrace) && !parser.is_at_end() { + // Parse a statement + let statement = StatementParser::new().parse(parser)?; + body.push(statement); + } + + parser.assert_consume(&Token::RightBrace, "Expected '}' at the end of block statement")?; + + Ok(BlockStatement { body }) + } +} + +/// Parser for expression statements +pub struct ExpressionStatementParser; + +impl ExpressionStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ExpressionStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for directive prologue (string literals that might be "use strict") + if let Token::StringLiteral(_) = parser.peek() { + let pos = parser.save_position(); + let expr = 
ExpressionParser::new().parse(parser)?; + + // If this is followed by a semicolon or end of block, it's a directive + if parser.check(&Token::Semicolon) || parser.check(&Token::RightBrace) || parser.is_at_end() { + // Consume the semicolon if present + parser.consume(&Token::Semicolon); + + /* + // Check if this is "use strict" + if let Expression::Literal(Literal::StringLiteral(StringLiteral { value })) = &expr { + if value == "use strict" { + // Set strict mode + parser.set_strict_mode(true); + } + }*/ + + return Ok(ExpressionStatement { + expression: Box::new(expr), + }); + } + + // Not a directive, restore position and continue with normal parsing + parser.restore_position(pos); + } + + // Special case for object literals at the start of a statement + if parser.check(&Token::LeftBrace) { + let pos = parser.save_position(); + + // Try to parse as object expression + match ObjectExpressionParser::new().parse(parser) { + Ok(obj_expr) => { + // Successfully parsed as object expression + // Consume the semicolon if present + parser.consume(&Token::Semicolon); + + return Ok(ExpressionStatement { + expression: Box::new(Expression::ObjectExpression(obj_expr)), + }); + }, + Err(_) => { + // Failed to parse as object expression, restore position + parser.restore_position(pos); + // Will fall through to regular expression parsing + } + } + } + + // Regular expression statement parsing + let expr = ExpressionParser::new().parse(parser)?; + + // Consume the semicolon if present (ASI rules apply) + // In JavaScript, semicolons are optional in many cases due to Automatic Semicolon Insertion (ASI) + if !parser.previous_line_terminator() && + !parser.check(&Token::RightBrace) && + !parser.is_at_end() { + parser.assert_consume(&Token::Semicolon, "Expected ';' after expression statement")?; + } else { + // Semicolon is optional if: + // 1. There's a line terminator after the expression + // 2. The next token is a closing brace + // 3. 
We're at the end of the input + parser.consume(&Token::Semicolon); + } + + Ok(ExpressionStatement { + expression: Box::new(expr), + }) + } +} + +/// Parser for if statements +pub struct IfStatementParser; + +impl IfStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for IfStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::If, "Expected 'if'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'if'")?; + + let test = Box::new(ExpressionParser::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after if condition")?; + + let consequent = Box::new(StatementParser::new().parse(parser)?); + + let alternate = if parser.consume(&Token::Else) { + Some(Box::new(StatementParser::new().parse(parser)?)) + } else { + None + }; + + Ok(IfStatement { + test, + consequent, + alternate, + }) + } +} + +/// Parser for switch statements +pub struct SwitchStatementParser; + +impl SwitchStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for SwitchStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Switch, "Expected 'switch'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'switch'")?; + + let discriminant = Box::new(ExpressionParser::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after switch expression")?; + parser.assert_consume(&Token::LeftBrace, "Expected '{' to start switch body")?; + + let cases = parser.with_context(LexicalContext::SwitchBody, |p| { + + let mut result = Vec::new(); + + while !p.check(&Token::RightBrace) && !p.is_at_end() { + if p.consume(&Token::Case) { + // Case clause + let test = Some(Box::new(ExpressionParser::new().parse(p)?)); + p.assert_consume(&Token::Colon, "Expected ':' after case value")?; + + let mut consequent = Vec::new(); + while !p.check(&Token::Case) && + !p.check(&Token::Default) 
&& + !p.check(&Token::RightBrace) && + !p.is_at_end() { + consequent.push(StatementParser::new().parse(p)?); + } + + result.push(SwitchCase { test, consequent }); + } else if p.consume(&Token::Default) { + // Default clause + p.assert_consume(&Token::Colon, "Expected ':' after 'default'")?; + + let mut consequent = Vec::new(); + while !p.check(&Token::Case) && + !p.check(&Token::Default) && + !p.check(&Token::RightBrace) && + !p.is_at_end() { + consequent.push(StatementParser::new().parse(p)?); + } + + result.push(SwitchCase { test: None, consequent }); + } else { + return Err(p.error_at_current("Expected 'case' or 'default' in switch statement")); + } + } + + p.assert_consume(&Token::RightBrace, "Expected '}' to end switch statement")?; + + Ok(result) + })?; + + + Ok(SwitchStatement { + discriminant, + cases, + }) + } +} + +/// Parser for while statements +pub struct WhileStatementParser; + +impl WhileStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for WhileStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::While, "Expected 'while'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'while'")?; + + let test = Box::new(ExpressionParser::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after while condition")?; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + StatementParser::new().parse(p) + })?; + + Ok(WhileStatement { + test, + body: Box::new(body), + }) + } +} + +/// Parser for do-while statements +pub struct DoWhileStatementParser; + +impl DoWhileStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for DoWhileStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Do, "Expected 'do'")?; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + StatementParser::new().parse(p) + })?; + + 
parser.assert_consume(&Token::While, "Expected 'while' after do block")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'while'")?; + + let test = Box::new(ExpressionParser::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after while condition")?; + parser.assert_consume(&Token::Semicolon, "Expected ';' after while condition")?; + + Ok(DoWhileStatement { + body: Box::new(body), + test, + }) + } +} + +/// Parser for for statements +pub struct ForStatementParser; + +impl ForStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ForStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::For, "Expected 'for'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; + + // Parse initialization + let init = if parser.consume(&Token::Semicolon) { + None + } else if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let decl = VariableDeclarationParser::new().parse(parser)?; + parser.assert_consume(&Token::Semicolon, "Expected ';' after for initialization")?; + Some(ForInit::VariableDeclaration(decl)) + } else { + // Expression + let expr = ExpressionParser::new().parse(parser)?; + parser.assert_consume(&Token::Semicolon, "Expected ';' after for initialization")?; + Some(ForInit::Expression(Box::new(expr))) + }; + + // Parse condition + let test = if parser.consume(&Token::Semicolon) { + None + } else { + let expr = ExpressionParser::new().parse(parser)?; + parser.assert_consume(&Token::Semicolon, "Expected ';' after for condition")?; + Some(Box::new(expr)) + }; + + // Parse update + let update = if parser.consume(&Token::RightParen) { + None + } else { + let expr = ExpressionParser::new().parse(parser)?; + parser.assert_consume(&Token::RightParen, "Expected ')' after for clauses")?; + Some(Box::new(expr)) + }; + + let body = 
parser.with_context(LexicalContext::LoopBody, |p| { + StatementParser::new().parse(p) + })?; + + Ok(ForStatement { + init, + test, + update, + body: Box::new(body), + }) + } +} + +/// Parser for for-in statements +pub struct ForInStatementParser; + +impl ForInStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ForInStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::For, "Expected 'for'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; + + // Parse left side (variable declaration or pattern) + let left = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let decl = VariableDeclarationParser::new().parse(parser)?; + ForInOf::VariableDeclaration(decl) + } else { + // Pattern + let pattern = PatternParser::new().parse(parser)?; + ForInOf::Pattern(pattern) + }; + + // Expect 'in' keyword + parser.assert_consume(&Token::In, "Expected 'in' in for-in statement")?; + + // Parse right side (expression) + let right = Box::new(ExpressionParser::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after for-in clauses")?; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + StatementParser::new().parse(p) + })?; + + Ok(ForInStatement { + left, + right, + body: Box::new(body), + }) + } +} + +/// Parser for for-of statements +pub struct ForOfStatementParser; + +impl ForOfStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ForOfStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::For, "Expected 'for'")?; + + // Check for 'await' (for await of) + let await_token = parser.consume(&Token::Await); + + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; + + // Parse left side (variable declaration or pattern) + let left = if parser.check(&Token::Var) || 
parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let decl = VariableDeclarationParser::new().parse(parser)?; + ForInOf::VariableDeclaration(decl) + } else { + // Pattern + let pattern = PatternParser::new().parse(parser)?; + ForInOf::Pattern(pattern) + }; + + // Expect 'of' keyword + parser.assert_consume(&Token::Of, "Expected 'of' in for-of statement")?; + + // Parse right side (expression) + let right = Box::new(ExpressionParser::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after for-of clauses")?; + + let body = parser.with_context(LexicalContext::LoopBody, |p| { + StatementParser::new().parse(p) + })?; + + Ok(ForOfStatement { + left, + right, + body: Box::new(body), + await_token, + }) + } +} + +/// Parser for break statements +pub struct BreakStatementParser; + +impl BreakStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for BreakStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Break, "Expected 'break'")?; + + // Check if we're in a loop or switch + if !parser.is_in_loop_body() && !parser.is_in_switch() { + return Err(parser.error_at_current("'break' statement can only be used within a loop or switch statement")); + } + + // Check for label + let label = if !parser.previous_line_terminator() && matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierParser::new().parse(parser)?) 
+ } else { + None + }; + + // Consume semicolon if present + parser.consume(&Token::Semicolon); + + Ok(BreakStatement { label }) + } +} + +/// Parser for continue statements +pub struct ContinueStatementParser; + +impl ContinueStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ContinueStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Continue, "Expected 'continue'")?; + + // Check if we're in a loop + if !parser.is_in_loop_body() { + return Err(parser.error_at_current("'continue' statement can only be used within a loop")); + } + + // Check for label + let label = if !parser.previous_line_terminator() && matches!(parser.peek(), Token::Identifier(_)) { + Some(IdentifierParser::new().parse(parser)?) + } else { + None + }; + + // Consume semicolon if present + parser.consume(&Token::Semicolon); + + Ok(ContinueStatement { label }) + } +} + +/// Parser for return statements +pub struct ReturnStatementParser; + +impl ReturnStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ReturnStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Return, "Expected 'return'")?; + + // Check if we're in a function + if !parser.is_in_function() { + return Err(parser.error_at_current("'return' statement can only be used within a function")); + } + + // Check for return value + let argument = if parser.previous_line_terminator() || + parser.check(&Token::Semicolon) || + parser.check(&Token::RightBrace) || + parser.is_at_end() { + None + } else { + Some(Box::new(ExpressionParser::new().parse(parser)?)) + }; + + // Consume semicolon if present + parser.consume(&Token::Semicolon); + + Ok(ReturnStatement { argument }) + } +} + +/// Parser for with statements +pub struct WithStatementParser; + +impl WithStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for WithStatementParser { + fn 
parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::With, "Expected 'with'")?; + parser.assert_consume(&Token::LeftParen, "Expected '(' after 'with'")?; + + let object = Box::new(ExpressionParser::new().parse(parser)?); + + parser.assert_consume(&Token::RightParen, "Expected ')' after with object")?; + + let body = Box::new(StatementParser::new().parse(parser)?); + + Ok(WithStatement { + object, + body, + }) + } +} + +/// Parser for throw statements +pub struct ThrowStatementParser; + +impl ThrowStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ThrowStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Throw, "Expected 'throw'")?; + + // Line terminator not allowed between throw and expression + if parser.previous_line_terminator() { + return Err(parser.error_at_current("Line terminator not allowed after 'throw'")); + } + + let argument = Box::new(ExpressionParser::new().parse(parser)?); + + // Consume semicolon if present + parser.consume(&Token::Semicolon); + + Ok(ThrowStatement { argument }) + } +} + +/// Parser for try statements +pub struct TryStatementParser; + +impl TryStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for TryStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::Try, "Expected 'try'")?; + + let block = BlockStatementParser::new().parse(parser)?; + + // Parse catch clause if present + let handler = if parser.consume(&Token::Catch) { + Some(self.parse_catch_clause(parser)?) + } else { + None + }; + + // Parse finally clause if present + let finalizer = if parser.consume(&Token::Finally) { + Some(BlockStatementParser::new().parse(parser)?) 
+ } else { + None + }; + + // Either catch or finally must be present + if handler.is_none() && finalizer.is_none() { + return Err(parser.error_at_current("Missing catch or finally after try")); + } + + Ok(TryStatement { + block, + handler, + finalizer, + }) + } +} + +impl TryStatementParser { + fn parse_catch_clause(&self, parser: &mut Parser) -> ParseResult { + // The 'catch' keyword has already been consumed + + // Parse parameter if present + let param = if parser.consume(&Token::LeftParen) { + let pattern = PatternParser::new().parse(parser)?; + parser.assert_consume(&Token::RightParen, "Expected ')' after catch parameter")?; + Some(pattern) + } else { + None + }; + + let body = BlockStatementParser::new().parse(parser)?; + + Ok(CatchClause { + param, + body, + }) + } +} + +/// Parser for labeled statements +pub struct LabeledStatementParser; + +impl LabeledStatementParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for LabeledStatementParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let label = IdentifierParser::new().parse(parser)?; + + parser.assert_consume(&Token::Colon, "Expected ':' after label")?; + + // Add label to context + //parser.add_label(label.name.clone()); + + let body = Box::new(StatementParser::new().parse(parser)?); + + // Remove label from context + //parser.remove_label(&label.name); + + Ok(LabeledStatement { + label, + body, + }) + } +} diff --git a/src/grammar/this.rs b/src/grammar/this.rs new file mode 100644 index 0000000..cfb0628 --- /dev/null +++ b/src/grammar/this.rs @@ -0,0 +1,18 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; + +pub struct ThisExpressionParser; + +impl ThisExpressionParser { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ThisExpressionParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + parser.assert_consume(&Token::This, "Expected 'this'")?; + Ok(ThisExpression {}) + } +} \ No newline at end of file diff --git 
a/src/lexer/context.rs b/src/lexer/context.rs index d55fe0a..92de66f 100644 --- a/src/lexer/context.rs +++ b/src/lexer/context.rs @@ -13,12 +13,14 @@ pub enum LexicalContext { LoopParameters, LoopBody, SwitchBody, + ModuleBody { allow_await: bool }, } impl fmt::Display for LexicalContext { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Default => write!(f, "global"), + Self::ModuleBody { allow_await: _ } => write!(f, "module"), Self::PropertyKey => write!(f, "property key"), Self::MemberAccess => write!(f, "member access"), Self::ImportExport => write!(f, "import export"), @@ -38,14 +40,6 @@ impl fmt::Display for LexicalContext { impl LexicalContext { - // Fast check if this context allows any keywords as identifiers - pub fn has_keywords_as_identifiers(&self) -> bool { - match self { - LexicalContext::Default => false, - _ => true, - } - } - pub fn allows_token_as_identifier(&self, token: &Token) -> bool { match self { // In property contexts, all keywords can be identifiers except a few special ones @@ -53,7 +47,7 @@ impl LexicalContext { //let result = matches!(keyword, "default"); - println!("Checking in MemberAccess with {:#?}", token); + //println!("Checking in MemberAccess with {:#?}", token); if token == &Token::Default { true @@ -63,6 +57,10 @@ impl LexicalContext { true } else if token == &Token::Get { true + } else if token == &Token::Set { + true + } else if token == &Token::As { + true } else { false } @@ -71,8 +69,25 @@ impl LexicalContext { //false }, Self::PropertyKey => { + + if token == &Token::Default { + true + } else if token == &Token::From { + true + } else if token == &Token::For { + true + } else if token == &Token::Get { + true + } else if token == &Token::Set { + true + } else if token == &Token::As { + true + } else { + false + } + //println!("Currently in PropertyKey with {:#?}", keyword); - false + //false }, // In import/export contexts, specific keywords are allowed as identifiers @@ -135,6 +150,10 @@ 
impl LexicalContext { //println!("Currently in Default with {:#?}", keyword); false }, - } + + Self::ModuleBody { allow_await: _ } => { + false + }, + } } } diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 24902e6..810cca1 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -5,7 +5,7 @@ pub struct Lexer<'a> { source: &'a str, bytes: &'a [u8], source_len: usize, - tokens: Vec<(Token, (usize, usize))>, + tokens: Vec<(Token, [usize; 2])>, start: usize, current: usize, line: usize, @@ -14,10 +14,10 @@ pub struct Lexer<'a> { previous_char: char, } -macro_rules! add_token { - ($self:expr, $token_type:expr) => { - $self.tokens.push(($token_type, ($self.line, $self.column))) - }; +macro_rules! emit_token { + ($lexer:expr, $token:expr) => { + $lexer.tokens.push(($token, [$lexer.line, $lexer.column])) + } } impl<'a> Lexer<'a> { @@ -185,7 +185,7 @@ impl<'a> Lexer<'a> { }; // Add the token - add_token!(self, token_type); + emit_token!(self, token_type); } // Helper method to create an identifier token @@ -195,13 +195,13 @@ impl<'a> Lexer<'a> { Token::Identifier(text.to_string()) } - pub fn scan_tokens(&mut self) -> Result, LexerError> { + pub fn scan_tokens(&mut self) -> Result, LexerError> { while !self.is_at_end() { self.start = self.current; self.scan_token()?; } let _eof_column = self.column; - add_token!(self, Token::EOF); + emit_token!(self, Token::EOS); Ok(std::mem::take(&mut self.tokens)) } @@ -209,168 +209,168 @@ impl<'a> Lexer<'a> { let c = self.advance(); match c { - '(' => add_token!(self, Token::LeftParen), - ')' => add_token!(self, Token::RightParen), - '{' => add_token!(self, Token::LeftBrace), - '}' => add_token!(self, Token::RightBrace), - '[' => add_token!(self, Token::LeftBracket), - ']' => add_token!(self, Token::RightBracket), - ',' => add_token!(self, Token::Comma), - ';' => add_token!(self, Token::Semicolon), - ':' => add_token!(self, Token::Colon), - '#' => add_token!(self, Token::Hash), + '(' => emit_token!(self, Token::LeftParen), + 
')' => emit_token!(self, Token::RightParen), + '{' => emit_token!(self, Token::LeftBrace), + '}' => emit_token!(self, Token::RightBrace), + '[' => emit_token!(self, Token::LeftBracket), + ']' => emit_token!(self, Token::RightBracket), + ',' => emit_token!(self, Token::Comma), + ';' => emit_token!(self, Token::Semicolon), + ':' => emit_token!(self, Token::Colon), + '#' => emit_token!(self, Token::Hash), '.' => { if self.match_char('.') && self.match_char('.') { - add_token!(self, Token::Ellipsis); + emit_token!(self, Token::Ellipsis); } else { - add_token!(self, Token::Dot); + emit_token!(self, Token::Dot); } }, '+' => { if self.match_char('+') { - add_token!(self, Token::PlusPlus); + emit_token!(self, Token::PlusPlus); } else if self.match_char('=') { - add_token!(self, Token::PlusEqual); + emit_token!(self, Token::PlusEqual); } else { - add_token!(self, Token::Plus); + emit_token!(self, Token::Plus); } }, '-' => { if self.match_char('-') { - add_token!(self, Token::MinusMinus); + emit_token!(self, Token::MinusMinus); } else if self.match_char('=') { - add_token!(self, Token::MinusEqual); + emit_token!(self, Token::MinusEqual); } else { - add_token!(self, Token::Minus); + emit_token!(self, Token::Minus); } }, '%' => { if self.match_char('=') { - add_token!(self, Token::PercentEqual); + emit_token!(self, Token::PercentEqual); } else { - add_token!(self, Token::Percent); + emit_token!(self, Token::Percent); } }, '^' => { if self.match_char('=') { - add_token!(self, Token::CaretEqual); + emit_token!(self, Token::CaretEqual); } else { - add_token!(self, Token::Caret); + emit_token!(self, Token::Caret); } }, '*' => { if self.match_char('*') { if self.match_char('=') { - add_token!(self, Token::StarStarEqual); + emit_token!(self, Token::StarStarEqual); } else { - add_token!(self, Token::StarStar); + emit_token!(self, Token::StarStar); } } else if self.match_char('=') { - add_token!(self, Token::StarEqual); + emit_token!(self, Token::StarEqual); } else { - 
add_token!(self, Token::Star); + emit_token!(self, Token::Star); } }, '/' => self.handle_slash()?, '!' => { if self.match_char('=') { if self.match_char('=') { - add_token!(self, Token::BangEqualEqual); + emit_token!(self, Token::BangEqualEqual); } else { - add_token!(self, Token::BangEqual); + emit_token!(self, Token::BangEqual); } } else { - add_token!(self, Token::Bang); + emit_token!(self, Token::Bang); } }, '=' => { if self.match_char('>') { - add_token!(self, Token::Arrow); + emit_token!(self, Token::Arrow); } else if self.match_char('=') { if self.match_char('=') { - add_token!(self, Token::EqualEqualEqual); + emit_token!(self, Token::EqualEqualEqual); } else { - add_token!(self, Token::EqualEqual); + emit_token!(self, Token::EqualEqual); } } else { - add_token!(self, Token::Equal); + emit_token!(self, Token::Equal); } }, '<' => { if self.match_char('=') { - add_token!(self, Token::LessEqual); + emit_token!(self, Token::LessEqual); } else if self.match_char('<') { if self.match_char('=') { - add_token!(self, Token::LessLessEqual); + emit_token!(self, Token::LessLessEqual); } else { - add_token!(self, Token::LessLess); + emit_token!(self, Token::LessLess); } } else { - add_token!(self, Token::Less); + emit_token!(self, Token::Less); } }, '>' => { if self.match_char('=') { - add_token!(self, Token::GreaterEqual); + emit_token!(self, Token::GreaterEqual); } else if self.match_char('>') { if self.match_char('>') { if self.match_char('=') { - add_token!(self, Token::GreaterGreaterGreaterEqual); + emit_token!(self, Token::GreaterGreaterGreaterEqual); } else { - add_token!(self, Token::GreaterGreaterGreater); + emit_token!(self, Token::GreaterGreaterGreater); } } else if self.match_char('=') { - add_token!(self, Token::GreaterGreaterEqual); + emit_token!(self, Token::GreaterGreaterEqual); } else { - add_token!(self, Token::GreaterGreater); + emit_token!(self, Token::GreaterGreater); } } else { - add_token!(self, Token::Greater); + emit_token!(self, Token::Greater); 
} }, '&' => { if self.match_char('&') { if self.match_char('=') { - add_token!(self, Token::AmpersandAmpersandEqual); + emit_token!(self, Token::AmpersandAmpersandEqual); } else { - add_token!(self, Token::AmpersandAmpersand); + emit_token!(self, Token::AmpersandAmpersand); } } else if self.match_char('=') { - add_token!(self, Token::AmpersandEqual); + emit_token!(self, Token::AmpersandEqual); } else { - add_token!(self, Token::Ampersand); + emit_token!(self, Token::Ampersand); } }, '|' => { if self.match_char('|') { if self.match_char('=') { - add_token!(self, Token::PipePipeEqual); + emit_token!(self, Token::PipePipeEqual); } else { - add_token!(self, Token::PipePipe); + emit_token!(self, Token::PipePipe); } } else if self.match_char('=') { - add_token!(self, Token::PipeEqual); + emit_token!(self, Token::PipeEqual); } else { - add_token!(self, Token::Pipe); + emit_token!(self, Token::Pipe); } }, - '~' => add_token!(self, Token::Tilde), + '~' => emit_token!(self, Token::Tilde), '?' => { if self.match_char('?') { if self.match_char('=') { - add_token!(self, Token::QuestionQuestionEqual); + emit_token!(self, Token::QuestionQuestionEqual); } else { - add_token!(self, Token::QuestionQuestion); + emit_token!(self, Token::QuestionQuestion); } } else if self.match_char('.') { - add_token!(self, Token::QuestionDot); + emit_token!(self, Token::QuestionDot); } else { - add_token!(self, Token::Question); + emit_token!(self, Token::Question); } }, @@ -444,11 +444,11 @@ impl<'a> Lexer<'a> { } else if self.match_char('*') { self.block_comment()?; } else if self.match_char('=') { - add_token!(self, Token::SlashEqual); + emit_token!(self, Token::SlashEqual); } else if self.is_regexp_start() { self.regexp()?; } else { - add_token!(self, Token::Slash); + emit_token!(self, Token::Slash); } Ok(()) } @@ -562,7 +562,7 @@ impl<'a> Lexer<'a> { } } - add_token!(self, Token::RegExpLiteral(pattern, flags)); + emit_token!(self, Token::RegExpLiteral(pattern, flags)); Ok(()) } @@ -702,7 +702,7 
@@ impl<'a> Lexer<'a> { // Consume the closing backtick self.advance(); - add_token!(self, Token::TemplateLiteral(parts)); + emit_token!(self, Token::TemplateLiteral(parts)); Ok(()) } @@ -783,7 +783,7 @@ impl<'a> Lexer<'a> { // Consume the closing quote self.advance(); - add_token!(self, Token::StringLiteral(value)); + emit_token!(self, Token::StringLiteral(value)); Ok(()) } @@ -1032,7 +1032,7 @@ impl<'a> Lexer<'a> { )); } - add_token!(self, Token::BigIntLiteral(value_str)); + emit_token!(self, Token::BigIntLiteral(value_str)); return Ok(()); } @@ -1045,14 +1045,14 @@ impl<'a> Lexer<'a> { !value_str.contains('E') && value_str.len() < 10 { // For small integers, parse directly to avoid floating point conversion if let Ok(int_val) = value_str.parse::() { - add_token!(self, Token::NumberLiteral(int_val as f64)); + emit_token!(self, Token::NumberLiteral(int_val as f64)); return Ok(()); } } match value_str.parse::() { Ok(value) => { - add_token!(self, Token::NumberLiteral(value)); + emit_token!(self, Token::NumberLiteral(value)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1091,7 +1091,7 @@ impl<'a> Lexer<'a> { // Parse as binary match i64::from_str_radix(&value_str.replace('_', ""), 2) { Ok(_) => { - add_token!(self, Token::BigIntLiteral(format!("0b{}", value_str))); + emit_token!(self, Token::BigIntLiteral(format!("0b{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1107,7 +1107,7 @@ impl<'a> Lexer<'a> { // Parse as binary and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 2) { Ok(value) => { - add_token!(self, Token::NumberLiteral(value as f64)); + emit_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1147,7 +1147,7 @@ impl<'a> Lexer<'a> { // Parse as octal match i64::from_str_radix(&value_str.replace('_', ""), 8) { Ok(_) => { - add_token!(self, Token::BigIntLiteral(format!("0o{}", value_str))); + emit_token!(self, Token::BigIntLiteral(format!("0o{}", value_str))); Ok(()) }, Err(_) => 
Err(LexerError::new( @@ -1163,7 +1163,7 @@ impl<'a> Lexer<'a> { // Parse as octal and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 8) { Ok(value) => { - add_token!(self, Token::NumberLiteral(value as f64)); + emit_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1203,7 +1203,7 @@ impl<'a> Lexer<'a> { // Parse as hex match i64::from_str_radix(&value_str.replace('_', ""), 16) { Ok(_) => { - add_token!(self, Token::BigIntLiteral(format!("0x{}", value_str))); + emit_token!(self, Token::BigIntLiteral(format!("0x{}", value_str))); Ok(()) }, Err(_) => Err(LexerError::new( @@ -1219,7 +1219,7 @@ impl<'a> Lexer<'a> { // Parse as hex and convert to f64 match i64::from_str_radix(&value_str.replace('_', ""), 16) { Ok(value) => { - add_token!(self, Token::NumberLiteral(value as f64)); + emit_token!(self, Token::NumberLiteral(value as f64)); Ok(()) }, Err(_) => Err(LexerError::new( diff --git a/src/lexer/token.rs b/src/lexer/token.rs index cc0675e..3198cfc 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -125,7 +125,7 @@ pub enum Token { From, As, // Sentinel - EOF, + EOS, } impl Token { @@ -221,7 +221,7 @@ impl Token { Token::As => Some("as"), // Sentinel - Token::EOF => None, + Token::EOS => None, } } diff --git a/src/main.rs b/src/main.rs index 61973dc..ceb9574 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ mod ast; mod lexer; mod parser; +mod grammar; use std::path::PathBuf; use std::fs; @@ -41,10 +42,11 @@ fn run(file: &str) -> Result<(), Box> where st let mut lexer = Lexer::new(&source); let tokens = lexer.scan_tokens()?; - let mut parser = Parser::new(tokens); + + let mut parser = Parser::new(&tokens); parser.attach_source(&source); - let ast = parser.parse()?; - + let ast = parser.parse_script()?; + println!("AST: {:#?}", ast); Ok(()) diff --git a/src/parser/asi.rs b/src/parser/asi.rs index 3b568a2..bbf759e 100644 --- a/src/parser/asi.rs +++ b/src/parser/asi.rs @@ -1,82 +1,75 @@ 
-use super::prelude::*; - use crate::lexer::Token; use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - pub fn consume_semicolon(&mut self, message: &str) -> ParseResult { +use super::parser::Parser; - if self.match_token(&Token::Semicolon) { - return Ok(self.peek_previous().unwrap().clone()); - } +impl<'a> Parser<'a> { + pub fn consume_semicolon(&mut self, message: &str) -> ParseResult<&Token> { + if self.consume(&Token::Semicolon) { + return Ok(self.peek_previous()); + } - // Automatic Semicolon Insertion (ASI) rules - - if self.check(&Token::RightBrace) { - return Ok(self.peek_previous().unwrap().clone()); - } - - if self.is_at_end() { - return Ok(self.peek_previous().unwrap().clone()); - } + // Automatic Semicolon Insertion (ASI) rules + if self.check(&Token::RightBrace) { + return Ok(self.peek_previous()); + } + + if self.is_at_end() { + return Ok(self.peek_previous()); + } if self.previous_line_terminator() { + // Special case: restricted productions + // These statements cannot be followed by a line terminator without a semicolon + let prev = self.peek_previous(); - // Special case: restricted productions - // These statements cannot be followed by a line terminator without a semicolon - if let Some(prev) = self.peek_previous() { - match prev { - // Rule: No LineTerminator here after return/throw/yield/break/continue - Token::Return | - Token::Throw | - Token::Yield | - Token::Break | - Token::Continue => { - // Check if there's an expression after these keywords - // If not, ASI applies - if !self.is_expression_start() { - return Err(parser_error_at_current!(self, message)); - } - }, - _ => {} - } - } - - return Ok(self.peek_previous().unwrap().clone()); - } - - // Otherwise, it's an error - Err(parser_error_at_current!(self, message)) - } + match prev { + // Rule: No LineTerminator here after return/throw/yield/break/continue + Token::Return | + Token::Throw | + Token::Yield | + Token::Break | + Token::Continue => { + // Check if 
there's an expression after these keywords + // If not, ASI applies + if !self.is_expression_start() { + return Err(self.error_at_current(message)); + } + }, + _ => { + return Ok(prev) + }, + } + } + // Otherwise, it's an error + Err(self.error_at_current(message)) + } - // Helper method to check if the current token would start an expression - fn is_expression_start(&self) -> bool { - match self.peek() { - Some(Token::Identifier(_)) | - Some(Token::NumberLiteral(_)) | - Some(Token::StringLiteral(_)) | - Some(Token::TemplateLiteral(_)) | - Some(Token::RegExpLiteral(_, _)) | - Some(Token::True) | - Some(Token::False) | - Some(Token::Null) | - Some(Token::This) | - Some(Token::LeftParen) | - Some(Token::LeftBracket) | - Some(Token::LeftBrace) | - Some(Token::Function) | - Some(Token::New) | - Some(Token::Delete) | - Some(Token::Typeof) | - Some(Token::Void) | - Some(Token::Plus) | - Some(Token::Minus) | - Some(Token::Bang) | - Some(Token::Tilde) => true, - _ => false - } - } -} \ No newline at end of file + // Helper method to check if the current token would start an expression + fn is_expression_start(&self) -> bool { + match self.peek() { + Token::Identifier(_) | + Token::NumberLiteral(_) | + Token::StringLiteral(_) | + Token::TemplateLiteral(_) | + Token::RegExpLiteral(_, _) | + Token::True | + Token::False | + Token::Null | + Token::This | + Token::LeftParen | + Token::LeftBracket | + Token::LeftBrace | + Token::Function | + Token::New | + Token::Delete | + Token::Typeof | + Token::Void | + Token::Plus | + Token::Minus | + Token::Bang | + Token::Tilde => true, + _ => false + } + } +} diff --git a/src/parser/classes.rs b/src/parser/classes.rs deleted file mode 100644 index 55586ae..0000000 --- a/src/parser/classes.rs +++ /dev/null @@ -1,156 +0,0 @@ -use super::prelude::*; - -use crate::ast::*; -use crate::lexer::{Token, LexicalContext}; -use super::error::ParseResult; -use super::core::Parser; -use super::expressions::Precedence; - -impl Parser { - - pub fn 
parse_class_declaration(&mut self) -> ParseResult { - self.advance(); // consume 'class' - - let id = self.expect_identifier("Expected class name")?; - let super_class = self.match_token(&Token::Extends) - .then(|| self.parse_expression_with_precedence(Precedence::Call)) - .transpose()?; - - let body = self.parse_class_body()?; - - Ok(ClassDeclaration { id, super_class, body }) - } - - pub fn parse_class_expression(&mut self) -> ParseResult { - self.advance(); // consume 'class' - - // Optional class name for expressions - let id = matches!(self.peek(), Some(Token::Identifier(_))) - .then(|| self.expect_identifier("Expected class name")) - .transpose()?; - - // Optional extends clause - let super_class = self.match_token(&Token::Extends) - .then(|| self.parse_expression_with_precedence(Precedence::Call).map(Box::new)) - .transpose()?; - - let body = self.parse_class_body()?; - - Ok(Expression::Class { id, super_class, body }) - } - - pub fn parse_class_body(&mut self) -> ParseResult> { - self.consume(&Token::LeftBrace, "Expected '{' before class body")?; - - // Classes are always in strict mode - let prev_strict = self.state.in_strict_mode; - self.state.in_strict_mode = true; - - let mut body = Vec::new(); - - while !self.check(&Token::RightBrace) && !self.is_at_end() { - // Skip empty class elements (semicolons) - if self.match_token(&Token::Semicolon) { - continue; - } - - body.push(self.parse_class_member()?); - } - - self.consume(&Token::RightBrace, "Expected '}' after class body")?; - self.state.in_strict_mode = prev_strict; - - Ok(body) - } - - pub fn parse_class_member(&mut self) -> ParseResult { - let is_static = self.match_token(&Token::Static); - - // Handle static blocks (ES2022) - if is_static && self.check(&Token::LeftBrace) { - return self.parse_static_block(); - } - - // Parse method modifiers - let is_async = self.match_token(&Token::Async); - let is_generator = self.match_token(&Token::Star); - - // Check for getter/setter - let mut kind = 
MethodKind::Method; - if !is_async && !is_generator { - if self.match_token(&Token::Get) { - kind = MethodKind::Getter; - } else if self.match_token(&Token::Set) { - kind = MethodKind::Setter; - } - } - - // Parse property key - let key = self.with_context(LexicalContext::PropertyKey, |parser| { - parser.parse_property_key() - })?; - - // Check for constructor method - if !is_static && !is_async && !is_generator && kind == MethodKind::Method { - if let PropertyKey::Identifier(name) = &key { - if name.as_ref() == "constructor" { - let params = self.parse_function_params()?; - - self.consume(&Token::LeftBrace, "Expected '{' before function body")?; - let body = self.parse_function_body(false, false)?; - self.consume(&Token::RightBrace, "Expected '}' after function body")?; - - return Ok(ClassMember::Constructor { params, body }); - } - } - } - - // Method definition - if self.check(&Token::LeftParen) || is_generator || is_async { - let params = self.parse_function_params()?; - - self.consume(&Token::LeftBrace, "Expected '{' before function body")?; - let body = self.parse_function_body(is_async, is_generator)?; - self.consume(&Token::RightBrace, "Expected '}' after function body")?; - - return Ok(ClassMember::Method { - key, - value: MethodDefinition { - params, - body, - is_async, - is_generator, - }, - kind, - is_static, - }); - } - - // Class field - let value = self.match_token(&Token::Equal) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&Token::Semicolon, "Expected ';' after class field")?; - - Ok(ClassMember::Property { - key, - value, - is_static, - }) - } - - pub fn parse_static_block(&mut self) -> ParseResult { - self.consume(&Token::LeftBrace, "Expected '{' after 'static'")?; - - let mut body = Vec::new(); - - while !self.check(&Token::RightBrace) && !self.is_at_end() { - body.push(self.parse_statement()?); - } - - self.consume(&Token::RightBrace, "Expected '}' after static block")?; - - Ok(ClassMember::StaticBlock { body }) - } -} 
diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs new file mode 100644 index 0000000..c0ce981 --- /dev/null +++ b/src/parser/combinator.rs @@ -0,0 +1,6 @@ +use super::error::ParseResult; +use super::parser::Parser; + +pub trait ParserCombinator { + fn parse(&self, parser: &mut Parser) -> ParseResult; +} diff --git a/src/parser/context.rs b/src/parser/context.rs new file mode 100644 index 0000000..db99b0b --- /dev/null +++ b/src/parser/context.rs @@ -0,0 +1,74 @@ +use std::collections::HashSet; +use crate::lexer::LexicalContext; + +/// Maintains parser state and context information +pub struct ParserContext { + pub in_strict_mode: bool, + pub labels: HashSet>, + pub context_stack: Vec, + pub comments: Vec, +} + +impl ParserContext { + pub fn new() -> Self { + Self { + in_strict_mode: false, + labels: HashSet::new(), + context_stack: vec![LexicalContext::Default], + comments: Vec::new(), + } + } + + pub fn current_context(&self) -> &LexicalContext { + self.context_stack.last().unwrap_or(&LexicalContext::Default) + } + + pub fn push_context(&mut self, context: LexicalContext) { + self.context_stack.push(context); + } + + pub fn pop_context(&mut self) { + if self.context_stack.len() > 1 { + self.context_stack.pop(); + } + } + + pub fn has_context(&self, predicate: F) -> bool + where + F: Fn(&LexicalContext) -> bool + { + self.context_stack.iter().any(predicate) + } + + pub fn is_in_loop_body(&self) -> bool { + self.has_context(|ctx| matches!(ctx, LexicalContext::LoopBody)) + } + + pub fn is_in_switch(&self) -> bool { + self.has_context(|ctx| matches!(ctx, LexicalContext::SwitchBody)) + } + + pub fn is_in_function(&self) -> bool { + self.has_context(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) + } + + pub fn allows_yield(&self) -> bool { + matches!(self.current_context(), LexicalContext::FunctionBody { allow_yield: true, .. 
}) + } + + pub fn allows_await(&self) -> bool { + matches!(self.current_context(), LexicalContext::FunctionBody { allow_await: true, .. }) + } + + pub fn get_context_stack_info(&self) -> Vec { + let depth = 10; + let stack_len = self.context_stack.len(); + let start_idx = if stack_len > depth { stack_len - depth } else { 0 }; + + self.context_stack[start_idx..] + .iter() + .rev() + .map(|ctx| format!("{}", ctx)) + .collect() + } +} diff --git a/src/parser/core.rs b/src/parser/core.rs deleted file mode 100644 index 3605375..0000000 --- a/src/parser/core.rs +++ /dev/null @@ -1,424 +0,0 @@ -use super::prelude::*; - -use crate::ast::*; -use crate::lexer::{Token, LexicalContext}; -use super::error::{ParserError, ParseResult}; -use super::state::ParserState; -use std::collections::HashSet; - - -pub struct Parser { - pub tokens: Vec<(Token, (usize, usize))>, - pub current: usize, - pub comments: Vec, - pub state: ParserState, - pub source: Option, - - context_stack: Vec, -} - -impl Parser { - - pub fn new(tokens: Vec<(Token, (usize, usize))>) -> Self { - Parser { - tokens, - current: 0, - comments: Vec::new(), - state: ParserState::new(), - source: None, - context_stack: vec![LexicalContext::Default], - } - } - - // Add methods to manage the context stack - pub fn push_context(&mut self, context: LexicalContext) { - self.context_stack.push(context); - } - - pub fn pop_context(&mut self) -> Option { - if self.context_stack.len() > 1 { - self.context_stack.pop() - } else { - None - } - } - - pub fn current_context(&self) -> LexicalContext { - *self.context_stack.last().unwrap_or(&LexicalContext::Default) - } - - pub fn get_context_stack_info(&self) -> Vec { - // Get up to the last 6 entries from the context stack - let stack_len = self.context_stack.len(); - let start_idx = if stack_len > 6 { stack_len - 6 } else { 0 }; - - self.context_stack[start_idx..] 
- .iter() - .rev() - .map(|ctx| format!("{}", ctx)) - .collect() - } - - pub fn get_current_position(&self) -> (usize, usize) { - let item = if self.is_at_end() { - self.tokens.get(self.current - 1) - } else { - self.tokens.get(self.current) - }; - - item.map(|(_, pos)| pos).unwrap_or(&(0,0)).clone() - } - - // TODO FIXME this is erroneous it takes All the tokens starting from now until end not just those ones to be processed within the context scope - pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult - where - F: FnOnce(&mut Self) -> ParseResult, - { - let current_pos = self.current; - - // Only process tokens if the context has any keywords that can be identifiers - if context.has_keywords_as_identifiers() { - // Iterate over tokens starting from the current position - for (token_type, _) in self.tokens.iter_mut().skip(current_pos) { - // Check if the token type can be treated as an identifier in the current context - if context.allows_token_as_identifier(token_type) { - // Get the keyword text only when a conversion is needed - if let Some(text) = token_type.keyword_text() { - // Modify the token type in-place - *token_type = Token::Identifier(text.to_string()); - } - } - } - } - - self.push_context(context); - let result = f(self); - self.pop_context(); - result - } - - // Helper methods to check contexts - pub fn is_in_loop_body(&self) -> bool { - self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::LoopBody)) - } - - pub fn is_in_loop_parameters(&self) -> bool { - //self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::LoopParameters)) - matches!(self.context_stack.last(), Some(LexicalContext::LoopParameters)) - } - - pub fn is_in_switch(&self) -> bool { - //self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::SwitchBody)) - matches!(self.context_stack.last(), Some(LexicalContext::SwitchBody)) - } - - pub fn is_in_function(&self) -> bool { - self.context_stack.iter().any(|ctx| matches!(ctx, 
LexicalContext::FunctionBody { .. })) - } - - pub fn allows_yield(&self) -> bool { - if let Some(LexicalContext::FunctionBody { allow_yield, .. }) = self.context_stack.iter() - .rev() - .find(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) - { - *allow_yield - } else { - false - } - } - - pub fn allows_await(&self) -> bool { - if let Some(LexicalContext::FunctionBody { allow_await, .. }) = self.context_stack.iter() - .rev() - .find(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) - { - *allow_await - } else { - false - } - } - - // Method to attach source code to an existing parser - pub fn attach_source(&mut self, source: &str) { - self.source = Some(source.to_string()); - } - - // Token navigation methods - pub fn is_at_end(&self) -> bool { - self.current >= self.tokens.len() || matches!(self.peek(), Some(Token::EOF)) - } - - pub fn peek_previous(&self) -> Option<&Token> { - if self.current > 0 { - self.tokens.get(self.current - 1).map(|(token_type, _)| token_type) - } else { - None - } - } - - pub fn peek_next(&self, offset: usize) -> Option<&Token> { - if self.current + offset < self.tokens.len() { - self.tokens.get(self.current + offset).map(|(token_type, _)| token_type) - } else { - None - } - } - - pub fn peek(&self) -> Option<&Token> { - self.tokens.get(self.current).map(|(token_type, _)| token_type) - } - - pub fn advance(&mut self) -> Option<&Token> { - if !self.is_at_end() { - self.current += 1; - } - self.peek_previous() - } - - pub fn check(&self, token_type: &Token) -> bool { - match self.peek() { - Some(t) => t == token_type, - None => false, - } - } - - pub fn match_token(&mut self, token_type: &Token) -> bool { - if self.check(token_type) { - self.advance(); - true - } else { - false - } - } - - pub fn match_any(&mut self, token_types: &[Token]) -> bool { - for token_type in token_types { - if self.check(token_type) { - self.advance(); - return true; - } - } - false - } - - pub fn consume(&mut self, token_type: &Token, message: 
&str) -> ParseResult { - if token_type == &Token::Semicolon { - self.consume_semicolon(message)?; - return Ok(Token::Semicolon); - } else if self.check(token_type) { - return Ok(self.advance().unwrap().clone()); - } - - Err(parser_error_at_current!(self, message)) - } - - pub fn previous_line_terminator(&self) -> bool { - if let Some((_, prev_pos)) = self.tokens.get(self.current.saturating_sub(1)) { - if let Some((_, curr_pos)) = self.tokens.get(self.current) { - return prev_pos.0 < curr_pos.0; - } - } - false - } - - pub fn expect_identifier(&mut self, message: &str) -> ParseResult> { - if let Some(Token::Identifier(name)) = self.peek().cloned() { - self.advance(); - Ok(name.into_boxed_str()) - } else { - Err(parser_error_at_current_mut!(self, message)) - } - } - - pub fn get_source_text(&self) -> String { - self.source.clone().unwrap_or_default() - } - - // Main parse methods - pub fn parse(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Ok(Program { - source_type: SourceType::Script, - body: Vec::new(), - comments: Vec::new(), - }); - } - - self.parse_program() - } - - pub fn parse_as_module(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Ok(Program { - source_type: SourceType::Module, - body: Vec::new(), - comments: Vec::new(), - }); - } - - self.parse_module() - } - - pub fn parse_single_statement(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Ok(Statement::Empty); - } - - let stmt = self.parse_statement()?; - - // Ensure we've consumed all tokens - if !self.is_at_end() && !matches!(self.peek(), Some(Token::EOF)) { - return Err(parser_error_at_current!(self, "Unexpected token after statement")); - } - - Ok(stmt) - } - - pub fn parse_single_expression(&mut self) -> ParseResult { - if self.tokens.is_empty() { - return Err(parser_error_at_current!(self, "Empty input")); - } - - let expr = self.parse_expression()?; - - // Ensure we've consumed all tokens - if !self.is_at_end() && !matches!(self.peek(), 
Some(Token::EOF)) { - return Err(parser_error_at_current!(self, "Unexpected token after expression")); - } - - Ok(expr) - } - - pub fn parse_comment(&mut self, text: String, is_block: bool, start: usize, end: usize) { - let comment = Comment { - text: text.into_boxed_str(), - is_block, - span: (start as u32, end as u32), - }; - self.comments.push(comment); - } - - // TODO delete - // Helper method to validate variable names - pub fn validate_variable_name(&self, name: &str) -> ParseResult<()> { - if self.state.in_strict_mode { - if name == "eval" || name == "arguments" { - return Err(parser_error_at_current!(self, "'{}' cannot be used as a variable name in strict mode", name)); - } - } - Ok(()) - } - - // Helper method to validate function parameters - pub fn validate_function_params(&self, params: &[Expression]) -> ParseResult<()> { - let mut seen_params = HashSet::new(); - - for param in params { - if let Expression::Identifier(name) = param { - if self.state.in_strict_mode && (name.as_ref() == "eval" || name.as_ref() == "arguments") { - // TODO should be previous so backtrack one? - return Err(parser_error_at_current!(self, "'{}' cannot be used as a parameter name in strict mode", name)); - } - if !seen_params.insert(name.clone()) { - return Err(parser_error_at_current!(self, "Duplicate parameter name '{}'", name)); - } - } - } - - Ok(()) - } - - // Helper method to handle octal literals in strict mode - pub fn validate_octal_literal(&self, value: &str) -> ParseResult<()> { - if self.state.in_strict_mode && value.starts_with('0') && !value.starts_with("0x") && !value.starts_with("0b") && !value.starts_with("0o") { - // TODO should be previous so backtrack one? 
- return Err(parser_error_at_current!(self, "Octal literals are not allowed in strict mode")); - } - Ok(()) - } - - // Helper method to parse a list of elements separated by commas - pub fn parse_comma_separated_list(&mut self, terminator: &Token, parser_fn: F) -> ParseResult> - where - F: Fn(&mut Self) -> ParseResult, - { - let mut elements = Vec::new(); - - if !self.check(terminator) { - loop { - elements.push(parser_fn(self)?); - - if !self.match_token(&Token::Comma) { - break; - } - - // Handle trailing comma - if self.check(terminator) { - break; - } - } - } - - self.consume(terminator, &format!("Expected '{:?}'", terminator))?; - Ok(elements) - } - - // Helper method to parse arguments for function calls - pub fn parse_arguments(&mut self) -> ParseResult> { - let mut args = Vec::new(); - - if !self.check(&Token::RightParen) { - loop { - if self.match_token(&Token::Ellipsis) { - // Spread argument - let expr = self.parse_expression()?; - args.push(Argument::Spread(expr)); - } else { - // Regular argument - let expr = self.parse_expression()?; - args.push(Argument::Expression(expr)); - } - - if !self.match_token(&Token::Comma) { - break; - } - - // Handle trailing comma - if self.check(&Token::RightParen) { - break; - } - } - } - - self.consume(&Token::RightParen, "Expected ')' after arguments")?; - - Ok(args) - } - - // Property key parsing for object literals, class members, and destructuring patterns - pub fn parse_property_key(&mut self) -> ParseResult { - if self.match_token(&Token::LeftBracket) { - let expr = self.parse_expression()?; - self.consume(&Token::RightBracket, "Expected ']' after computed property key")?; - Ok(PropertyKey::Computed(expr)) - } else if self.match_token(&Token::Hash) { - let name = self.expect_identifier("Expected private identifier name")?; - Ok(PropertyKey::PrivateIdentifier(name)) - } else if let Some(Token::StringLiteral(_)) = self.peek() { - if let Token::StringLiteral(s) = self.advance().unwrap().clone() { - 
Ok(PropertyKey::StringLiteral(s.into_boxed_str())) - } else { - unreachable!() - } - } else if let Some(Token::NumberLiteral(_)) = self.peek() { - if let Token::NumberLiteral(n) = self.advance().unwrap() { - Ok(PropertyKey::NumericLiteral(*n)) - } else { - unreachable!() - } - } else { - let name = self.expect_identifier("Expected property name 0")?; - Ok(PropertyKey::Identifier(name)) - } - } -} diff --git a/src/parser/error.rs b/src/parser/error.rs index d6d7ad1..67ab01b 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -1,5 +1,5 @@ use crate::lexer::{LexerError, TemplatePart, Token}; -use super::core::Parser; +use super::parser::Parser; use std::fmt; #[derive(Debug, Clone)] @@ -19,12 +19,12 @@ impl ParserError { let context_stack = parser.get_context_stack_info(); - let token = parser.peek().unwrap_or_else(|| &Token::EOF); + let token = parser.peek(); // Infer token length based on its type let token_length = match token { - Token::EOF => 0, + Token::EOS => 0, Token::LeftParen | Token::RightParen @@ -85,7 +85,6 @@ impl ParserError { | Token::GreaterGreaterEqual | Token::GreaterGreaterGreater | Token::LessLessEqual - | Token::AmpersandAmpersand | Token::AmpersandAmpersandEqual | Token::PipePipeEqual | Token::Ellipsis @@ -154,12 +153,12 @@ impl ParserError { Token::Constructor => 11, // Literals - Token::Identifier(name) => name.len(), - Token::StringLiteral(value) => value.len() + 2, // Account for quotation marks - Token::NumberLiteral(value) => value.to_string().len(), - Token::BigIntLiteral(value) => value.len() + 1, // Account for the trailing 'n' - Token::RegExpLiteral(pattern, flags) => pattern.len() + flags.len() + 2, // Account for the slashes - Token::TemplateLiteral(parts) => parts.iter().fold(2, |acc, part| { + Token::Identifier(ref name) => name.len(), + Token::StringLiteral(ref value) => value.len() + 2, // Account for quotation marks + Token::NumberLiteral(ref value) => value.to_string().len(), + Token::BigIntLiteral(ref value) => 
value.len() + 1, // Account for the trailing 'n' + Token::RegExpLiteral(ref pattern, ref flags) => pattern.len() + flags.len() + 2, // Account for the slashes + Token::TemplateLiteral(ref parts) => parts.iter().fold(2, |acc, part| { acc + match part { TemplatePart::String(s) => s.len(), TemplatePart::Expression(e) => e.len(), @@ -168,7 +167,7 @@ impl ParserError { }; - let (line, column) = parser.get_current_position(); + let [line, column] = parser.get_current_position(); let col = column - token_length; @@ -200,11 +199,6 @@ impl ParserError { pub fn at_current(parser: &Parser, message: &str) -> Self { Self::new(parser, message) } - - /// Create a parser error from the current token with a mutable reference - pub fn at_current_mut(parser: &mut Parser, message: &str) -> Self { - Self::new(&*parser, message) - } } @@ -250,7 +244,7 @@ impl fmt::Display for ParserError { } // Print current token information if available - if !matches!(self.current_token, Token::EOF) { + if !matches!(self.current_token, Token::EOS) { writeln!(f, "\nCurrent token: {:#?}", self.current_token)?; } @@ -292,7 +286,7 @@ impl From for ParserError { source_line: None, source_span: None, context_stack: Vec::new(), - current_token: Token::EOF, + current_token: Token::EOS, } } } @@ -344,23 +338,3 @@ fn extract_source_line_with_context(source: &str, line_number: usize, column: us } pub type ParseResult = Result; - -#[macro_export] -macro_rules! parser_error_at_current { - ($self:expr, $message:expr) => { - $crate::parser::error::ParserError::at_current($self, $message) - }; - ($self:expr, $fmt:expr, $($arg:tt)*) => { - $crate::parser::error::ParserError::at_current($self, &format!($fmt, $($arg)*)) - }; -} - -#[macro_export] -macro_rules! 
parser_error_at_current_mut { - ($self:expr, $message:expr) => { - $crate::parser::error::ParserError::at_current_mut($self, $message) - }; - ($self:expr, $fmt:expr, $($arg:tt)*) => { - $crate::parser::error::ParserError::at_current_mut($self, &format!($fmt, $($arg)*)) - }; -} diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs deleted file mode 100644 index d477f66..0000000 --- a/src/parser/expressions.rs +++ /dev/null @@ -1,865 +0,0 @@ -use super::prelude::*; - -use crate::ast::*; -use crate::lexer::{Token, TemplatePart, LexicalContext}; -use super::error::ParseResult; -use super::core::Parser; - -// Define operator precedence levels and associativity -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] -pub enum Precedence { - None, - Comma, // , - Assignment, // = += -= etc. - Conditional, // ?: - LogicalOr, // || ?? - LogicalAnd, // && - BitwiseOr, // | - BitwiseXor, // ^ - BitwiseAnd, // & - Equality, // == != === !== - Relational, // < > <= >= in instanceof - Shift, // << >> >>> - Additive, // + - - Multiplicative, // * / % - Exponentiation, // ** - Prefix, // ! ~ + - ++ -- typeof void delete - Postfix, // ++ -- - Call, // . 
[] () - Primary -} - -impl Parser { - - pub fn parse_expression(&mut self) -> ParseResult { - self.parse_expression_with_precedence(Precedence::Comma) - } - - pub fn parse_expression_with_precedence(&mut self, precedence: Precedence) -> ParseResult { - // Parse prefix expressions - let mut expr = match self.peek() { - // Unary prefix operators - Some(Token::Bang) | - Some(Token::Tilde) | - Some(Token::Plus) | - Some(Token::Minus) | - Some(Token::PlusPlus) | - Some(Token::MinusMinus) | - Some(Token::Typeof) | - Some(Token::Void) | - Some(Token::Delete) => { - self.advance(); - let operator = match self.peek_previous().unwrap() { - Token::Bang => UnaryOperator::Not, - Token::Tilde => UnaryOperator::BitwiseNot, - Token::Plus => UnaryOperator::Plus, - Token::Minus => UnaryOperator::Minus, - Token::PlusPlus => UnaryOperator::Increment, - Token::MinusMinus => UnaryOperator::Decrement, - Token::Typeof => UnaryOperator::Typeof, - Token::Void => UnaryOperator::Void, - Token::Delete => UnaryOperator::Delete, - _ => unreachable!(), - }; - - let argument = self.parse_expression_with_precedence(Precedence::Prefix)?; - - Expression::Unary { - operator, - argument: Box::new(argument), - prefix: true, - } - }, - // Await expression - Some(Token::Await) if self.allows_await() => { - self.advance(); - let argument = self.parse_expression_with_precedence(Precedence::Prefix)?; - Expression::Await(Box::new(argument)) - }, - // Yield expression - Some(Token::Yield) if self.allows_yield() => { - self.advance(); - let delegate = self.match_token(&Token::Star); - - // Yield can be used without an argument - let argument = if self.check(&Token::Semicolon) || - self.check(&Token::RightBrace) || - self.check(&Token::Comma) || - self.check(&Token::RightParen) || - self.check(&Token::Colon) || - self.is_at_end() { - None - } else { - Some(Box::new(self.parse_expression_with_precedence(Precedence::Assignment)?)) - }; - - Expression::Yield { - argument, - delegate, - } - }, - // Primary 
expressions - Some(Token::This) => { - self.advance(); - Expression::This - }, - Some(Token::Arguments) => { - self.advance(); - Expression::Identifier("arguments".into()) - }, - Some(Token::Super) => { - self.advance(); - Expression::Super - }, - Some(Token::Null) => { - self.advance(); - Expression::Literal(Literal::Null) - }, - Some(Token::Undefined) => { - self.advance(); - Expression::Literal(Literal::Undefined) - }, - Some(Token::True) => { - self.advance(); - Expression::Literal(Literal::Boolean(true)) - }, - Some(Token::False) => { - self.advance(); - Expression::Literal(Literal::Boolean(false)) - }, - Some(Token::NumberLiteral(n)) => { - let value = *n; - self.advance(); - Expression::Literal(Literal::Number(value)) - }, - Some(Token::StringLiteral(_)) => { - if let Token::StringLiteral(s) = &self.advance().unwrap() { - Expression::Literal(Literal::String(s.clone().into_boxed_str())) - } else { - unreachable!() - } - }, - Some(Token::RegExpLiteral(_, _)) => { - if let Token::RegExpLiteral(pattern, flags) = self.advance().unwrap().clone() { - Expression::Literal(Literal::RegExp { - pattern: pattern.into_boxed_str(), - flags: flags.into_boxed_str(), - }) - } else { - unreachable!() - } - }, - Some(Token::BigIntLiteral(_)) => { - if let Token::BigIntLiteral(s) = self.advance().unwrap().clone() { - Expression::Literal(Literal::BigInt(s.into_boxed_str())) - } else { - unreachable!() - } - }, - Some(Token::TemplateLiteral(_)) => { - if let Token::TemplateLiteral(parts) = self.advance().unwrap().clone() { - - let mut quasis = Vec::new(); - let mut expressions = Vec::new(); - - for (i, part) in parts.iter().enumerate() { - match part { - TemplatePart::String(s) => { - // Add the string part to quasis - quasis.push(s.clone().into_boxed_str()); - - // If this is the last part and it's a string, we need to ensure - // we have one more expression than quasis (as per JS spec) - if i == parts.len() - 1 && !expressions.is_empty() { - quasis.push("".into()); - } - }, - 
TemplatePart::Expression(expr_str) => { - // Create a temporary parser to parse the expression - let expr_str_clone = expr_str.clone(); - let mut temp_lexer = crate::lexer::Lexer::new(&expr_str_clone); - match temp_lexer.scan_tokens() { - Ok(tokens) => { - let mut temp_parser = Parser::new(tokens); - match temp_parser.parse_expression() { - Ok(expr) => expressions.push(expr), - Err(e) => { - return Err(parser_error_at_current!(self, "Invalid expression in template literal: {}", e.message)); - } - } - }, - Err(e) => { - return Err(parser_error_at_current!(self, "Error tokenizing expression in template literal: {}", e.message)); - } - } - - // If this is the last part and it's an expression, we need to add an empty string - if i == parts.len() - 1 { - quasis.push("".into()); - } - } - } - } - - // Validate that we have one more quasi than expressions (as per JS spec) - if quasis.len() != expressions.len() + 1 { - // Add an empty string at the end if needed - if quasis.len() == expressions.len() { - quasis.push("".into()); - } else { - return Err(parser_error_at_current!(self, "Invalid template literal: expected {} quasis but got {}", expressions.len() + 1, quasis.len())); - } - } - - Expression::TemplateLiteral { quasis, expressions } - } else { - unreachable!("Expected TemplateLiteral token") - } - }, - // TODO everything but Identifier hoists matches below, need a better approach to var as = e.class; scenarios - Some(Token::Identifier(_)) => { - let name = self.expect_identifier("Expected identifier in expression")?; - if self.check(&Token::Arrow) { - let param = Expression::Identifier(name); - self.advance(); - return self.parse_arrow_function_body(vec![param], false); - } - Expression::Identifier(name) - }, - Some(Token::LeftParen) => { - //println!("In ( {:#?}", self.peek()); - self.advance(); // Consume the '(' - - // Handle empty parameter list: () => ... 
- if self.match_token(&Token::RightParen) { - return if self.match_token(&Token::Arrow) { - self.parse_arrow_function_body(vec![], false) - } else { - Err(parser_error_at_current!(self, "Unexpected empty parentheses '()'")) - }; - } - - //println!("Here 1 current token {:#?}", self.peek()); - let mut expr = self.parse_expression()?; - - //println!("Here 2"); - // Handle single-parameter or nested parentheses: (x) => ..., ((expr)) - if self.match_token(&Token::RightParen) { - if self.match_token(&Token::Arrow) { - let params = match expr { - //Expression::Identifier(_) => vec![expr], - //Expression::Sequence(seq) => seq, - Expression::Sequence(seq) => seq,//self.flatten_sequence(seq), - _ => vec![expr], - }; - return self.parse_arrow_function_body(params, false); - } - } else if self.check(&Token::Comma) { - - //println!("Some comma {:#?}", self.peek()); - - // Handle comma-separated parameters: (a, b, c) - let mut params = vec![expr]; - while self.match_token(&Token::Comma) { - //println!("Current token {:#?}", self.peek()); - params.push(self.parse_expression_with_precedence(Precedence::Assignment)?); - } - self.consume(&Token::RightParen, "Expected ')' after parameters")?; - return if self.match_token(&Token::Arrow) { - self.parse_arrow_function_body(params, false) - } else { - Ok(Expression::Sequence(params)) - }; - } else { - self.consume(&Token::RightParen, "Expected ')' after expression")?; - } - - // Handle expressions after ')': ., [ or ( - if self.match_token(&Token::Dot) { - expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - } else if self.check(&Token::LeftBracket) || self.check(&Token::LeftParen) { - expr = self.parse_expression_with_precedence(Precedence::Call)?; - } - - expr - }, - Some(Token::LeftBracket) => { - //println!("I am here"); - self.advance(); - let mut elements = Vec::new(); - while !self.check(&Token::RightBracket) && !self.is_at_end() { - if self.match_token(&Token::Comma) { - elements.push(ArrayElement::Hole); - } 
else { - if self.match_token(&Token::Ellipsis) { - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - elements.push(ArrayElement::Spread(expr)); - } else { - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - elements.push(ArrayElement::Expression(expr)); - } - if !self.check(&Token::RightBracket) { - self.consume(&Token::Comma, "Expected ',' after array element")?; - } - } - } - self.consume(&Token::RightBracket, "Expected ']' after array elements")?; - Expression::Array(elements) - }, - Some(Token::LeftBrace) => { - self.advance(); - let mut properties = Vec::new(); - while !self.check(&Token::RightBrace) && !self.is_at_end() { - if self.match_token(&Token::Ellipsis) { - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - properties.push(ObjectProperty::Spread(expr)); - } else { - let is_async = self.match_token(&Token::Async); - let is_generator = self.match_token(&Token::Star); - let mut kind = PropertyKind::Init; - if !is_async && !is_generator { - if self.check(&Token::Get) || self.check(&Token::Set) { - let is_property_name = if let Some(next_token) = self.peek_next(1) { - matches!(next_token, Token::Colon) - } else { - false - }; - if !is_property_name { - if self.match_token(&Token::Get) { - kind = PropertyKind::Get; - } else if self.match_token(&Token::Set) { - kind = PropertyKind::Set; - } - } - } - } - - // Use with_context for property key parsing - let key = self.with_context(LexicalContext::PropertyKey, |parser| { - parser.parse_property_key() - })?; - - let computed = matches!(key, PropertyKey::Computed(_)); - - // Method definition - if self.check(&Token::LeftParen) || is_generator || is_async { - let method_kind = match kind { - PropertyKind::Get => MethodKind::Getter, - PropertyKind::Set => MethodKind::Setter, - _ => MethodKind::Method, - }; - - let params = self.parse_function_params()?; - - self.consume(&Token::LeftBrace, "Expected '{' before function body")?; - let 
body = self.parse_function_body(is_async, is_generator)?; - self.consume(&Token::RightBrace, "Expected '}' after function body")?; - - properties.push(ObjectProperty::Method { - key, - value: MethodDefinition { - params, - body, - is_async, - is_generator, - }, - kind: method_kind, - computed, - }); - } else { - // Regular property - let shorthand = !computed && - !self.check(&Token::Colon) && - matches!(key, PropertyKey::Identifier(_)); - - let value = if shorthand { - if let PropertyKey::Identifier(name) = &key { - Expression::Identifier(name.clone()) - } else { - unreachable!() - } - } else { - self.consume(&Token::Colon, "Expected ':' after property name")?; - self.parse_expression_with_precedence(Precedence::Assignment)? - }; - - properties.push(ObjectProperty::Property { - key, - value, - kind, - computed, - shorthand, - }); - } - } - - if !self.check(&Token::RightBrace) { - //println!("Now have token {:#?}", self.peek()); - - self.consume(&Token::Comma, "Expected ',' after property")?; - - // Allow trailing comma - if self.check(&Token::RightBrace) { - break; - } - } else { - break; - } - } - - self.consume(&Token::RightBrace, "Expected '}' after object literal")?; - - Expression::Object(properties) - }, - - Some(Token::Function) => self.parse_function_expression()?, - Some(Token::Class) => self.parse_class_expression()?, - Some(Token::New) => { - self.advance(); // consume 'new' - - // Handle new.target meta property - if self.match_token(&Token::Dot) { - if let Some(Token::Identifier(name)) = self.peek().cloned() { - if name == "target" { - self.advance(); // consume 'target' - Expression::MetaProperty { - meta: "new".into(), - property: "target".into(), - } - } else { - return Err(parser_error_at_current!(self, "Expected 'target' after 'new.'")); - } - } else { - return Err(parser_error_at_current!(self, "Expected 'target' after 'new.'")); - } - } else { - // Regular new expression - let callee = self.parse_expression_with_precedence(Precedence::Call)?; - 
- Expression::New(Box::new(callee)) - } - }, - Some(Token::Import) => { - self.advance(); // consume 'import' - self.consume(&Token::LeftParen, "Expected '(' after 'import'")?; - let source = self.parse_expression_with_precedence(Precedence::Assignment)?; - self.consume(&Token::RightParen, "Expected ')' after import source")?; - - Expression::Import(Box::new(source)) - }, - Some(Token::Hash) => { - self.advance(); // consume '#' - let name = self.expect_identifier("Expected private identifier name")?; - Expression::PrivateName(name) - }, - Some(Token::Async) if self.is_async_function() => self.parse_async_function_expression()?, - _ => { - return Err(parser_error_at_current!(self, "Unexpected token in expression")); - } - }; - - // Parse infix and postfix expressions based on precedence - while !self.is_at_end() { - let current_precedence = match self.peek() { - Some(Token::Comma) => Precedence::Comma, - Some(Token::Question) => { - match self.peek_next(1) { - Some(Token::Dot) => Precedence::Call, - _ => Precedence::Conditional, - } - }, - Some(Token::Equal) | - Some(Token::PlusEqual) | - Some(Token::MinusEqual) | - Some(Token::StarEqual) | - Some(Token::SlashEqual) | - Some(Token::PercentEqual) | - Some(Token::StarStarEqual) | - Some(Token::AmpersandEqual) | - Some(Token::PipeEqual) | - Some(Token::CaretEqual) | - Some(Token::LessLessEqual) | - Some(Token::GreaterGreaterEqual) | - Some(Token::GreaterGreaterGreaterEqual) | - Some(Token::AmpersandAmpersandEqual) | - Some(Token::PipePipeEqual) | - Some(Token::QuestionQuestionEqual) => Precedence::Assignment, - Some(Token::PipePipe) | - Some(Token::QuestionQuestion) => Precedence::LogicalOr, - Some(Token::AmpersandAmpersand) => Precedence::LogicalAnd, - Some(Token::Pipe) => Precedence::BitwiseOr, - Some(Token::Caret) => Precedence::BitwiseXor, - Some(Token::Ampersand) => Precedence::BitwiseAnd, - Some(Token::EqualEqual) | - Some(Token::BangEqual) | - Some(Token::EqualEqualEqual) | - Some(Token::BangEqualEqual) => 
Precedence::Equality, - Some(Token::Less) | - Some(Token::LessEqual) | - Some(Token::Greater) | - Some(Token::GreaterEqual) | - Some(Token::In) | - Some(Token::InstanceOf) => Precedence::Relational, - Some(Token::LessLess) | - Some(Token::GreaterGreater) | - Some(Token::GreaterGreaterGreater) => Precedence::Shift, - Some(Token::Plus) | - Some(Token::Minus) => Precedence::Additive, - Some(Token::Star) | - Some(Token::Slash) | - Some(Token::Percent) => Precedence::Multiplicative, - Some(Token::StarStar) => Precedence::Exponentiation, - Some(Token::PlusPlus) | - Some(Token::MinusMinus) => Precedence::Postfix, - Some(Token::Dot) | - Some(Token::LeftBracket) | - Some(Token::LeftParen) | - Some(Token::QuestionDot) => Precedence::Call, - _ => Precedence::None, - }; - - if current_precedence == Precedence::None || precedence > current_precedence { - break; - } - // Handle postfix operators - if current_precedence == Precedence::Postfix { - if self.match_any(&[Token::PlusPlus, Token::MinusMinus]) { - if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. 
}) { - return Err(parser_error_at_current!(self, "Invalid left-hand side in postfix operation")); - } - - let operator = match self.peek_previous().unwrap() { - Token::PlusPlus => UnaryOperator::Increment, - Token::MinusMinus => UnaryOperator::Decrement, - _ => unreachable!(), - }; - - expr = Expression::Unary { - operator, - argument: Box::new(expr), - prefix: false, - }; - continue; - } - } - - // Handle infix operators - match current_precedence { - Precedence::Comma => { - self.advance(); // consume comma - - if !self.check(&Token::RightParen) { - let right = self.parse_expression_with_precedence(Precedence::Assignment)?; - if let Expression::Sequence(ref mut seq) = expr { - seq.push(right); - } else { - expr = Expression::Sequence(vec![expr, right]); - } - } - }, - Precedence::Assignment => { - // Match assignment operator - let op = if self.match_token(&Token::Equal) { - AssignmentOperator::Assign - } else if self.match_token(&Token::PlusEqual) { - AssignmentOperator::AddAssign - } else if self.match_token(&Token::MinusEqual) { - AssignmentOperator::SubtractAssign - } else if self.match_token(&Token::StarEqual) { - AssignmentOperator::MultiplyAssign - } else if self.match_token(&Token::SlashEqual) { - AssignmentOperator::DivideAssign - } else if self.match_token(&Token::PercentEqual) { - AssignmentOperator::ModuloAssign - } else if self.match_token(&Token::StarStarEqual) { - AssignmentOperator::ExponentAssign - } else if self.match_token(&Token::AmpersandEqual) { - AssignmentOperator::BitwiseAndAssign - } else if self.match_token(&Token::PipeEqual) { - AssignmentOperator::BitwiseOrAssign - } else if self.match_token(&Token::CaretEqual) { - AssignmentOperator::BitwiseXorAssign - } else if self.match_token(&Token::LessLessEqual) { - AssignmentOperator::LeftShiftAssign - } else if self.match_token(&Token::GreaterGreaterEqual) { - AssignmentOperator::RightShiftAssign - } else if self.match_token(&Token::GreaterGreaterGreaterEqual) { - 
AssignmentOperator::UnsignedRightShiftAssign - } else if self.match_token(&Token::AmpersandAmpersandEqual) { - AssignmentOperator::LogicalAndAssign - } else if self.match_token(&Token::PipePipeEqual) { - AssignmentOperator::LogicalOrAssign - } else if self.match_token(&Token::QuestionQuestionEqual) { - AssignmentOperator::NullishAssign - } else { - break; // No assignment operator found - }; - - // Validate left-hand side - if !matches!(expr, Expression::Identifier(_) | Expression::Member { .. } | Expression::Array(_) | Expression::Object(_)) { - return Err(parser_error_at_current!(self, "Invalid left-hand side in assignment")); - } - - let right = self.parse_expression_with_precedence(Precedence::Assignment)?; - - expr = Expression::Assignment { - operator: op, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::Conditional => { - self.advance(); // consume ? - - // Check if this is part of optional chaining - if self.check(&Token::Dot) { - // This is optional chaining - self.advance(); // consume . 
- - // Now handle the optional chaining - if self.match_token(&Token::LeftBracket) { - let property = self.parse_expression()?; - self.consume(&Token::RightBracket, "Expected ']' after computed property")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(property), - computed: true, - optional: true, - }; - } else if self.match_token(&Token::LeftParen) { - let arguments = self.parse_arguments()?; - expr = Expression::Call { - callee: Box::new(expr), - arguments, - optional: true, - }; - } else { - let property = self.expect_identifier("Expected property name 2")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(Expression::Identifier(property)), - computed: false, - optional: true, - }; - } - } else { - // This is a ternary operator - let consequent = self.parse_expression_with_precedence(Precedence::Assignment)?; - self.consume(&Token::Colon, "Expected ':' in conditional expression")?; - let alternate = self.parse_expression_with_precedence(Precedence::Assignment)?; - - expr = Expression::Conditional { - test: Box::new(expr), - consequent: Box::new(consequent), - alternate: Box::new(alternate), - }; - } - }, - Precedence::LogicalOr => { - let operator = if self.match_token(&Token::PipePipe) { - LogicalOperator::Or - } else if self.match_token(&Token::QuestionQuestion) { - LogicalOperator::NullishCoalescing - } else { - break; - }; - - let right = self.parse_expression_with_precedence(Precedence::LogicalAnd)?; - - expr = Expression::Logical { - operator, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::LogicalAnd => { - self.advance(); // consume && - let right = self.parse_expression_with_precedence(Precedence::BitwiseOr)?; - - expr = Expression::Logical { - operator: LogicalOperator::And, - left: Box::new(expr), - right: Box::new(right), - }; - }, - // TODO implement -// Precedence::NullishCoalescing => { -// self.advance(); // consume '??' 
-// let right = self.parse_expression_with_precedence(Precedence::NullishCoalescing)?; - // -// expr = Expression::Logical { -// operator: LogicalOperator::NullishCoalescing, -// left: Box::new(expr), -// right: Box::new(right), -// }; -// }, - Precedence::BitwiseOr | - Precedence::BitwiseXor | - Precedence::BitwiseAnd | - Precedence::Equality | - Precedence::Relational | - Precedence::Shift | - Precedence::Additive | - Precedence::Multiplicative => { - self.advance(); - let token_type = self.peek_previous().unwrap().clone(); - - let operator = match token_type { - Token::Plus => BinaryOperator::Add, - Token::Minus => BinaryOperator::Subtract, - Token::Star => BinaryOperator::Multiply, - Token::Slash => BinaryOperator::Divide, - Token::Percent => BinaryOperator::Modulo, - Token::StarStar => BinaryOperator::Exponent, - Token::Pipe => BinaryOperator::BitwiseOr, - Token::Ampersand => BinaryOperator::BitwiseAnd, - Token::Caret => BinaryOperator::BitwiseXor, - Token::LessLess => BinaryOperator::LeftShift, - Token::GreaterGreater => BinaryOperator::RightShift, - Token::GreaterGreaterGreater => BinaryOperator::UnsignedRightShift, - Token::EqualEqual => BinaryOperator::Equal, - Token::BangEqual => BinaryOperator::NotEqual, - Token::EqualEqualEqual => BinaryOperator::StrictEqual, - Token::BangEqualEqual => BinaryOperator::StrictNotEqual, - Token::Less => BinaryOperator::LessThan, - Token::LessEqual => BinaryOperator::LessThanEqual, - Token::Greater => BinaryOperator::GreaterThan, - Token::GreaterEqual => BinaryOperator::GreaterThanEqual, - Token::In => BinaryOperator::In, - Token::InstanceOf => BinaryOperator::InstanceOf, - _ => { - return Err(parser_error_at_current!(self, "Unexpected token: {:?}", token_type)); - } - }; - - // Determine next precedence level - let next_precedence = match current_precedence { - Precedence::BitwiseOr => Precedence::BitwiseXor, - Precedence::BitwiseXor => Precedence::BitwiseAnd, - Precedence::BitwiseAnd => Precedence::Equality, - 
Precedence::Equality => Precedence::Relational, - Precedence::Relational => Precedence::Shift, - Precedence::Shift => Precedence::Additive, - Precedence::Additive => Precedence::Multiplicative, - Precedence::Multiplicative => Precedence::Exponentiation, - _ => unreachable!(), - }; - - let right = self.parse_expression_with_precedence(next_precedence)?; - - expr = Expression::Binary { - operator, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::Exponentiation => { - self.advance(); // consume ** - // Exponentiation is right-associative - let right = self.parse_expression_with_precedence(Precedence::Exponentiation)?; - - expr = Expression::Binary { - operator: BinaryOperator::Exponent, - left: Box::new(expr), - right: Box::new(right), - }; - }, - Precedence::Call => { - - //println!("In call {:#?}", self.peek()); - - if self.match_token(&Token::Dot) { - - let property = self.with_context(LexicalContext::MemberAccess, |parser| { - if let Some(Token::Identifier(name)) = parser.peek().cloned() { - parser.advance(); - return Ok(name.into_boxed_str()) - } else { - return Err(parser_error_at_current!(parser, "Expected property name 3")); - }; - })?; - - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(Expression::Identifier(property)), - computed: false, - optional: false, - }; - } else if self.match_token(&Token::LeftBracket) { - //println!("This case"); - // Member access with bracket notation - let property = self.parse_expression()?; - self.consume(&Token::RightBracket, "Expected ']' after computed property")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(property), - computed: true, - optional: false, - }; - } else if self.match_token(&Token::QuestionDot) { - // Optional chaining - if self.match_token(&Token::LeftBracket) { - let property = self.parse_expression()?; - self.consume(&Token::RightBracket, "Expected ']' after computed property")?; - expr = Expression::Member { - object: 
Box::new(expr), - property: Box::new(property), - computed: true, - optional: true, - }; - } else { - let property = self.expect_identifier("Expected property name 4")?; - expr = Expression::Member { - object: Box::new(expr), - property: Box::new(Expression::Identifier(property)), - computed: false, - optional: true, - }; - } - } else if self.match_token(&Token::LeftParen) { - - // Function call - let mut args = Vec::new(); - - if !self.check(&Token::RightParen) { - loop { - if self.match_token(&Token::Ellipsis) { - // Spread argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Spread(expr)); - } else { - // Regular argument - let expr = self.parse_expression_with_precedence(Precedence::Assignment)?; - args.push(Argument::Expression(expr)); - } - - if !self.match_token(&Token::Comma) { - break; - } - - // Handle trailing comma - if self.check(&Token::RightParen) { - break; - } - } - } - - self.consume(&Token::RightParen, "Expected ')' after arguments 2")?; - - expr = Expression::Call { - callee: Box::new(expr), - arguments: args, - optional: false, - }; - } else { - break; - } - }, - _ => break, - } - } - - Ok(expr) - } - -} diff --git a/src/parser/functions.rs b/src/parser/functions.rs deleted file mode 100644 index a9ab036..0000000 --- a/src/parser/functions.rs +++ /dev/null @@ -1,192 +0,0 @@ -use super::prelude::*; - -use crate::ast::*; -use crate::lexer::{Token, LexicalContext}; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - pub fn parse_function_declaration(&mut self) -> ParseResult { - self.advance(); // consume 'function' - - let is_generator = self.match_token(&Token::Star); - let id = self.expect_identifier("Expected function name")?; - - let params = self.parse_function_params()?; - - self.consume(&Token::LeftBrace, "Expected '{' before function body")?; - let body = self.parse_function_body(is_generator, false)?; - self.consume(&Token::RightBrace, "Expected '}' after 
function body")?; - - Ok(FunctionDeclaration { - id, - params, - body, - is_async: false, - is_generator, - }) - } - - pub fn parse_function_expression(&mut self) -> ParseResult { - self.advance(); // consume 'function' - - let is_generator = self.match_token(&Token::Star); - - // Optional function name for function expressions - let id = if matches!(self.peek(), Some(Token::Identifier(_))) { - Some(self.expect_identifier("Expected function name")?) - } else { - None - }; - - let params = self.parse_function_params()?; - - self.consume(&Token::LeftBrace, "Expected '{' before function body")?; - let body = self.parse_function_body(is_generator, false)?; - self.consume(&Token::RightBrace, "Expected '}' after function body")?; - - Ok(Expression::Function { - id, - params, - body, - is_async: false, - is_generator, - }) - } - - pub fn parse_async_function_expression(&mut self) -> ParseResult { - self.advance(); // consume 'async' - self.advance(); // consume 'function' - - let is_generator = self.match_token(&Token::Star); - - // Optional function name for function expressions - let id = if matches!(self.peek(), Some(Token::Identifier(_))) { - Some(self.expect_identifier("Expected function name")?) 
- } else { - None - }; - - let params = self.parse_function_params()?; - - self.consume(&Token::LeftBrace, "Expected '{' before function body")?; - let body = self.parse_function_body(is_generator, true)?; - self.consume(&Token::RightBrace, "Expected '}' after function body")?; - - Ok(Expression::Function { - id, - params, - body, - is_async: true, - is_generator, - }) - } - - pub fn parse_arrow_function_body(&mut self, params: Vec, is_async: bool) -> ParseResult { - // Create a new function body context with appropriate yield/await flags - - let body = if self.check(&Token::LeftBrace) { - // Block body - let body = self.parse_function_body(false, is_async)?; - ArrowFunctionBody::Block(body) - } else { - // Expression body - let function_body_context = LexicalContext::FunctionBody { - allow_yield: false, - allow_await: is_async - }; - - self.with_context(function_body_context, |parser| { - let expr = parser.parse_expression()?; - Ok(ArrowFunctionBody::Expression(Box::new(expr))) - })? - }; - - Ok(Expression::ArrowFunction { - params, - body, - is_async, - }) - } - - pub fn parse_async_function_declaration(&mut self) -> ParseResult { - self.advance(); // consume 'async' - self.consume(&Token::Function, "Expected 'function' after 'async'")?; - - let is_generator = self.match_token(&Token::Star); - let id = self.expect_identifier("Expected function name")?; - - let params = self.parse_function_params()?; - - self.consume(&Token::LeftBrace, "Expected '{' before function body")?; - let body = self.parse_function_body(is_generator, true)?; - self.consume(&Token::RightBrace, "Expected '}' after function body")?; - - Ok(FunctionDeclaration { - id, - params, - body, - is_async: true, - is_generator, - }) - } - - pub fn parse_function_params(&mut self) -> ParseResult> { - self.consume(&Token::LeftParen, "Expected '(' after function name")?; - - // Create parameter name context with current strict mode - let param_context = LexicalContext::ParameterName { - strict_mode: 
self.state.in_strict_mode - }; - - self.with_context(param_context, |parser| { - let mut params = Vec::new(); - - if !parser.check(&Token::RightParen) { - loop { - if parser.match_token(&Token::Ellipsis) { - // Rest parameter - let arg = parser.parse_pattern()?; - params.push(Expression::Spread(Box::new(arg))); - break; // Rest parameter must be the last one - } else { - params.push(parser.parse_pattern()?); - } - if !parser.match_token(&Token::Comma) { - break; - } - // Handle trailing comma - if parser.check(&Token::RightParen) { - break; - } - } - } - - parser.consume(&Token::RightParen, "Expected ')' after function parameters")?; - - Ok(params) - }) - } - - pub fn parse_function_body(&mut self, is_async: bool, is_generator: bool) -> ParseResult> { - let function_body_context = LexicalContext::FunctionBody { allow_yield: is_generator, allow_await: is_async }; - self.with_context(function_body_context, |parser| { - let mut body = Vec::new(); - while !parser.check(&Token::RightBrace) && !parser.is_at_end() { - body.push(parser.parse_statement()?); - } - Ok(body) - }) - } - - // Helper method to check if we're looking at an async function - pub fn is_async_function(&self) -> bool { - if let Some(Token::Async) = self.peek() { - if let Some(next_token) = self.peek_next(1) { - return matches!(next_token, Token::Function); - } - } - false - } -} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e760443..ce8ff84 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,21 +1,15 @@ //! JavaScript parser module //! -//! This module contains the parser for JavaScript code. -//! It has been split into multiple files for better maintainability. +//! This module provides a composable, extensible parser for JavaScript code. 
-mod error; -mod state; -mod core; -mod expressions; -mod statements; -mod patterns; -mod functions; -mod classes; -mod modules; mod asi; - -pub use self::core::Parser; - -mod prelude; - -pub use prelude::*; \ No newline at end of file +mod error; +mod stream; +mod context; +mod parser; +mod combinator; + +// Public exports +pub use self::parser::Parser; +pub use self::combinator::ParserCombinator; +pub use self::error::{ParserError, ParseResult}; \ No newline at end of file diff --git a/src/parser/modules.rs b/src/parser/modules.rs deleted file mode 100644 index 73da792..0000000 --- a/src/parser/modules.rs +++ /dev/null @@ -1,384 +0,0 @@ -use super::prelude::*; - - -use crate::ast::*; -use crate::lexer::{Token, LexicalContext}; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - pub fn parse_program(&mut self) -> ParseResult { - let mut body = Vec::new(); - - while !self.is_at_end() { - body.push(self.parse_statement()?); - } - - // Extract comments - let comments = std::mem::take(&mut self.comments); - - Ok(Program { - source_type: SourceType::Script, - body, - comments, - }) - } - - pub fn parse_module(&mut self) -> ParseResult { - // Set strict mode for modules - this is fine to keep as state - // since modules are always in strict mode - self.state.in_strict_mode = true; - - let mut body = Vec::new(); - - while !self.is_at_end() { - body.push(self.parse_statement()?); - } - - // Extract comments - let comments = std::mem::take(&mut self.comments); - - Ok(Program { - source_type: SourceType::Module, - body, - comments, - }) - } - - pub fn parse_import_statement(&mut self) -> ParseResult { - let start_token = self.advance().unwrap(); // consume 'import' - - // Handle import() expression vs import statement - if self.check(&Token::LeftParen) { - // This is an import() expression, not an import statement - // Rewind and parse as expression statement - self.current -= 1; - return self.parse_expression_statement(); - } - - let mut specifiers 
= Vec::new(); - let mut source: Option> = None; - - // Handle different import forms - if matches!(self.peek(), Some(Token::StringLiteral(_))) { - // import "module-name"; (side-effect import) - source = self.parse_module_source()?; - } else if self.match_token(&Token::Star) { - // import * as name from "module-name"; (namespace import) - specifiers.push(self.parse_namespace_import()?); - source = self.parse_from_clause()?; - } else { - // import defaultExport, { named1, named2 } from "module-name"; - // or just { named1, named2 } from "module-name"; - - // Check for default import - if !self.check(&Token::LeftBrace) && !self.check(&Token::From) { - specifiers.push(self.parse_default_import()?); - - // Optional comma before named imports - if self.match_token(&Token::Comma) && !self.check(&Token::From) { - // Continue to named imports - } else if !self.check(&Token::From) { - // If no comma and not 'from', it's an error - return Err(parser_error_at_current!(self, "Expected ',' or 'from' after default import")); - } - } - - // Named imports - if self.match_token(&Token::LeftBrace) { - let named_imports = self.parse_named_imports()?; - specifiers.extend(named_imports); - } - - // Module source - if !specifiers.is_empty() { - source = self.parse_from_clause()?; - } else { - return Err(parser_error_at_current!(self, "Expected import specifiers")); - } - } - - // Parse import assertions if present - let assertions = if self.match_token(&Token::With) { - self.parse_import_assertions()? 
- } else { - Vec::new() - }; - - self.consume(&Token::Semicolon, "Expected ';' after import statement")?; - - if let Some(src) = source { - Ok(Statement::Import { - specifiers, - source: src, - assertions, - }) - } else { - Err(parser_error_at_current!(self, "Missing module source in import statement")) - } - } - - // Helper method to parse a module source string - pub fn parse_module_source(&mut self) -> ParseResult>> { - if let Token::StringLiteral(s) = self.advance().unwrap().clone() { - Ok(Some(s.into_boxed_str())) - } else { - Err(parser_error_at_current!(self, "Expected string literal for module source")) - } - } - - // Helper method to parse the 'from "module-name"' part - pub fn parse_from_clause(&mut self) -> ParseResult>> { - self.consume(&Token::From, "Expected 'from' after import specifiers")?; - self.parse_module_source() - } - - // Helper method to parse namespace import: * as name - pub fn parse_namespace_import(&mut self) -> ParseResult { - self.consume(&Token::As, "Expected 'as' after '*'")?; - // Use ImportExport context for parsing the identifier - let local = self.with_context(LexicalContext::ImportExport, |parser| { - parser.expect_identifier("Expected namespace import name") - })?; - - Ok(ImportSpecifier::Namespace(local)) - } - - // Helper method to parse default import: defaultExport - pub fn parse_default_import(&mut self) -> ParseResult { - // Use ImportExport context for parsing the identifier - let local = self.with_context(LexicalContext::ImportExport, |parser| { - parser.expect_identifier("Expected default import name") - })?; - - Ok(ImportSpecifier::Default(local)) - } - - // Helper method to parse named imports: { name1, name2 as alias2 } - pub fn parse_named_imports(&mut self) -> ParseResult> { - self.with_context(LexicalContext::ImportExport, |parser| { - let mut specifiers = Vec::new(); - - if !parser.check(&Token::RightBrace) { - loop { - let imported = parser.expect_identifier("Expected imported name")?; - - let local = if 
parser.match_token(&Token::As) { - parser.expect_identifier("Expected local name after 'as'")? - } else { - imported.clone() - }; - - specifiers.push(ImportSpecifier::Named { - imported, - local, - }); - - if !parser.match_token(&Token::Comma) { - break; - } - - // Handle trailing comma - if parser.check(&Token::RightBrace) { - break; - } - } - } - - parser.consume(&Token::RightBrace, "Expected '}' after named imports")?; - Ok(specifiers) - }) - } - - pub fn parse_import_assertions(&mut self) -> ParseResult> { - self.consume(&Token::LeftBrace, "Expected '{' after 'with'")?; - - let mut assertions = Vec::new(); - - if !self.check(&Token::RightBrace) { - loop { - // Use ImportExport context for parsing assertion keys - let key = self.with_context(LexicalContext::ImportExport, |parser| { - parser.expect_identifier("Expected assertion key") - })?; - - self.consume(&Token::Colon, "Expected ':' after assertion key")?; - - let value = if let Token::StringLiteral(s) = self.advance().unwrap().clone() { - s.into_boxed_str() - } else { - return Err(parser_error_at_current!(self, "Expected string literal for assertion value")); - }; - - assertions.push(ImportAssertion { key, value }); - - if !self.match_token(&Token::Comma) { - break; - } - - // Handle trailing comma - if self.check(&Token::RightBrace) { - break; - } - } - } - - self.consume(&Token::RightBrace, "Expected '}' after import assertions")?; - - Ok(assertions) - } - - pub fn parse_export_statement(&mut self) -> ParseResult { - let start_token = self.advance().unwrap().clone(); // consume 'export' - - // Handle export * from "module" or export * as name from "module" - if self.match_token(&Token::Star) { - return self.parse_export_all(&start_token); - } - - // Handle export default ... 
- if self.match_token(&Token::Default) { - return self.parse_export_default(&start_token); - } - - // Handle export declaration (var, let, const, function, class) - if self.is_declaration_start() { - return self.parse_export_declaration(&start_token); - } - - // Handle export { ... } [from "..."] - if self.match_token(&Token::LeftBrace) { - return self.parse_export_named_specifiers(&start_token); - } - - // If we get here, it's an invalid export statement - Err(parser_error_at_current!(self, "Invalid export statement. Expected '*', default, declaration, or named exports")) - } - - // Helper method for export * from "module" or export * as name from "module" - pub fn parse_export_all(&mut self, start_token: &Token) -> ParseResult { - // Use ImportExport context for parsing the exported name - let exported = if self.match_token(&Token::As) { - Some(self.with_context(LexicalContext::ImportExport, |parser| { - parser.expect_identifier("Expected exported name after 'as'") - })?) - } else { - None - }; - - if !self.match_token(&Token::From) { - return Err(parser_error_at_current!(self, "Expected 'from' after export *")); - } - - let source = self.parse_module_source()? - .ok_or_else(|| super::error::ParserError::at_current(self, "Expected string literal for module source"))?; - - self.consume(&Token::Semicolon, "Expected ';' after export statement")?; - - Ok(Statement::Export(ExportDeclaration::All { source, exported })) - } - - // Helper method for export default ... 
- pub fn parse_export_default(&mut self, start_token: &Token) -> ParseResult { - let declaration = if self.check(&Token::Function) { - let func_decl = self.parse_function_declaration()?; - ExportDefaultDeclaration::Function(func_decl) - } else if self.check(&Token::Class) { - let class_decl = self.parse_class_declaration()?; - ExportDefaultDeclaration::Class(class_decl) - } else if self.check(&Token::Async) && self.is_async_function() { - // Handle async function - let func_decl = self.parse_async_function_declaration()?; - ExportDefaultDeclaration::Function(func_decl) - } else { - // export default expression; - let expr = self.parse_expression()?; - self.consume(&Token::Semicolon, "Expected ';' after export default expression")?; - ExportDefaultDeclaration::Expression(expr) - }; - - Ok(Statement::Export(ExportDeclaration::Default(Box::new(declaration)))) - } - - // Helper method for export declaration - pub fn parse_export_declaration(&mut self, start_token: &Token) -> ParseResult { - let declaration = if self.check(&Token::Function) { - Declaration::Function(self.parse_function_declaration()?) - } else if self.check(&Token::Class) { - Declaration::Class(self.parse_class_declaration()?) - } else if self.check(&Token::Async) && self.is_async_function() { - Declaration::Function(self.parse_async_function_declaration()?) - } else if self.check(&Token::Var) || self.check(&Token::Let) || self.check(&Token::Const) { - Declaration::Variable(self.parse_variable_declaration()?) - } else { - return Err(parser_error_at_current!(self, "Expected declaration in export statement")); - }; - - Ok(Statement::Export(ExportDeclaration::Named { - declaration: Some(Box::new(declaration)), - specifiers: Vec::new(), - source: None, - })) - } - - // Helper method for export { ... 
} [from "..."] - pub fn parse_export_named_specifiers(&mut self, start_token: &Token) -> ParseResult { - let specifiers = self.parse_export_specifiers()?; - - // Optional from clause - let source = if self.match_token(&Token::From) { - Some(self.parse_module_source()?.ok_or_else(|| super::error::ParserError::at_current(self, "Expected string literal for module source"))?) - } else { - None - }; - - self.consume(&Token::Semicolon, "Expected ';' after export statement")?; - - Ok(Statement::Export(ExportDeclaration::Named { - declaration: None, - specifiers, - source, - })) - } - - // Helper method to parse export specifiers: { name1, name2 as alias2 } - pub fn parse_export_specifiers(&mut self) -> ParseResult> { - self.with_context(LexicalContext::ImportExport, |parser| { - let mut specifiers = Vec::new(); - - if !parser.check(&Token::RightBrace) { - loop { - let local = parser.expect_identifier("Expected exported identifier")?; - let exported = if parser.match_token(&Token::As) { - parser.expect_identifier("Expected exported name after 'as'")? 
- } else { - local.clone() - }; - specifiers.push(ExportSpecifier { local, exported }); - - if !parser.match_token(&Token::Comma) { - break; - } - - // Handle trailing comma - if parser.check(&Token::RightBrace) { - break; - } - } - } - - parser.consume(&Token::RightBrace, "Expected '}' after export specifiers")?; - Ok(specifiers) - }) - } - - // Helper method to check if the current token starts a declaration - pub fn is_declaration_start(&self) -> bool { - self.check(&Token::Var) || - self.check(&Token::Let) || - self.check(&Token::Const) || - self.check(&Token::Function) || - self.check(&Token::Class) || - (self.check(&Token::Async) && self.is_async_function()) - } -} diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..225755f --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,243 @@ +use crate::ast::*; +use crate::lexer::{Token, LexicalContext}; +use super::error::{ParserError, ParseResult}; +use super::stream::TokenStream; +use super::combinator::ParserCombinator; +use super::context::ParserContext; +use crate::grammar::*; + +use std::borrow::Cow; + +pub struct TokenAccess<'a> { + token: Cow<'a, Token>, +} + +impl<'a> std::ops::Deref for TokenAccess<'a> { + type Target = Token; + + fn deref(&self) -> &Self::Target { + self.token.as_ref() + } +} + +/// JavaScript parser +pub struct Parser<'a> { + stream: TokenStream<'a>, + context: ParserContext, +} + +impl<'a> Parser<'a> { + pub fn new(tokens: &'a [(Token, [usize; 2])]) -> Self { + Self { + stream: TokenStream::new(tokens), + context: ParserContext::new(), + } + } + + // Main parsing methods + pub fn parse_module(&mut self) -> ParseResult { + ModuleParser::new().parse(self) + } + + pub fn parse_script(&mut self) -> ParseResult { + ScriptParser::new().parse(self) + } + + pub fn parse_expression(&mut self) -> ParseResult { + ExpressionParser::new().parse(self) + } + + pub fn parse_statement(&mut self) -> ParseResult { + StatementParser::new().parse(self) + } + + // 
Source handling + pub fn attach_source(&mut self, source: &'a str) { + self.stream.attach_source(source); + } + + pub fn get_source_text(&self) -> &str { + self.stream.get_source_text() + } + + // Error handling + pub fn error_at_current(&self, message: &str) -> ParserError { + ParserError::at_current(self, message) + } + + // TokenStream delegations + pub fn is_at_end(&self) -> bool { + self.stream.is_at_end() + } + + /* + #[inline(always)] + fn peek_internal(&self) -> Option> { + self.stream.peek().map(|token| { + let cow = if !matches!(token, Token::Identifier(_)) && self.current_context().allows_token_as_identifier(token) { + if let Some(text) = token.keyword_text() { + Cow::Owned(Token::Identifier(text.to_string())) + } else { + Cow::Borrowed(token) + } + } else { + Cow::Borrowed(token) + }; + + TokenAccess { token: cow } + }) + + } + */ + + pub fn peek(&self) -> &Token { + self.stream.peek() + } + + pub fn peek_previous(&self) -> &Token { + self.stream.peek_previous() + } + + pub fn peek_next(&self, offset: usize) -> &Token { + self.stream.peek_next(offset) + } + + //pub fn peek_position(&self) -> [usize; 2] { + // self.stream.peek_position() + //} + + pub fn advance(&mut self) -> bool { + self.stream.advance() + } + + pub fn check(&self, token_type: &Token) -> bool { + self.stream.check(token_type) + } + + pub fn consume(&mut self, token_type: &Token) -> bool { + self.stream.consume(token_type) + } + + pub fn previous_line_terminator(&self) -> bool { + self.stream.previous_line_terminator() + } + + pub fn save_position(&self) -> usize { + self.stream.save_position() + } + + pub fn restore_position(&mut self, position: usize) { + self.stream.restore_position(position) + } + + pub fn get_current_position(&self) -> [usize; 2] { + self.stream.peek_position() + } + + pub fn assert_consume(&mut self, token_type: &Token, message: &str) -> ParseResult<&Token> { + if self.consume(token_type) { + Ok(self.peek_previous()) + } else { + 
Err(self.error_at_current(message)) + } + } + + // ParserContext delegations + pub fn get_context_stack_info(&self) -> Vec { + self.context.get_context_stack_info() + } + + pub fn has_context(&self, predicate: F) -> bool + where + F: Fn(&LexicalContext) -> bool + { + self.context.has_context(predicate) + } + + pub fn is_in_function(&self) -> bool { + self.context.is_in_function() + } + + pub fn is_in_loop_body(&self) -> bool { + self.context.is_in_loop_body() + } + + pub fn is_in_switch(&self) -> bool { + self.context.is_in_switch() + } + + pub fn allows_yield(&self) -> bool { + self.context.allows_yield() + } + + pub fn allows_await(&self) -> bool { + self.context.allows_await() + } + + // TODO maybe + /* + + pub struct ContextGuard<'a, 'b> { + parser: &'a mut Parser<'b>, + } + + impl<'a, 'b> Drop for ContextGuard<'a, 'b> { + fn drop(&mut self) { + self.parser.pop_context(); + } + } + + impl<'a> Parser<'a> { + pub fn with_context_guard(&mut self, context: LexicalContext) -> ContextGuard<'_, 'a> { + self.push_context(context); + ContextGuard { parser: self } + } + } + */ + pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult + where + F: FnOnce(&mut Self) -> ParseResult, + { + self.context.push_context(context); + let result = f(self); + self.context.pop_context(); + result + } + + // Label management + pub fn add_label(&mut self, label: Box) { + self.context.labels.insert(label); + } + + pub fn remove_label(&mut self, label: &str) { + self.context.labels.remove(label); + } + + pub fn has_label(&self, label: &str) -> bool { + self.context.labels.contains(label) + } + + // Strict mode handling + pub fn set_strict_mode(&mut self, strict: bool) { + self.context.in_strict_mode = strict; + } + + pub fn is_strict_mode(&self) -> bool { + self.context.in_strict_mode + } + +} + + +/* + #[inline(always)] + fn coalesce_identifier<'t>(&self, token: &'t mut Token) { + // Only transform if not already an identifier and context allows it + if 
!matches!(token, Token::Identifier(_)) && self.current_context().allows_token_as_identifier(token) { + if let Some(text) = token.keyword_text() { + // Transform the token in place to an identifier + *token = Token::Identifier(text.to_string()); + } + } + } + */ \ No newline at end of file diff --git a/src/parser/patterns.rs b/src/parser/patterns.rs deleted file mode 100644 index 3124105..0000000 --- a/src/parser/patterns.rs +++ /dev/null @@ -1,169 +0,0 @@ -use super::prelude::*; - - -use crate::ast::*; -use crate::lexer::{Token, LexicalContext}; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - pub fn parse_pattern(&mut self) -> ParseResult { - match self.peek() { - // Identifier pattern - Some(Token::Identifier(_)) => { - let name = self.expect_identifier("Expected identifier in pattern")?; - Ok(Expression::Identifier(name)) - }, - // Object pattern: { x, y } - Some(Token::LeftBrace) => { - println!("In pattern"); - self.advance(); // consume '{' - - let mut properties = Vec::new(); - - if !self.check(&Token::RightBrace) { - loop { - if self.match_token(&Token::Ellipsis) { - // Rest element - let argument = self.parse_pattern()?; - properties.push(ObjectProperty::Spread(argument)); - - // Rest element must be the last one - if !self.check(&Token::RightBrace) { - return Err(parser_error_at_current!(self, "Rest element must be the last element in object pattern")); - } - break; - } else { - // Regular property - let key = self.with_context(LexicalContext::PropertyKey, |parser| { - parser.parse_property_key() - })?; - - // Handle shorthand: { x } - let (value, computed, shorthand) = if !self.check(&Token::Colon) { - if let PropertyKey::Identifier(name) = &key { - // Shorthand property: { x } - let pattern = Expression::Identifier(name.clone()); - - // Check for default value: { x = 1 } - if self.match_token(&Token::Equal) { - let default = self.parse_expression()?; - (Expression::Assignment { - operator: AssignmentOperator::Assign, - 
left: Box::new(pattern), - right: Box::new(default), - }, false, true) - } else { - (pattern, false, true) - } - } else { - return Err(parser_error_at_current!(self, "Invalid shorthand property in object pattern")); - } - } else { - // Full syntax: { key: value } - self.advance(); // consume ':' - let pattern = self.parse_pattern()?; - - // Check for default value: { key: value = 1 } - if self.match_token(&Token::Equal) { - let default = self.parse_expression()?; - (Expression::Assignment { - operator: AssignmentOperator::Assign, - left: Box::new(pattern), - right: Box::new(default), - }, matches!(key, PropertyKey::Computed(_)), false) - } else { - (pattern, matches!(key, PropertyKey::Computed(_)), false) - } - }; - - properties.push(ObjectProperty::Property { - key, - value, - computed, - shorthand, - kind: PropertyKind::Init, // FIXME not true Get/Set - }); - } - - if !self.match_token(&Token::Comma) { - break; - } - - // Handle trailing comma - if self.check(&Token::RightBrace) { - break; - } - } - } - - self.consume(&Token::RightBrace, "Expected '}' after object pattern")?; - - Ok(Expression::Object(properties)) - }, - - // Array pattern: [x, y, z = 1] - Some(Token::LeftBracket) => { - self.advance(); // consume '[' - - let mut elements = Vec::new(); - - while !self.check(&Token::RightBracket) && !self.is_at_end() { - if self.match_token(&Token::Comma) { - // Elision (hole) - elements.push(ArrayElement::Hole); // TODO could use - } else { - if self.match_token(&Token::Ellipsis) { - // Rest element - let argument = self.parse_pattern()?; - elements.push(ArrayElement::Spread(Expression::Spread(Box::new(argument)))); - - // Rest element must be the last one - if !self.check(&Token::RightBracket) { - if self.match_token(&Token::Comma) { - if !self.check(&Token::RightBracket) { - return Err(parser_error_at_current!(self, "Rest element must be the last element in array pattern")); - } - } else { - return Err(parser_error_at_current!(self, "Expected ',' or ']' after 
rest element in array pattern")); - } - } - break; - } else { - // Regular element - let pattern = self.parse_pattern()?; - - // Check for default value: [x = 1] - if self.match_token(&Token::Equal) { - let default = self.parse_expression()?; - elements.push(ArrayElement::Expression(Expression::Assignment { - operator: AssignmentOperator::Assign, - left: Box::new(pattern), - right: Box::new(default), - })); - } else { - elements.push(ArrayElement::Expression(pattern)); - } - } - - if !self.check(&Token::RightBracket) { - self.consume(&Token::Comma, "Expected ',' after array pattern element")?; - } - } - } - - self.consume(&Token::RightBracket, "Expected ']' after array pattern")?; - - Ok(Expression::Array(elements)) - }, - - // Assignment pattern: x = 1 (handled by the caller) - - _ => { - Err(parser_error_at_current!(self, "Expected pattern")) - } - } - } - -} diff --git a/src/parser/prelude.rs b/src/parser/prelude.rs deleted file mode 100644 index 60a5d88..0000000 --- a/src/parser/prelude.rs +++ /dev/null @@ -1 +0,0 @@ -pub use crate::{parser_error_at_current, parser_error_at_current_mut}; \ No newline at end of file diff --git a/src/parser/state.rs b/src/parser/state.rs deleted file mode 100644 index 2e46997..0000000 --- a/src/parser/state.rs +++ /dev/null @@ -1,15 +0,0 @@ -use std::collections::HashSet; - -pub struct ParserState { - pub in_strict_mode: bool, - pub labels: HashSet>, -} - -impl ParserState { - pub fn new() -> Self { - Self { - in_strict_mode: false, - labels: HashSet::new(), - } - } -} diff --git a/src/parser/statements.rs b/src/parser/statements.rs deleted file mode 100644 index 86ea20d..0000000 --- a/src/parser/statements.rs +++ /dev/null @@ -1,836 +0,0 @@ -use super::prelude::*; - -use crate::ast::*; -use crate::lexer::{Token, LexicalContext}; -use super::error::ParseResult; -use super::core::Parser; - -impl Parser { - - // Variable declarations - pub fn parse_variable_declaration(&mut self) -> ParseResult { - - let kind = match self.advance() 
{ - Some(Token::Var) => VariableKind::Var, - Some(Token::Let) => VariableKind::Let, - Some(Token::Const) => VariableKind::Const, - _ => unreachable!(), - }; - - // Parse first declarator (required) - let mut declarations = vec![self.parse_variable_declarator()?]; - - // Parse additional declarators separated by commas - while self.match_token(&Token::Comma) { - declarations.push(self.parse_variable_declarator()?); - } - - // Consume semicolon unless we're in a for-in/of loop context - //let current_context = self.current_context(); - //let is_in_loop_parameters = matches!(current_context, LexicalContext::LoopParameters); - - - if !self.is_in_loop_parameters() { - self.consume(&Token::Semicolon, "Expected ';' after variable declaration")?; - } - - Ok(VariableDeclaration { declarations, kind }) - } - - - pub fn parse_statement(&mut self) -> ParseResult { - match self.peek() { - // Empty statement (just a semicolon) - Some(Token::Semicolon) => { - self.advance(); - Ok(Statement::Empty) - }, - - // Block statement or literal object expression { ... 
} - Some(Token::LeftBrace) => { - match self.peek_previous() { - Some(Token::RightParen) => self.parse_block(), - _ => match self.peek_next(1) { - Some(Token::RightBrace) | Some(Token::LeftBracket) | Some(Token::Ellipsis) => self.parse_expression_statement(), - Some(Token::Identifier(_)) | Some(Token::StringLiteral(_)) => { - if let Some(Token::Colon) = self.peek_next(2) { - self.parse_expression_statement() - } else { - self.parse_block() - } - } - _ => self.parse_block(), - }, - } - }, - - // Declaration statements - Some(Token::Var) | Some(Token::Let) | Some(Token::Const) => self.parse_variable_statement(), - Some(Token::Function) => self.parse_function_statement(), - Some(Token::Class) => self.parse_class_statement(), - - // Control flow statements - Some(Token::If) => self.parse_if(), - Some(Token::Switch) => self.parse_switch(), - Some(Token::For) => self.parse_for(), - Some(Token::While) => self.parse_while(), - Some(Token::Do) => self.parse_do_while(), - - // Exception handling - Some(Token::Try) => self.parse_try(), - Some(Token::Throw) => self.parse_throw(), - - // Function control - Some(Token::Return) => self.parse_return(), - Some(Token::Break) => self.parse_break(), - Some(Token::Continue) => self.parse_continue(), - - // Module statements - Some(Token::Import) => self.parse_import_statement(), - Some(Token::Export) => self.parse_export_statement(), - - // Other statements - Some(Token::With) => self.parse_with(), - Some(Token::Debugger) => self.parse_debugger(), - - // Labeled statement - Some(Token::Identifier(_)) if self.is_label() => self.parse_labeled(), - - // Default: expression statement - _ => self.parse_expression_statement(), - } - } - - /// Parse a block statement: { statements... 
} - fn parse_block(&mut self) -> ParseResult { - self.consume(&Token::LeftBrace, "Expected '{'")?; - - let mut statements = Vec::new(); - - while !self.check(&Token::RightBrace) && !self.is_at_end() { - statements.push(self.parse_statement()?); - } - - self.consume(&Token::RightBrace, "Expected '}'")?; - - Ok(Statement::Block(statements)) - } - - /// Parse variable declarations as a statement - fn parse_variable_statement(&mut self) -> ParseResult { - let declaration = self.parse_variable_declaration()?; - Ok(Statement::Declaration(Declaration::Variable(declaration))) - } - - /// Parse function declaration as a statement - fn parse_function_statement(&mut self) -> ParseResult { - let declaration = self.parse_function_declaration()?; - Ok(Statement::Declaration(Declaration::Function(declaration))) - } - - /// Parse class declaration as a statement - fn parse_class_statement(&mut self) -> ParseResult { - let declaration = self.parse_class_declaration()?; - Ok(Statement::Declaration(Declaration::Class(declaration))) - } - - /// Parse if statement: if (condition) consequent else alternate - fn parse_if(&mut self) -> ParseResult { - self.advance(); // consume 'if' - self.consume(&Token::LeftParen, "Expected '(' after 'if'")?; - - let test = self.parse_expression()?; - self.consume(&Token::RightParen, "Expected ')' after if condition")?; - - let consequent = Box::new(self.parse_statement()?); - let alternate = self.match_token(&Token::Else) - .then(|| self.parse_statement().map(Box::new)) - .transpose()?; - - Ok(Statement::If { test, consequent, alternate }) - } - - /// Parse a single case in a switch statement - fn parse_switch_case(&mut self) -> ParseResult { - let test = if self.match_token(&Token::Case) { - // After 'case', we expect an expression - Some(self.parse_expression()?) 
- } else if self.match_token(&Token::Default) { - None - } else { - println!("Current token {:#?}", self.peek()); - return Err(parser_error_at_current!(self, "Expected 'case' or 'default'")); - }; - - self.consume(&Token::Colon, "Expected ':' after case value")?; - - let mut consequent = Vec::new(); - - // Parse statements until next case, default, or end of switch - while !self.check(&Token::Case) && - !self.check(&Token::Default) && - !self.check(&Token::RightBrace) && - !self.is_at_end() { - consequent.push(self.parse_statement()?); - } - - Ok(SwitchCase { test, consequent }) - } - - /// Parse try statement: try block [catch] [finally] - fn parse_try(&mut self) -> ParseResult { - self.advance(); // consume 'try' - - let block = Box::new(self.parse_block()?); - - // Parse optional catch clause - let handler = self.match_token(&Token::Catch) - .then(|| self.parse_catch_clause()) - .transpose()?; - - // Parse optional finally clause - let finalizer = self.match_token(&Token::Finally) - .then(|| self.parse_block().map(Box::new)) - .transpose()?; - - // Either catch or finally must be present - if handler.is_none() && finalizer.is_none() { - return Err(parser_error_at_current!(self, "Expected 'catch' or 'finally' after try block")); - } - - Ok(Statement::Try { block, handler, finalizer }) - } - - /// Parse catch clause: catch ([param]) block - fn parse_catch_clause(&mut self) -> ParseResult { - // Optional catch parameter - let param = self.match_token(&Token::LeftParen) - .then(|| { - // Attempt to parse the parameter identifier - if let Some(Token::Identifier(name)) = self.peek().cloned() { - self.advance(); // Consume the identifier - self.consume(&Token::RightParen, "Expected ')' after catch parameter")?; - Ok(Expression::Identifier(name.into_boxed_str())) - } else { - // If not an identifier, it's an error - Err(parser_error_at_current!(self, "Expected identifier for catch parameter")) - } - }) - .transpose()?; - - let body = Box::new(self.parse_block()?); - - 
Ok(CatchClause { param, body }) - - } - - /// Parse throw statement: throw expression; - fn parse_throw(&mut self) -> ParseResult { - self.advance(); // consume 'throw' - - // No line terminator allowed between throw and expression - if self.previous_line_terminator() { - return Err(parser_error_at_current!(self, "Illegal newline after throw")); - } - - let expr = self.parse_expression()?; - self.consume(&Token::Semicolon, "Expected ';' after throw statement")?; - - Ok(Statement::Throw(expr)) - } - - /// Parse with statement: with (object) statement - fn parse_with(&mut self) -> ParseResult { - self.advance(); // consume 'with' - - // Check if in strict mode - if self.state.in_strict_mode { - return Err(parser_error_at_current!(self, "'with' statements are not allowed in strict mode")); - } - - self.consume(&Token::LeftParen, "Expected '(' after 'with'")?; - - let object = self.parse_expression()?; - - self.consume(&Token::RightParen, "Expected ')' after with expression")?; - - let body = Box::new(self.parse_statement()?); - - Ok(Statement::With { object, body }) - } - - /// Parse debugger statement: debugger; - fn parse_debugger(&mut self) -> ParseResult { - self.advance(); // consume 'debugger' - - self.consume(&Token::Semicolon, "Expected ';' after debugger statement")?; - - Ok(Statement::Debugger) - } - - /// Parse labeled statement: identifier: statement - fn parse_labeled(&mut self) -> ParseResult { - let label = self.expect_identifier("Expected label name")?; - - self.consume(&Token::Colon, "Expected ':' after label")?; - - // Add label to the set of active labels - let label_exists = !self.state.labels.insert(label.clone()); - if label_exists { - return Err(parser_error_at_current!(self, "Label '{}' has already been declared", label)); - } - - // Parse the labeled statement - let body = Box::new(self.parse_statement()?); - - // Remove the label from the set - self.state.labels.remove(&label); - - Ok(Statement::Labeled { label, body }) - } - - /// Check if 
the current token is a label - fn is_label(&self) -> bool { - if let Some(Token::Identifier(_)) = self.peek() { - if let Some(next_token) = self.peek_next(1) { - return matches!(next_token, Token::Colon); - } - } - false - } - - /// Parse expression statement: expression; - pub fn parse_expression_statement(&mut self) -> ParseResult { - //println!("in parse_expression_statement"); - - let start_pos = self.current; - - let expr = self.parse_expression()?; - - //println!("in parse_expression_statement parsed {:#?}", expr); - - // Check for directive prologue - let is_directive = if let Expression::Literal(Literal::String(_)) = &expr { - // Only consider as directive if it's at the beginning of a function/program - // and is a simple string literal (not an expression) - start_pos == 0 || self.peek_previous().unwrap() == &Token::LeftBrace - } else { - false - }; - - if !self.is_in_loop_parameters() { - self.consume(&Token::Semicolon, "Expected ';' after expression statement")?; - } - - // If this is a "use strict" directive, update parser state - if is_directive { - if let Expression::Literal(Literal::String(value)) = &expr { - if value.as_ref() == "use strict" { - self.state.in_strict_mode = true; - } - } - } - - Ok(Statement::Expression(expr)) - } - - - /// Parse switch statement: switch (discriminant) { case/default... 
} - fn parse_switch(&mut self) -> ParseResult { - self.advance(); // consume 'switch' - self.consume(&Token::LeftParen, "Expected '(' after 'switch'")?; - - let discriminant = self.parse_expression()?; - self.consume(&Token::RightParen, "Expected ')' after switch expression")?; - - self.consume(&Token::LeftBrace, "Expected '{' before switch cases")?; - - // Use SwitchBody context instead of state flag - let cases = self.with_context(LexicalContext::SwitchBody, |parser| { - let mut inner_cases = Vec::new(); - let mut has_default = false; - - while !parser.check(&Token::RightBrace) && !parser.is_at_end() { - let case = parser.parse_switch_case()?; - - // Validate only one default case - if case.test.is_none() { - if has_default { - return Err(parser_error_at_current!(parser, "Multiple default clauses in switch statement")); - } - has_default = true; - } - - inner_cases.push(case); - } - - Ok(inner_cases) - })?; - - self.consume(&Token::RightBrace, "Expected '}' after switch cases")?; - - Ok(Statement::Switch { discriminant, cases }) - } - - /// Parse while statement: while (test) statement - fn parse_while(&mut self) -> ParseResult { - self.advance(); // consume 'while' - self.consume(&Token::LeftParen, "Expected '(' after 'while'")?; - - let test = self.parse_expression()?; - self.consume(&Token::RightParen, "Expected ')' after while condition")?; - - // Use LoopBody context instead of state flag - let body = self.with_context(LexicalContext::LoopBody, |parser| { - parser.parse_statement().map(Box::new) - })?; - - Ok(Statement::Loop(LoopStatement::While { test, body })) - } - - /// Parse do-while statement: do statement while (test); - fn parse_do_while(&mut self) -> ParseResult { - self.advance(); // consume 'do' - - // Use LoopBody context instead of state flag - let body = self.with_context(LexicalContext::LoopBody, |parser| { - parser.parse_statement().map(Box::new) - })?; - - self.consume(&Token::While, "Expected 'while' after do block")?; - 
self.consume(&Token::LeftParen, "Expected '(' after 'while'")?; - - let test = self.parse_expression()?; - - self.consume(&Token::RightParen, "Expected ')' after while condition")?; - self.consume(&Token::Semicolon, "Expected ';' after do-while statement")?; - - Ok(Statement::Loop(LoopStatement::DoWhile { body, test })) - } - - /// Parse break statement: break [label]; - fn parse_break(&mut self) -> ParseResult { - self.advance(); // consume 'break' - - if !self.is_in_loop_body() && !self.is_in_switch() { - return Err(parser_error_at_current!(self, "'break' statement outside of loop or switch")); - } - - // Optional label - let label = if !self.check(&Token::Semicolon) && !self.previous_line_terminator() { - if let Some(Token::Identifier(name)) = self.peek().cloned() { - self.advance(); - - // Verify label exists - let label_name = name.into_boxed_str(); - if !self.state.labels.contains(&label_name) { - return Err(parser_error_at_current!(self, "Undefined label '{}'", label_name)); - } - - Some(label_name) - } else { - None - } - } else { - None - }; - - self.consume(&Token::Semicolon, "Expected ';' after break statement")?; - - Ok(Statement::Break(label)) - } - - /// Parse continue statement: continue [label]; - fn parse_continue(&mut self) -> ParseResult { - self.advance(); // consume 'continue' - - // Check if we're in a loop using context method - if !self.is_in_loop_body() { - return Err(parser_error_at_current!(self, "'continue' statement outside of loop")); - } - - // Optional label - let label = if !self.check(&Token::Semicolon) && !self.previous_line_terminator() { - if let Some(Token::Identifier(name)) = self.peek().cloned() { - self.advance(); - - // Verify label exists - let label_name = name.into_boxed_str(); - if !self.state.labels.contains(&label_name) { - return Err(parser_error_at_current!(self, "Undefined label '{}'", label_name)); - } - - Some(label_name) - } else { - None - } - } else { - None - }; - - self.consume(&Token::Semicolon, "Expected 
';' after continue statement")?; - - Ok(Statement::Continue(label)) - } - - /// Parse return statement: return [expression]; - fn parse_return(&mut self) -> ParseResult { - self.advance(); // consume 'return' - - // Check if we're in a function using context method - if !self.is_in_function() { - return Err(parser_error_at_current!(self, "'return' statement outside of function")); - } - - // Return with no value if semicolon or end of block - let argument = (!self.check(&Token::Semicolon) && - !self.check(&Token::RightBrace) && - !self.is_at_end() && - !self.previous_line_terminator()) - .then(|| self.parse_expression()) - .transpose()?; - - self.consume(&Token::Semicolon, "Expected ';' after return statement")?; - - Ok(Statement::Return(argument)) - } - - /// Parse for statement: for ([init]; [test]; [update]) statement - fn parse_for(&mut self) -> ParseResult { - self.advance(); // consume 'for' - - //println!("In for loop construct"); - - // Check for for-await-of - let is_await = self.match_token(&Token::Await); - - self.consume(&Token::LeftParen, "Expected '(' after 'for'")?; - - let result = self.with_context(LexicalContext::LoopParameters, |parser| { - if parser.check(&Token::Semicolon) { - parser.consume(&Token::Semicolon, "Expected ';' after for init")?; - - let test = (!parser.check(&Token::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::Semicolon, "Expected ';' after for test")?; - - let update = (!parser.check(&Token::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::For { - init: None, - test, - update, - body - }) - } else if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { - - //println!("- with initialization"); - - let init = 
parser.parse_variable_declaration()?; - - match parser.advance() { - Some(&Token::In) => { - let right = parser.parse_expression()?; - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - Ok(LoopStatement::ForIn { - left: ForInit::Declaration(init), - right, - body - }) - }, - Some(&Token::Of) => { - let right = parser.parse_expression()?; - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::ForOf { - left: ForInit::Declaration(init), - right, - body, - is_await - }) - }, - _ => { - - //println!("- as classical loop"); - - let test = (!parser.check(&Token::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::Semicolon, "Expected ';' after for test")?; - - let update = (!parser.check(&Token::RightParen)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::For { - init: Some(ForInit::Declaration(init)), - test, - update, - body - }) - }, - } - - } else if let Some(Token::Identifier(_)) = parser.peek() { - - match parser.peek_next(1) { - Some(&Token::In) => { - let left = Expression::Identifier(parser.expect_identifier("Expected for init name")?); - - parser.advance(); // consume 'in' - - let right = parser.parse_expression()?; - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - Ok(LoopStatement::ForIn { - left: ForInit::Pattern(left), - right, - body - }) - }, - Some(&Token::Of) => { - let left = 
Expression::Identifier(parser.expect_identifier("Expected for init name")?); - - parser.advance(); // consume 'in' - - let right = parser.parse_expression()?; - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - Ok(LoopStatement::ForOf { - left: ForInit::Pattern(left), - right, - body, - is_await - }) - }, - _ => { - //println!("classical for loop without initialisation"); - - //println!("current token is {:#?}", parser.peek()); - - let init = (!parser.check(&Token::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::Semicolon, "Expected ';' after for init")?; - - let test = (!parser.check(&Token::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::Semicolon, "Expected ';' after for test")?; - - //println!("Before I am here on {:#?}", parser.peek()); - - let update = (!parser.check(&Token::RightParen)) - .then(|| parser.parse_expression()) - .transpose()?; - - - //println!("After I am here on {:#?}", parser.peek()); - - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::For { - init: init.map(|exp| ForInit::Pattern(exp)), - test, - update, - body - }) - }, - } - } else { - //println!("What did happen? 
{:#?}", parser.peek()); - //Err(parser_error_at_current!(parser, "unknown for construct")) - - - let init = (!parser.check(&Token::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::Semicolon, "Expected ';' after for init")?; - - let test = (!parser.check(&Token::Semicolon)) - .then(|| parser.parse_expression()) - .transpose()?; - - parser.consume(&Token::Semicolon, "Expected ';' after for test")?; - - //println!("Before I am here on {:#?}", parser.peek()); - - let update = (!parser.check(&Token::RightParen)) - .then(|| parser.parse_expression()) - .transpose()?; - - - //println!("After I am here on {:#?}", parser.peek()); - - parser.consume(&Token::RightParen, "Expected ')' after 'for'")?; - let body = parser.with_context(LexicalContext::LoopBody, |p| { - p.parse_statement().map(Box::new) - })?; - - Ok(LoopStatement::For { - init: init.map(|exp| ForInit::Pattern(exp)), - test, - update, - body - }) - } - - })?; - - Ok(Statement::Loop(result)) - - } - - /// Parse a variable declarator: pattern = initializer - pub fn parse_variable_declarator(&mut self) -> ParseResult { - // Get the current token position for error reporting - - let is_const = matches!(self.peek(), Some(&Token::Const)); - - - //println!("1 Now at {:#?}", self.peek()); - - // Parse the binding pattern (identifier, object pattern, or array pattern) - let id = self.parse_pattern()?; - - // Check if this is a const declaration without an initializer - // TODO fix self.tokens direct access - - //println!("2 Now at {:#?}", self.peek()); - - // Parse optional initializer - let init = if self.match_token(&Token::Equal) { - // Parse the initializer expression - Some(self.parse_expression()?) 
- } else { - // Const declarations must have initializers - if is_const { - return Err(parser_error_at_current!(self, "Missing initializer in const declaration")); - } - None - }; - - // Validate the pattern based on the current context - self.validate_binding_pattern(&id, is_const)?; - - Ok(VariableDeclarator { id, init }) - } - - /// Helper method to validate a binding pattern - fn validate_binding_pattern(&self, pattern: &Expression, is_const: bool) -> ParseResult<()> { - match pattern { - // For simple identifiers, check for strict mode restrictions - Expression::Identifier(name) => { - // Check for reserved words in strict mode - if self.state.in_strict_mode { - let reserved_words = ["eval", "arguments"]; - if reserved_words.contains(&name.as_ref()) { - return Err(parser_error_at_current!(self, "'{}' cannot be used as a variable name in strict mode", name)); - } - } - - // Check for other JavaScript reserved words that can't be variable names - let always_reserved = ["let", "yield", "await", "static", "implements", - "interface", "package", "private", "protected", "public"]; - if always_reserved.contains(&name.as_ref()) { - return Err(parser_error_at_current!(self, "'{}' is a reserved word and cannot be used as a variable name", name)); - } - }, - // For object patterns, recursively validate each property - Expression::Object(properties) => { - for property in properties { - match property { - ObjectProperty::Property { key, value, .. 
} => { - // Validate the value part of the property - self.validate_binding_pattern(value, is_const)?; - }, - ObjectProperty::Spread(expr) => { - // For spread elements, validate the spread target - if let Expression::Identifier(name) = expr { - self.validate_binding_pattern(&Expression::Identifier(name.clone()), is_const)?; - } else if let Expression::Object(_) | Expression::Array(_) = expr { - self.validate_binding_pattern(expr, is_const)?; - } else { - return Err(parser_error_at_current!(self, "Invalid rest element in object pattern")); - } - }, - _ => { - // Methods are not allowed in binding patterns - return Err(parser_error_at_current!(self, "Method definitions are not allowed in object patterns")); - } - } - } - }, - // For array patterns, recursively validate each element - Expression::Array(elements) => { - for element in elements { - match element { - ArrayElement::Expression(expr) => { - self.validate_binding_pattern(expr, is_const)?; - }, - ArrayElement::Spread(expr) => { - // For spread elements, validate the spread target - if let Expression::Identifier(name) = expr { - self.validate_binding_pattern(&Expression::Identifier(name.clone()), is_const)?; - } else if let Expression::Object(_) | Expression::Array(_) = expr { - self.validate_binding_pattern(expr, is_const)?; - } else { - return Err(parser_error_at_current!(self, "Invalid rest element in array pattern")); - } - }, - ArrayElement::Hole => { - // Holes are allowed in array patterns - } - } - } - }, - // Handle assignment patterns (default values) - Expression::Assignment { left, .. 
} => { - self.validate_binding_pattern(left, is_const)?; - }, - // Handle spread elements - Expression::Spread(inner) => { - self.validate_binding_pattern(inner, is_const)?; - }, - // Other expression types are not valid binding patterns - _ => { - return Err(parser_error_at_current!(self, "Invalid binding pattern in variable declaration")); - } - } - - Ok(()) -} - - -} diff --git a/src/parser/stream.rs b/src/parser/stream.rs new file mode 100644 index 0000000..dad0e72 --- /dev/null +++ b/src/parser/stream.rs @@ -0,0 +1,94 @@ +use crate::lexer::Token; + +pub struct TokenStream<'a> { + tokens: &'a [(Token, [usize; 2])], + current: usize, + source: &'a str, +} + +impl<'a> TokenStream<'a> { + pub fn new(tokens: &'a [(Token, [usize; 2])]) -> Self { + Self { + tokens, + current: 0, + source: "", + } + } + + pub fn attach_source(&mut self, source: &'a str) { + self.source = source; + } + + pub fn get_source_text(&self) -> &str { + self.source + } + + pub fn is_at_end(&self) -> bool { + matches!(self.peek(), Token::EOS) + } + + pub fn peek(&self) -> &Token { + &self.tokens[self.current].0 + } + + pub fn peek_position(&self) -> [usize; 2] { + self.tokens[self.current].1 + } + + pub fn peek_previous(&self) -> &Token { + if self.current > 0 { + &self.tokens[self.current - 1].0 + } else { + &Token::EOS + } + } + + pub fn peek_next(&self, offset: usize) -> &Token { + let index = self.current + offset; + if index < self.tokens.len() { + &self.tokens[index].0 + } else { + &Token::EOS + } + } + + pub fn advance(&mut self) -> bool { + if self.current < self.tokens.len() { + self.current += 1; + true + } else { + false + } + } + + pub fn check(&self, token_type: &Token) -> bool { + &self.tokens[self.current].0 == token_type + } + + pub fn consume(&mut self, token_type: &Token) -> bool { + if self.current < self.tokens.len() && &self.tokens[self.current].0 == token_type { + self.current += 1; + true + } else { + false + } + } + + pub fn previous_line_terminator(&self) -> bool { + if 
self.current > 0 && self.current < self.tokens.len() { + let prev_line = self.tokens[self.current - 1].1[0]; + let curr_line = self.tokens[self.current].1[0]; + prev_line < curr_line + } else { + false + } + } + + pub fn save_position(&self) -> usize { + self.current + } + + pub fn restore_position(&mut self, position: usize) { + self.current = position; + } +} From 294f30517812006d4ea375d5be8f6d12c300a772 Mon Sep 17 00:00:00 2001 From: Jan Cajthaml Date: Thu, 15 May 2025 08:27:31 +0200 Subject: [PATCH 7/7] add unparsing --- src/ast.rs | 6 + src/grammar/array.rs | 86 ++- src/grammar/await_expression.rs | 37 ++ src/grammar/call.rs | 49 -- src/grammar/class.rs | 257 +++++++-- src/grammar/declaration.rs | 632 ++++++++++++++++----- src/grammar/expression.rs | 737 +++++++++++++++++++++++-- src/grammar/function.rs | 211 ++++--- src/grammar/literal.rs | 98 +++- src/grammar/member.rs | 67 --- src/grammar/mod.rs | 14 +- src/grammar/module.rs | 38 +- src/grammar/new.rs | 49 +- src/grammar/object.rs | 161 +----- src/grammar/pattern.rs | 403 ++++++++++++-- src/grammar/property.rs | 246 +++++++++ src/grammar/script.rs | 31 ++ src/grammar/statement.rs | 945 +++++++++++++++++++++++++++----- src/grammar/this.rs | 15 +- src/grammar/yield_expression.rs | 52 ++ src/lexer/lexer.rs | 350 ++++++------ src/lexer/token.rs | 130 ++++- src/main.rs | 28 +- src/parser/asi.rs | 1 + src/parser/context.rs | 21 +- src/parser/error.rs | 19 +- src/parser/mod.rs | 5 - src/parser/parser.rs | 30 +- src/parser/stream.rs | 27 + src/unparser/combinator.rs | 5 + src/unparser/formatter.rs | 109 ++++ src/unparser/mod.rs | 7 + src/unparser/unparser.rs | 79 +++ 33 files changed, 3875 insertions(+), 1070 deletions(-) create mode 100644 src/grammar/await_expression.rs delete mode 100644 src/grammar/call.rs delete mode 100644 src/grammar/member.rs create mode 100644 src/grammar/property.rs create mode 100644 src/grammar/script.rs create mode 100644 src/grammar/yield_expression.rs create mode 100644 
src/unparser/combinator.rs create mode 100644 src/unparser/formatter.rs create mode 100644 src/unparser/mod.rs create mode 100644 src/unparser/unparser.rs diff --git a/src/ast.rs b/src/ast.rs index ab272f4..5a48f40 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -112,6 +112,7 @@ pub struct PrivateIdentifier { pub enum Literal { StringLiteral(StringLiteral), BooleanLiteral(BooleanLiteral), + UndefinedLiteral(UndefinedLiteral), NullLiteral(NullLiteral), NumericLiteral(NumericLiteral), BigIntLiteral(BigIntLiteral), @@ -133,6 +134,11 @@ pub struct NullLiteral { } +#[derive(Debug, Clone, PartialEq)] +pub struct UndefinedLiteral { + +} + #[derive(Debug, Clone, PartialEq)] pub struct NumericLiteral { pub value: f64, diff --git a/src/grammar/array.rs b/src/grammar/array.rs index 2134358..a830b01 100644 --- a/src/grammar/array.rs +++ b/src/grammar/array.rs @@ -1,40 +1,49 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::expression::*; -pub struct ArrayExpressionParser; +pub struct ArrayExpressionNode; -impl ArrayExpressionParser { +impl ArrayExpressionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ArrayExpressionParser { +impl ParserCombinator for ArrayExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::LeftBracket, "Expected '[' at the start of array expression")?; let mut elements = Vec::new(); - - while !parser.check(&Token::RightBracket) && !parser.is_at_end() { - if parser.consume(&Token::Comma) { - // Handle elision (hole in the array) + + if parser.check(&Token::RightBracket) { + parser.advance(); + return Ok(ArrayExpression { elements }); + } + + let expr_parser = ExpressionNode::new(); + + loop { + if parser.check(&Token::Comma) { + parser.advance(); elements.push(None); - } else { - // Parse regular element - let element = ExpressionParser::new().parse(parser)?; + } else if !parser.check(&Token::RightBracket) { + let element = 
expr_parser.parse_with_precedence(parser, Precedence::Comma.next())?; elements.push(Some(element)); - - // If there's no comma, we should be at the end - if !parser.consume(&Token::Comma) { + + if !parser.check(&Token::Comma) { break; } - - // If we see a right bracket after a comma, it's a trailing comma + + parser.advance(); + if parser.check(&Token::RightBracket) { break; } + } else { + break; } } @@ -43,3 +52,50 @@ impl ParserCombinator for ArrayExpressionParser { Ok(ArrayExpression { elements }) } } + +impl UnparserCombinator for ArrayExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ArrayExpression) { + unparser.write_char('['); + + if !node.elements.is_empty() { + let multiline = node.elements.len() > 5; + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + u.write_char(','); + u.newline(); + } + match elem { + Some(expr) => { + ExpressionNode::new().unparse(u, expr); + }, + None => { + } + } + } + }); + unparser.newline(); + } else { + unparser.space(); + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + match elem { + Some(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + None => { + } + } + } + unparser.space(); + } + } + + unparser.write_char(']'); + } +} diff --git a/src/grammar/await_expression.rs b/src/grammar/await_expression.rs new file mode 100644 index 0000000..9037fa9 --- /dev/null +++ b/src/grammar/await_expression.rs @@ -0,0 +1,37 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; + +pub struct AwaitExpressionNode; + +impl AwaitExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for AwaitExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + if !parser.allows_await() { + return Err(parser.error_at_current("'await' expressions are only allowed 
within async functions and modules")); + } + + parser.assert_consume(&Token::Await, "Expected 'await'")?; + + let argument = Box::new(ExpressionNode::new().parse(parser)?); + + Ok(AwaitExpression { + argument, + }) + } +} + +impl UnparserCombinator for AwaitExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &AwaitExpression) { + unparser.write_str("await"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, &node.argument); + } +} \ No newline at end of file diff --git a/src/grammar/call.rs b/src/grammar/call.rs deleted file mode 100644 index be05177..0000000 --- a/src/grammar/call.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::ast::*; -use crate::lexer::*; -use crate::parser::*; -use super::expression::*; - -pub struct CallExpressionParser; - -impl CallExpressionParser { - pub fn new() -> Self { - Self - } - - pub fn parse_with_callee(&self, parser: &mut Parser, callee: Expression) -> ParseResult { - - let optional = parser.consume(&Token::QuestionDot); - - if optional && !parser.check(&Token::LeftParen) { - return Err(parser.error_at_current("Expected '(' after optional chaining operator in function call")); - } - - parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; - - let mut arguments = Vec::new(); - - if !parser.check(&Token::RightParen) { - arguments.push(ExpressionParser::new().parse(parser)?); - - while parser.consume(&Token::Comma) && !parser.check(&Token::RightParen) { - arguments.push(ExpressionParser::new().parse(parser)?); - } - } - - parser.assert_consume(&Token::RightParen, "Expected ')' after function arguments")?; - - Ok(CallExpression { - callee: Box::new(callee), - arguments, - optional, - }) - } - -} - -impl ParserCombinator for CallExpressionParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { - let callee = ExpressionParser::new().parse(parser)?; - self.parse_with_callee(parser, callee) - } -} diff --git a/src/grammar/class.rs b/src/grammar/class.rs index 
bfc233d..30c2e66 100644 --- a/src/grammar/class.rs +++ b/src/grammar/class.rs @@ -1,37 +1,38 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::expression::*; use super::pattern::*; use super::statement::*; use super::function::*; use super::literal::*; -pub struct ClassDeclarationParser; +pub struct ClassDeclarationNode; -impl ClassDeclarationParser { +impl ClassDeclarationNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ClassDeclarationParser { +impl ParserCombinator for ClassDeclarationNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Class, "Expected 'class'")?; let id = if matches!(parser.peek(), Token::Identifier(_)) { - Some(IdentifierParser::new().parse(parser)?) + Some(IdentifierNode::new().parse(parser)?) } else { None }; let super_class = if parser.consume(&Token::Extends) { - Some(Box::new(ExpressionParser::new().parse(parser)?)) + Some(Box::new(ExpressionNode::new().parse(parser)?)) } else { None }; - let body = ClassBodyParser::new().parse(parser)?; + let body = ClassBodyNode::new().parse(parser)?; Ok(ClassDeclaration { id, @@ -41,31 +42,56 @@ impl ParserCombinator for ClassDeclarationParser { } } -pub struct ClassExpressionParser; +impl UnparserCombinator for ClassDeclarationNode { + fn unparse(&self, unparser: &mut Unparser, node: &ClassDeclaration) { + unparser.write_str("class"); + + // Write the class name if present + if let Some(id) = &node.id { + unparser.space(); + unparser.write_str(&id.name); + } + + // Write the extends clause if present + if let Some(super_class) = &node.super_class { + unparser.space(); + unparser.write_str("extends"); + unparser.space(); + ExpressionNode::new().unparse(unparser, super_class); + } + + unparser.space(); + + // Write the class body + ClassBodyNode::new().unparse(unparser, &node.body); + } +} -impl ClassExpressionParser { +pub struct ClassExpressionNode; + +impl ClassExpressionNode { pub fn new() -> 
Self { Self } } -impl ParserCombinator for ClassExpressionParser { +impl ParserCombinator for ClassExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Class, "Expected 'class'")?; let id = if matches!(parser.peek(), Token::Identifier(_)) { - Some(IdentifierParser::new().parse(parser)?) + Some(IdentifierNode::new().parse(parser)?) } else { None }; let super_class = if parser.consume(&Token::Extends) { - Some(Box::new(ExpressionParser::new().parse(parser)?)) + Some(Box::new(ExpressionNode::new().parse(parser)?)) } else { None }; - let body = ClassBodyParser::new().parse(parser)?; + let body = ClassBodyNode::new().parse(parser)?; Ok(ClassExpression { id, @@ -75,15 +101,40 @@ impl ParserCombinator for ClassExpressionParser { } } -pub struct SuperExpressionParser; +impl UnparserCombinator for ClassExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ClassExpression) { + unparser.write_str("class"); + + // Write the class name if present + if let Some(id) = &node.id { + unparser.space(); + unparser.write_str(&id.name); + } + + // Write the extends clause if present + if let Some(super_class) = &node.super_class { + unparser.space(); + unparser.write_str("extends"); + unparser.space(); + ExpressionNode::new().unparse(unparser, super_class); + } + + unparser.space(); + + // Write the class body + ClassBodyNode::new().unparse(unparser, &node.body); + } +} + +pub struct SuperExpressionNode; -impl SuperExpressionParser { +impl SuperExpressionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for SuperExpressionParser { +impl ParserCombinator for SuperExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Super, "Expected 'super'")?; @@ -91,15 +142,21 @@ impl ParserCombinator for SuperExpressionParser { } } -pub struct ClassBodyParser; +impl UnparserCombinator for SuperExpressionNode { + fn unparse(&self, unparser: &mut Unparser, _node: &SuperExpression) { 
+ unparser.write_str("super"); + } +} + +pub struct ClassBodyNode; -impl ClassBodyParser { +impl ClassBodyNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ClassBodyParser { +impl ParserCombinator for ClassBodyNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::LeftBrace, "Expected '{' after class declaration")?; @@ -108,13 +165,13 @@ impl ParserCombinator for ClassBodyParser { while !parser.check(&Token::RightBrace) && !parser.is_at_end() { // Check for static block if parser.consume(&Token::Static) && parser.check(&Token::LeftBrace) { - let static_block = StaticBlockParser::new().parse(parser)?; + let static_block = StaticBlockNode::new().parse(parser)?; body.push(ClassElement::StaticBlock(static_block)); continue; } // Parse method definition - let method = MethodDefinitionParser::new().parse(parser)?; + let method = MethodDefinitionNode::new().parse(parser)?; body.push(ClassElement::MethodDefinition(method)); } @@ -124,15 +181,43 @@ impl ParserCombinator for ClassBodyParser { } } -pub struct MethodDefinitionParser; +impl UnparserCombinator for ClassBodyNode { + fn unparse(&self, unparser: &mut Unparser, node: &ClassBody) { + unparser.write_char('{'); + + if !node.body.is_empty() { + unparser.newline(); + + unparser.with_indent(|u| { + for element in &node.body { + match element { + ClassElement::MethodDefinition(method) => { + MethodDefinitionNode::new().unparse(u, method); + }, + ClassElement::StaticBlock(static_block) => { + u.write_str("static"); + u.space(); + StaticBlockNode::new().unparse(u, static_block); + } + } + u.newline(); + } + }); + } + + unparser.write_char('}'); + } +} -impl MethodDefinitionParser { +pub struct MethodDefinitionNode; + +impl MethodDefinitionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for MethodDefinitionParser { +impl ParserCombinator for MethodDefinitionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Check for static modifier let 
static_method = parser.consume(&Token::Static); @@ -156,12 +241,13 @@ impl ParserCombinator for MethodDefinitionParser { // Parse the key let (key, computed) = if parser.consume(&Token::LeftBracket) { // Computed property name - let expr = ExpressionParser::new().parse(parser)?; + let expr = ExpressionNode::new().parse(parser)?; parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property name")?; (PropertyKey::Expression(Box::new(expr)), true) } else if parser.check(&Token::Hash) { // Private field or method parser.advance(); // Consume the '#' + // TODO what about string literal? if let Token::Identifier(name) = parser.peek() { // Clone the name before advancing the parser let name_clone = name.clone(); @@ -178,11 +264,11 @@ impl ParserCombinator for MethodDefinitionParser { match parser.peek() { Token::StringLiteral(_) | Token::NumberLiteral(_) => { - let literal = LiteralParser::new().parse(parser)?; + let literal = LiteralNode::new().parse(parser)?; (PropertyKey::Literal(literal), false) }, _ => { - let ident = IdentifierParser::new().parse(parser)?; + let ident = IdentifierNode::new().parse(parser)?; (PropertyKey::Identifier(ident), false) } } @@ -192,7 +278,7 @@ impl ParserCombinator for MethodDefinitionParser { parser.assert_consume(&Token::LeftParen, "Expected '(' after method name")?; // Create a function expression for the method - let mut func_expr = FunctionExpressionParser::new().parse(parser)?; + let mut func_expr = FunctionExpressionNode::new().parse(parser)?; func_expr.generator = generator; func_expr.async_function = async_method; @@ -206,18 +292,127 @@ impl ParserCombinator for MethodDefinitionParser { } } -pub struct StaticBlockParser; +impl UnparserCombinator for MethodDefinitionNode { + fn unparse(&self, unparser: &mut Unparser, node: &MethodDefinition) { + // Write static modifier if present + if node.static_method { + unparser.write_str("static"); + unparser.space(); + } + + // Write method kind + match node.kind { + 
MethodKind::Constructor => { + unparser.write_str("constructor"); + }, + MethodKind::Method => { + // For async methods + if node.value.async_function { + unparser.write_str("async"); + unparser.space(); + } + + // For generator methods + if node.value.generator { + unparser.write_char('*'); + } + }, + MethodKind::Get => { + unparser.write_str("get"); + unparser.space(); + }, + MethodKind::Set => { + unparser.write_str("set"); + unparser.space(); + } + } + + // Write the method key + if node.computed { + unparser.write_char('['); + match &node.key { + PropertyKey::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::PrivateIdentifier(id) => { + unparser.write_char('#'); + unparser.write_str(&id.name); + } + } + unparser.write_char(']'); + } else { + match &node.key { + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::Expression(_) => { + // This shouldn't happen for non-computed properties + unparser.write_str("\"error\""); + }, + PropertyKey::PrivateIdentifier(id) => { + unparser.write_char('#'); + unparser.write_str(&id.name); + } + } + } + + // Write the method parameters and body + unparser.write_char('('); + + // Write parameters + if !node.value.params.is_empty() { + for (i, param) in node.value.params.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + 
PatternNode::new().unparse(unparser, param); + } + } + + unparser.write_char(')'); + unparser.space(); + + // Write the method body + BlockStatementNode::new().unparse(unparser, &node.value.body); + } +} + +pub struct StaticBlockNode; -impl StaticBlockParser { +impl StaticBlockNode { pub fn new() -> Self { Self } } -impl ParserCombinator for StaticBlockParser { +impl ParserCombinator for StaticBlockNode { fn parse(&self, parser: &mut Parser) -> ParseResult { - let block = BlockStatementParser::new().parse(parser)?; + let block = BlockStatementNode::new().parse(parser)?; Ok(StaticBlock { body: block }) } } + +impl UnparserCombinator for StaticBlockNode { + fn unparse(&self, unparser: &mut Unparser, node: &StaticBlock) { + BlockStatementNode::new().unparse(unparser, &node.body); + } +} diff --git a/src/grammar/declaration.rs b/src/grammar/declaration.rs index bc09aac..3f8c945 100644 --- a/src/grammar/declaration.rs +++ b/src/grammar/declaration.rs @@ -1,21 +1,22 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::literal::*; use super::pattern::*; use super::expression::*; use super::statement::*; use super::class::*; -pub struct VariableDeclarationParser; +pub struct VariableDeclarationNode; -impl VariableDeclarationParser { +impl VariableDeclarationNode { pub fn new() -> Self { Self } } -impl ParserCombinator for VariableDeclarationParser { +impl ParserCombinator for VariableDeclarationNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Parse variable kind let kind = if parser.consume(&Token::Var) { @@ -34,8 +35,10 @@ impl ParserCombinator for VariableDeclarationParser { let mut declared_identifiers = Vec::new(); // Parse first declarator - let declarator = VariableDeclaratorParser::new().parse(parser)?; + let declarator = VariableDeclaratorNode::new().parse(parser)?; + // TODO not checking TDZ ? 
+ // For let/const, collect identifiers for TDZ enforcement if matches!(kind, VariableKind::Let | VariableKind::Const) { collect_binding_identifiers(&declarator.id, &mut declared_identifiers); @@ -45,7 +48,7 @@ impl ParserCombinator for VariableDeclarationParser { // Parse additional declarators while parser.consume(&Token::Comma) { - let declarator = VariableDeclaratorParser::new().parse(parser)?; + let declarator = VariableDeclaratorNode::new().parse(parser)?; // For let/const, collect identifiers and check TDZ if matches!(kind, VariableKind::Let | VariableKind::Const) { @@ -68,23 +71,53 @@ impl ParserCombinator for VariableDeclarationParser { } } +impl UnparserCombinator for VariableDeclarationNode { + fn unparse(&self, unparser: &mut Unparser, node: &VariableDeclaration) { + // Write the variable kind (var, let, const) + match node.kind { + VariableKind::Var => unparser.write_str("var"), + VariableKind::Let => unparser.write_str("let"), + VariableKind::Const => unparser.write_str("const"), + } + + unparser.write_char(' '); + + // Write the declarations + if !node.declarations.is_empty() { + // First declaration + VariableDeclaratorNode::new().unparse(unparser, &node.declarations[0]); + + // Remaining declarations + for decl in &node.declarations[1..] 
{ + unparser.write_char(','); + unparser.space(); + VariableDeclaratorNode::new().unparse(unparser, decl); + } + } + + // Add semicolon + unparser.write_char(';'); + } +} + + /// Parser for variable declarators -pub struct VariableDeclaratorParser; +pub struct VariableDeclaratorNode; -impl VariableDeclaratorParser { +impl VariableDeclaratorNode { pub fn new() -> Self { Self } } -impl ParserCombinator for VariableDeclaratorParser { +impl ParserCombinator for VariableDeclaratorNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Parse the identifier or pattern - let id = PatternParser::new().parse(parser)?; + let id = PatternNode::new().parse(parser)?; // Parse the initializer if present let init = if parser.consume(&Token::Equal) { - Some(Box::new(ExpressionParser::new().parse(parser)?)) + Some(Box::new(ExpressionNode::new().parse(parser)?)) } else { None }; @@ -107,6 +140,9 @@ impl FunctionDeclarationParser { impl ParserCombinator for FunctionDeclarationParser { fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check if this is an async function + let async_function = parser.consume(&Token::Async); + // Consume the 'function' keyword parser.assert_consume(&Token::Function, "Expected 'function'")?; @@ -115,7 +151,7 @@ impl ParserCombinator for FunctionDeclarationParser { // Parse the function name let id = if matches!(parser.peek(), Token::Identifier(_)) { - Some(IdentifierParser::new().parse(parser)?) + Some(IdentifierNode::new().parse(parser)?) 
} else { None }; @@ -127,18 +163,18 @@ impl ParserCombinator for FunctionDeclarationParser { if !parser.check(&Token::RightParen) { // Parse the first parameter - params.push(PatternParser::new().parse(parser)?); + params.push(PatternNode::new().parse(parser)?); // Parse additional parameters while parser.consume(&Token::Comma) { - params.push(PatternParser::new().parse(parser)?); + params.push(PatternNode::new().parse(parser)?); } } parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; - let body = parser.with_context(LexicalContext::FunctionBody { allow_await: false, allow_yield: generator }, |p| { - BlockStatementParser::new().parse(p) + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: async_function, allow_yield: generator }, |p| { + BlockStatementNode::new().parse(p) })?; Ok(FunctionDeclaration { @@ -146,7 +182,7 @@ impl ParserCombinator for FunctionDeclarationParser { params, body, generator, - async_function: false, + async_function, }) } } @@ -167,132 +203,199 @@ impl ExportDeclarationParser { } } -impl ParserCombinator for ExportDeclarationParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { +// Add these new parser structs for each export type +pub struct ExportNamedDeclarationParser; +pub struct ExportDefaultDeclarationParser; +pub struct ExportAllDeclarationParser; + +impl ExportNamedDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ExportDefaultDeclarationParser { + pub fn new() -> Self { + Self + } +} + +impl ExportAllDeclarationParser { + pub fn new() -> Self { + Self + } +} + +// Implement parsing for each export type +impl ParserCombinator for ExportNamedDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { // Consume the 'export' keyword parser.assert_consume(&Token::Export, "Expected 'export'")?; - // Check for export type - if parser.consume(&Token::Default) { - // Export default declaration - let declaration = if 
parser.check(&Token::Function) || parser.check(&Token::Class) { - // Function or class declaration - if parser.check(&Token::Function) { - let func = FunctionDeclarationParser::new().parse(parser)?; - ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::FunctionDeclaration(func))) - } else { - let class = ClassDeclarationParser::new().parse(parser)?; - ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::ClassDeclaration(class))) - } - } else { - // Expression - let expr = ExpressionParser::new().parse(parser)?; - parser.consume_semicolon("Expected ';' after export default expression")?; - ExportDefaultDeclarationKind::Expression(Box::new(expr)) - }; - - Ok(ExportDeclaration::Default(ExportDefaultDeclaration { declaration })) - } else if parser.consume(&Token::Star) { - // Export all declaration - let exported = if parser.consume(&Token::As) { - Some(IdentifierParser::new().parse(parser)?) - } else { - None - }; + // Parse declaration if present + let declaration = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + // Variable declaration + let var_decl = VariableDeclarationNode::new().parse(parser)?; + Some(Box::new(Declaration::VariableDeclaration(var_decl))) + } else if parser.check(&Token::Function) { + // Function declaration + let func_decl = FunctionDeclarationParser::new().parse(parser)?; + Some(Box::new(Declaration::FunctionDeclaration(func_decl))) + } else if parser.check(&Token::Class) { + // Class declaration + let class_decl = ClassDeclarationNode::new().parse(parser)?; + Some(Box::new(Declaration::ClassDeclaration(class_decl))) + } else { + None + }; + + // If there's no declaration, there must be export specifiers + let mut specifiers = Vec::new(); + + if declaration.is_none() { + // Parse export specifiers + parser.assert_consume(&Token::LeftBrace, "Expected '{' in named export declaration")?; - parser.assert_consume(&Token::From, "Expected 'from' after export *")?; + if 
!parser.check(&Token::RightBrace) { + // Parse the first specifier + specifiers.push(self.parse_export_specifier(parser)?); + + // Parse additional specifiers + while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { + specifiers.push(self.parse_export_specifier(parser)?); + } + } - let source = match parser.peek() { + parser.assert_consume(&Token::RightBrace, "Expected '}' after export specifiers")?; + } + + // Parse the 'from' clause if present + let source = if parser.consume(&Token::From) { + match parser.peek() { Token::StringLiteral(_) => { - let literal = LiteralParser::new().parse(parser)?; + let literal = LiteralNode::new().parse(parser)?; match literal { - Literal::StringLiteral(str_lit) => str_lit, + Literal::StringLiteral(str_lit) => Some(str_lit), _ => return Err(parser.error_at_current("Expected string literal for module source")), } }, _ => return Err(parser.error_at_current("Expected string literal for module source")), - }; - - parser.consume_semicolon("Expected ';' after export * from declaration")?; - - Ok(ExportDeclaration::All(ExportAllDeclaration { - source, - exported, - })) + } } else { - // Named export declaration - let declaration = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { - // Variable declaration - let var_decl = VariableDeclarationParser::new().parse(parser)?; - Some(Box::new(Declaration::VariableDeclaration(var_decl))) - } else if parser.check(&Token::Function) { - // Function declaration - let func_decl = FunctionDeclarationParser::new().parse(parser)?; - Some(Box::new(Declaration::FunctionDeclaration(func_decl))) - } else if parser.check(&Token::Class) { - // Class declaration - let class_decl = ClassDeclarationParser::new().parse(parser)?; - Some(Box::new(Declaration::ClassDeclaration(class_decl))) + None + }; + + // Consume the semicolon + parser.consume_semicolon("Expected ';' after export declaration")?; + + Ok(ExportNamedDeclaration { + declaration, + 
specifiers, + source, + }) + } +} + +impl ParserCombinator for ExportDefaultDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'export' and 'default' keywords + parser.assert_consume(&Token::Export, "Expected 'export'")?; + parser.assert_consume(&Token::Default, "Expected 'default'")?; + + // Parse the declaration or expression + let declaration = if parser.check(&Token::Function) || parser.check(&Token::Class) { + // Function or class declaration + if parser.check(&Token::Function) { + let func = FunctionDeclarationParser::new().parse(parser)?; + ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::FunctionDeclaration(func))) } else { - None - }; - - // If there's no declaration, there must be export specifiers - let mut specifiers = Vec::new(); - - if declaration.is_none() { - // Parse export specifiers - parser.assert_consume(&Token::LeftBrace, "Expected '{' in named export declaration")?; - - if !parser.check(&Token::RightBrace) { - // Parse the first specifier - specifiers.push(self.parse_export_specifier(parser)?); - - // Parse additional specifiers - while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { - specifiers.push(self.parse_export_specifier(parser)?); - } - } - - parser.assert_consume(&Token::RightBrace, "Expected '}' after export specifiers")?; + let class = ClassDeclarationNode::new().parse(parser)?; + ExportDefaultDeclarationKind::Declaration(Box::new(Declaration::ClassDeclaration(class))) } - - // Parse the 'from' clause if present - let source = if parser.consume(&Token::From) { - match parser.peek() { - Token::StringLiteral(_) => { - let literal = LiteralParser::new().parse(parser)?; - match literal { - Literal::StringLiteral(str_lit) => Some(str_lit), - _ => return Err(parser.error_at_current("Expected string literal for module source")), - } - }, + } else { + // Expression + let expr = ExpressionNode::new().parse(parser)?; + parser.consume_semicolon("Expected ';' after 
export default expression")?; + ExportDefaultDeclarationKind::Expression(Box::new(expr)) + }; + + Ok(ExportDefaultDeclaration { declaration }) + } +} + +impl ParserCombinator for ExportAllDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Consume the 'export' and '*' tokens + parser.assert_consume(&Token::Export, "Expected 'export'")?; + parser.assert_consume(&Token::Star, "Expected '*'")?; + + // Parse 'as' clause if present + let exported = if parser.consume(&Token::As) { + Some(IdentifierNode::new().parse(parser)?) + } else { + None + }; + + // Parse 'from' clause + parser.assert_consume(&Token::From, "Expected 'from' after export *")?; + + // Parse the module source + let source = match parser.peek() { + Token::StringLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + match literal { + Literal::StringLiteral(str_lit) => str_lit, _ => return Err(parser.error_at_current("Expected string literal for module source")), } - } else { - None - }; - - // Consume the semicolon - parser.consume_semicolon("Expected ';' after export declaration")?; - - Ok(ExportDeclaration::Named(ExportNamedDeclaration { - declaration, - specifiers, - source, - })) + }, + _ => return Err(parser.error_at_current("Expected string literal for module source")), + }; + + // Consume the semicolon + parser.consume_semicolon("Expected ';' after export * from declaration")?; + + Ok(ExportAllDeclaration { + source, + exported, + }) + } +} + +// Update the ExportDeclarationParser to use the new specific parsers +impl ParserCombinator for ExportDeclarationParser { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Save position to check what type of export this is + let pos = parser.save_position(); + + // Consume the 'export' keyword + parser.assert_consume(&Token::Export, "Expected 'export'")?; + + // Check for export type + if parser.check(&Token::Default) { + // Restore position and parse as default export + parser.restore_position(pos); + 
ExportDefaultDeclarationParser::new().parse(parser).map(ExportDeclaration::Default) + } else if parser.check(&Token::Star) { + // Restore position and parse as export all + parser.restore_position(pos); + ExportAllDeclarationParser::new().parse(parser).map(ExportDeclaration::All) + } else { + // Restore position and parse as named export + parser.restore_position(pos); + ExportNamedDeclarationParser::new().parse(parser).map(ExportDeclaration::Named) } } } -impl ExportDeclarationParser { +// Helper method for ExportNamedDeclarationParser +impl ExportNamedDeclarationParser { fn parse_export_specifier(&self, parser: &mut Parser) -> ParseResult { // Parse the local name - let local = IdentifierParser::new().parse(parser)?; + let local = IdentifierNode::new().parse(parser)?; // Parse the exported name if present let exported = if parser.consume(&Token::As) { - IdentifierParser::new().parse(parser)? + IdentifierNode::new().parse(parser)? } else { // If no 'as', the exported name is the same as the local name Identifier { name: local.name.clone() } @@ -328,7 +431,7 @@ impl ParserCombinator for ImportDeclarationParser { } else if parser.consume(&Token::Star) { // Namespace import parser.assert_consume(&Token::As, "Expected 'as' after '*' in import declaration")?; - let local = IdentifierParser::new().parse(parser)?; + let local = IdentifierNode::new().parse(parser)?; specifiers.push(ImportSpecifier::ImportNamespaceSpecifier(ImportNamespaceSpecifier { local, @@ -342,7 +445,7 @@ impl ParserCombinator for ImportDeclarationParser { if !parser.check(&Token::RightBrace) { // Parse the first specifier specifiers.push(self.parse_import_specifier(parser)?); - + // Parse additional specifiers while parser.consume(&Token::Comma) && !parser.check(&Token::RightBrace) { specifiers.push(self.parse_import_specifier(parser)?); @@ -354,7 +457,7 @@ impl ParserCombinator for ImportDeclarationParser { parser.assert_consume(&Token::From, "Expected 'from' after named imports")?; } else if 
matches!(parser.peek(), Token::Identifier(_)) { // Default import - let local = IdentifierParser::new().parse(parser)?; + let local = IdentifierNode::new().parse(parser)?; specifiers.push(ImportSpecifier::ImportDefaultSpecifier(ImportDefaultSpecifier { local, @@ -365,7 +468,7 @@ impl ParserCombinator for ImportDeclarationParser { if parser.consume(&Token::Star) { // Namespace import after default import parser.assert_consume(&Token::As, "Expected 'as' after '*' in import declaration")?; - let local = IdentifierParser::new().parse(parser)?; + let local = IdentifierNode::new().parse(parser)?; specifiers.push(ImportSpecifier::ImportNamespaceSpecifier(ImportNamespaceSpecifier { local, @@ -392,7 +495,7 @@ impl ParserCombinator for ImportDeclarationParser { // Parse the source let source = match parser.peek() { Token::StringLiteral(_) => { - let literal = LiteralParser::new().parse(parser)?; + let literal = LiteralNode::new().parse(parser)?; match literal { Literal::StringLiteral(str_lit) => str_lit, _ => return Err(parser.error_at_current("Expected string literal for module source")), @@ -414,11 +517,11 @@ impl ParserCombinator for ImportDeclarationParser { impl ImportDeclarationParser { fn parse_import_specifier(&self, parser: &mut Parser) -> ParseResult { // Parse the imported name - let imported = IdentifierParser::new().parse(parser)?; + let imported = IdentifierNode::new().parse(parser)?; // Parse the local name if present let local = if parser.consume(&Token::As) { - IdentifierParser::new().parse(parser)? + IdentifierNode::new().parse(parser)? 
} else { // If no 'as', the local name is the same as the imported name Identifier { name: imported.name.clone() } @@ -431,9 +534,6 @@ impl ImportDeclarationParser { } } - - - // Helper function to collect identifiers from a binding pattern fn collect_binding_identifiers(pattern: &Pattern, identifiers: &mut Vec>) { match pattern { @@ -494,4 +594,280 @@ fn check_tdz_violation(expr: &Expression, declared_identifiers: &[Box], par _ => {} } Ok(()) -} \ No newline at end of file +} + +// Variable declarator unparser +impl UnparserCombinator for VariableDeclaratorNode { + fn unparse(&self, unparser: &mut Unparser, node: &VariableDeclarator) { + // Write the identifier or pattern + PatternNode::new().unparse(unparser, &node.id); + + // Write the initializer if present + if let Some(init) = &node.init { + unparser.space(); + unparser.write_char('='); + unparser.space(); + ExpressionNode::new().unparse(unparser, init); + } + } +} + +// Function declaration unparser +impl UnparserCombinator for FunctionDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &FunctionDeclaration) { + // Write async if it's an async function + if node.async_function { + unparser.write_str("async"); + unparser.write_char(' '); + } + + // Write the function keyword + unparser.write_str("function"); + + // Write * if it's a generator function + if node.generator { + unparser.write_char('*'); + } + + // Write the function name if present + if let Some(id) = &node.id { + unparser.write_char(' '); + unparser.write_str(&id.name); + } + + // Write the parameter list + unparser.write_char('('); + + // Write parameters + if !node.params.is_empty() { + PatternNode::new().unparse(unparser, &node.params[0]); + + for param in &node.params[1..] 
{ + unparser.write_char(','); + unparser.space(); + PatternNode::new().unparse(unparser, param); + } + } + + unparser.write_char(')'); + unparser.space(); + + // Write the function body + BlockStatementNode::new().unparse(unparser, &node.body); + } +} + +// Export named declaration unparser +impl UnparserCombinator for ExportNamedDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ExportNamedDeclaration) { + unparser.write_str("export"); + unparser.write_char(' '); + + // Handle declaration if present + if let Some(decl) = &node.declaration { + match &**decl { + Declaration::VariableDeclaration(var_decl) => { + VariableDeclarationNode::new().unparse(unparser, var_decl); + }, + Declaration::FunctionDeclaration(func_decl) => { + FunctionDeclarationParser::new().unparse(unparser, func_decl); + }, + Declaration::ClassDeclaration(class_decl) => { + // Assuming ClassDeclarationNode is implemented elsewhere + ClassDeclarationNode::new().unparse(unparser, class_decl); + }, + _ => { + // This shouldn't happen for export named declarations + unparser.write_str("/* unsupported declaration */"); + } + } + } else { + // Export specifiers + unparser.write_char('{'); + + if !node.specifiers.is_empty() { + unparser.space(); + + // First specifier + self.unparse_export_specifier(unparser, &node.specifiers[0]); + + // Remaining specifiers + for spec in &node.specifiers[1..] 
{ + unparser.write_char(','); + unparser.space(); + self.unparse_export_specifier(unparser, spec); + } + + unparser.space(); + } + + unparser.write_char('}'); + + // Handle 'from' clause if present + if let Some(source) = &node.source { + unparser.write_char(' '); + unparser.write_str("from"); + unparser.write_char(' '); + unparser.write_str(&format!("\"{}\"", source.value)); + } + + unparser.write_char(';'); + } + } +} + +// Export default declaration unparser +impl UnparserCombinator for ExportDefaultDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ExportDefaultDeclaration) { + unparser.write_str("export"); + unparser.write_char(' '); + unparser.write_str("default"); + unparser.write_char(' '); + + match &node.declaration { + ExportDefaultDeclarationKind::Declaration(decl) => { + match &**decl { + Declaration::FunctionDeclaration(func_decl) => { + FunctionDeclarationParser::new().unparse(unparser, func_decl); + }, + Declaration::ClassDeclaration(class_decl) => { + ClassDeclarationNode::new().unparse(unparser, class_decl); + }, + _ => { + // This shouldn't happen for export default declarations + unparser.write_str("/* unsupported declaration */"); + } + } + }, + ExportDefaultDeclarationKind::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + unparser.write_char(';'); + } + } + } +} + +// Export all declaration unparser +impl UnparserCombinator for ExportAllDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ExportAllDeclaration) { + unparser.write_str("export"); + unparser.write_char(' '); + unparser.write_char('*'); + + // Handle 'as' clause if present + if let Some(exported) = &node.exported { + unparser.write_char(' '); + unparser.write_str("as"); + unparser.write_char(' '); + unparser.write_str(&exported.name); + } + + unparser.write_char(' '); + unparser.write_str("from"); + unparser.write_char(' '); + unparser.write_str(&format!("\"{}\"", node.source.value)); + unparser.write_char(';'); + } +} 
+ +// Import declaration unparser +impl UnparserCombinator for ImportDeclarationParser { + fn unparse(&self, unparser: &mut Unparser, node: &ImportDeclaration) { + unparser.write_str("import"); + unparser.write_char(' '); + + // Handle specifiers + if node.specifiers.is_empty() { + // Side-effect import (no specifiers) + } else { + let mut has_default = false; + let mut has_namespace = false; + let mut named_specifiers = Vec::new(); + + // Categorize specifiers + for spec in &node.specifiers { + match spec { + ImportSpecifier::ImportDefaultSpecifier(default_spec) => { + has_default = true; + unparser.write_str(&default_spec.local.name); + }, + ImportSpecifier::ImportNamespaceSpecifier(namespace_spec) => { + has_namespace = true; + if has_default { + unparser.write_char(','); + unparser.space(); + } + unparser.write_str("* as "); + unparser.write_str(&namespace_spec.local.name); + }, + ImportSpecifier::ImportSpecifier(named_spec) => { + named_specifiers.push(named_spec); + } + } + } + + // Handle named specifiers + if !named_specifiers.is_empty() { + if has_default || has_namespace { + unparser.write_char(','); + unparser.space(); + } + + unparser.write_char('{'); + unparser.space(); + + // First named specifier + self.unparse_import_specifier(unparser, &named_specifiers[0]); + + // Remaining named specifiers + for spec in &named_specifiers[1..] 
{ + unparser.write_char(','); + unparser.space(); + self.unparse_import_specifier(unparser, spec); + } + + unparser.space(); + unparser.write_char('}'); + } + + //unparser.space(); + unparser.write_char(' '); + } + + // Write the source + unparser.write_str("from"); + unparser.write_char(' '); + unparser.write_str(&format!("\"{}\"", node.source.value)); + unparser.write_char(';'); + } +} + +// Helper methods for ExportNamedDeclarationParser +impl ExportNamedDeclarationParser { + fn unparse_export_specifier(&self, unparser: &mut Unparser, spec: &ExportSpecifier) { + unparser.write_str(&spec.local.name); + + // If the exported name is different from the local name + if spec.local.name != spec.exported.name { + unparser.write_char(' '); + unparser.write_str("as"); + unparser.write_char(' '); + unparser.write_str(&spec.exported.name); + } + } +} + +// Helper methods for ImportDeclarationParser +impl ImportDeclarationParser { + fn unparse_import_specifier(&self, unparser: &mut Unparser, spec: &NamedImportSpecifier) { + unparser.write_str(&spec.imported.name); + + // If the local name is different from the imported name + if spec.imported.name != spec.local.name { + unparser.write_char(' '); + unparser.write_str("as"); + unparser.write_char(' '); + unparser.write_str(&spec.local.name); + } + } +} diff --git a/src/grammar/expression.rs b/src/grammar/expression.rs index 15716db..50947e4 100644 --- a/src/grammar/expression.rs +++ b/src/grammar/expression.rs @@ -1,22 +1,23 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::function::*; -use super::call::*; use super::class::*; use super::array::*; -use super::member::*; use super::pattern::*; use super::literal::*; use super::object::*; use super::this::*; use super::new::*; +use super::await_expression::*; +use super::yield_expression::*; /// Parser for JavaScript expressions -pub struct ExpressionParser; +pub struct ExpressionNode; -impl ExpressionParser { +impl 
ExpressionNode { pub fn new() -> Self { Self } @@ -30,7 +31,7 @@ impl ExpressionParser { while !parser.is_at_end() && precedence < self.get_precedence(parser) { left = self.parse_infix(parser, left)?; } - + Ok(left) } @@ -40,12 +41,12 @@ impl ExpressionParser { Token::Identifier(_) => { // Check if this is a single-parameter arrow function let pos = parser.save_position(); - let ident = IdentifierParser::new().parse(parser)?; + let ident = IdentifierNode::new().parse(parser)?; if parser.check(&Token::Arrow) { // This is an arrow function with a single parameter parser.restore_position(pos); - return ArrowFunctionExpressionParser::new().parse(parser).map(Expression::ArrowFunctionExpression); + return ArrowFunctionExpressionNode::new().parse(parser).map(Expression::ArrowFunctionExpression); } Ok(Expression::Identifier(ident)) @@ -56,29 +57,30 @@ impl ExpressionParser { Token::RegExpLiteral(_, _) | Token::True | Token::False | + Token::Undefined | Token::Null => { - LiteralParser::new().parse(parser).map(Expression::Literal) + LiteralNode::new().parse(parser).map(Expression::Literal) }, Token::This => { - ThisExpressionParser::new().parse(parser).map(Expression::ThisExpression) + ThisExpressionNode::new().parse(parser).map(Expression::ThisExpression) }, Token::LeftBracket => { - ArrayExpressionParser::new().parse(parser).map(Expression::ArrayExpression) + ArrayExpressionNode::new().parse(parser).map(Expression::ArrayExpression) }, Token::LeftBrace => { - ObjectExpressionParser::new().parse(parser).map(Expression::ObjectExpression) + ObjectExpressionNode::new().parse(parser).map(Expression::ObjectExpression) }, Token::Function => { - FunctionExpressionParser::new().parse(parser).map(Expression::FunctionExpression) + FunctionExpressionNode::new().parse(parser).map(Expression::FunctionExpression) }, Token::Class => { - ClassExpressionParser::new().parse(parser).map(Expression::ClassExpression) + ClassExpressionNode::new().parse(parser).map(Expression::ClassExpression) }, 
Token::New => { - NewExpressionParser::new().parse(parser).map(Expression::NewExpression) + NewExpressionNode::new().parse(parser).map(Expression::NewExpression) }, Token::Super => { - SuperExpressionParser::new().parse(parser).map(Expression::SuperExpression) + SuperExpressionNode::new().parse(parser).map(Expression::SuperExpression) }, Token::LeftParen => { self.parse_grouped_expression(parser) @@ -98,25 +100,43 @@ impl ExpressionParser { }, Token::Await => { if parser.allows_await() { - AwaitExpressionParser::new().parse(parser).map(Expression::AwaitExpression) + AwaitExpressionNode::new().parse(parser).map(Expression::AwaitExpression) } else { Err(parser.error_at_current("'await' expression is only allowed within async functions")) } }, Token::Yield => { if parser.allows_yield() { - YieldExpressionParser::new().parse(parser).map(Expression::YieldExpression) + YieldExpressionNode::new().parse(parser).map(Expression::YieldExpression) } else { Err(parser.error_at_current("'yield' expression is only allowed within generator functions")) } }, Token::Async => { - // Look ahead to see if this is an async function or arrow function - if parser.peek_next(1) == &Token::Function || (parser.peek_next(1) == &Token::LeftParen && self.is_arrow_function_ahead(parser)) { - AsyncFunctionExpressionParser::new().parse(parser).map(Expression::FunctionExpression) - } else { - // Otherwise, it's just an identifier named "async" - IdentifierParser::new().parse(parser).map(Expression::Identifier) + // Save position to potentially backtrack + let pos = parser.save_position(); + + // Check if this is an async function + if parser.peek_next(1) == &Token::Function { + // This is an async function expression + parser.advance(); // Consume 'async' + + // Parse the function expression + let mut func_expr = FunctionExpressionNode::new().parse(parser)?; + func_expr.async_function = true; // Mark as async + + Ok(Expression::FunctionExpression(func_expr)) + } + // Check if this is an async arrow 
function + else if parser.peek_next(1) == &Token::LeftParen || + (matches!(parser.peek_next(1), Token::Identifier(_)) && + parser.peek_next(2) == &Token::Arrow) { + // This is an async arrow function + ArrowFunctionExpressionNode::new().parse(parser).map(Expression::ArrowFunctionExpression) + } + // Otherwise, it's just an identifier named "async" + else { + IdentifierNode::new().parse(parser).map(Expression::Identifier) } }, _ => Err(parser.error_at_current("Expected an expression")), @@ -127,25 +147,17 @@ impl ExpressionParser { fn parse_infix(&self, parser: &mut Parser, left: Expression) -> ParseResult { match parser.peek() { Token::LeftParen => { - CallExpressionParser::new().parse_with_callee(parser, left).map(Expression::CallExpression) + self.parse_with_callee(parser, left, false).map(Expression::CallExpression) }, - Token::Dot | - Token::LeftBracket => { - MemberExpressionParser::new().parse_with_object(parser, left).map(Expression::MemberExpression) + Token::LeftBracket | + Token::Dot => { + self.parse_with_object(parser, left, false).map(Expression::MemberExpression) }, Token::QuestionDot => { - // Optional chaining - parser.advance(); // Consume the '?.' 
- - // Check if this is an optional function call - if parser.check(&Token::LeftParen) { - // Optional function call: obj?.(args) - let call_expr = CallExpressionParser::new().parse_with_callee(parser, left)?; - Ok(Expression::CallExpression(call_expr)) + if matches!(parser.peek_next(1), &Token::LeftParen) { + self.parse_with_callee(parser, left, true).map(Expression::CallExpression) } else { - // Optional property access: obj?.prop - let member_expr = MemberExpressionParser::new().parse_with_object(parser, left)?; - Ok(Expression::MemberExpression(member_expr)) + self.parse_with_object(parser, left, true).map(Expression::MemberExpression) } }, Token::PlusPlus | @@ -220,7 +232,7 @@ impl ExpressionParser { // Check if this might be an arrow function with parameters if self.is_arrow_function_ahead(parser) { parser.restore_position(pos); - return ArrowFunctionExpressionParser::new().parse(parser).map(Expression::ArrowFunctionExpression); + return ArrowFunctionExpressionNode::new().parse(parser).map(Expression::ArrowFunctionExpression); } // Consume the opening parenthesis @@ -245,14 +257,114 @@ impl ExpressionParser { Ok(expr) } + fn parse_with_callee(&self, parser: &mut Parser, callee: Expression, optional: bool) -> ParseResult { + // Consume the question-dot token if this is optional chaining + if optional { + parser.assert_consume(&Token::QuestionDot, "Expected '?.' 
in optional chaining")?; + } + + parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + + let mut arguments = Vec::new(); + + if !parser.check(&Token::RightParen) { + arguments.push(ExpressionNode::new().parse(parser)?); + + while parser.consume(&Token::Comma) && !parser.check(&Token::RightParen) { + arguments.push(ExpressionNode::new().parse(parser)?); + } + } + + parser.assert_consume(&Token::RightParen, "Expected ')' after function arguments")?; + + Ok(CallExpression { + callee: Box::new(callee), + arguments, + optional, + }) + } + + fn parse_with_object(&self, parser: &mut Parser, object: Expression, optional: bool) -> ParseResult { + // Consume the dot or question-dot token + if optional { + parser.assert_consume(&Token::QuestionDot, "Expected '?.' in optional chaining")?; + } else if parser.check(&Token::Dot) { + parser.advance(); // Consume the '.' + } + + // Parse the property access + let (property, computed) = if parser.consume(&Token::LeftBracket) { + // Computed property access: obj[expr] or obj?.[expr] + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property")?; + (MemberProperty::Expression(Box::new(expr)), true) + } else { + // Static property access: obj.prop or obj?.prop + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierNode::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else if let Token::Default = parser.peek() { + // Special case for 'default' as property name + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + (MemberProperty::Identifier(Identifier { name }), false) + } else { + return Err(parser.error_at_current("Expected identifier after '.' 
or '?.'")); + } + }; + + // Create the member expression + let member_expr = MemberExpression { + object: Box::new(object), + property, + computed, + optional, + }; + + if parser.check(&Token::LeftBracket) || parser.check(&Token::Dot) { + // Continue parsing the chain of regular property accesses + return self.parse_with_object(parser, Expression::MemberExpression(member_expr), false); + } else if parser.check(&Token::QuestionDot) { + // Save position to check what follows + let pos = parser.save_position(); + parser.advance(); // Consume '?.' + + if parser.check(&Token::LeftParen) { + // This would be a function call, which we can't handle here + // Restore position and return the member expression we've parsed so far + parser.restore_position(pos); + return Ok(member_expr); + } else { + // Continue with optional property access + parser.restore_position(pos); + return self.parse_with_object(parser, Expression::MemberExpression(member_expr), true); + } + } + + return Ok(member_expr); + } + // Helper method to check if an arrow function is ahead fn is_arrow_function_ahead(&self, parser: &mut Parser) -> bool { // Save position let pos = parser.save_position(); // Skip the async keyword if present - if parser.check(&Token::Async) { + let is_async = parser.check(&Token::Async); + if is_async { parser.advance(); + + // For async arrow functions, we need at least one token after 'async' + if parser.is_at_end() { + parser.restore_position(pos); + return false; + } + + // If 'async' is followed by a line terminator, it's not an arrow function + if parser.previous_line_terminator() { + parser.restore_position(pos); + return false; + } } // Check for single parameter without parentheses @@ -681,7 +793,7 @@ impl ExpressionParser { } } -impl ParserCombinator for ExpressionParser { +impl ParserCombinator for ExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { self.parse_with_precedence(parser, Precedence::Lowest) } @@ -737,3 +849,546 @@ impl Precedence { } } 
} + +// Main expression unparser +impl UnparserCombinator for ExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &Expression) { + match node { + Expression::Identifier(ident) => { + unparser.write_str(&ident.name); + }, + Expression::Literal(lit) => { + self.unparse_literal(unparser, lit); + }, + Expression::ArrayExpression(array) => { + ArrayExpressionNode::new().unparse(unparser, array); + }, + Expression::ObjectExpression(obj) => { + ObjectExpressionNode::new().unparse(unparser, obj); + }, + Expression::FunctionExpression(func) => { + FunctionExpressionNode::new().unparse(unparser, func); + }, + Expression::ArrowFunctionExpression(arrow) => { + ArrowFunctionExpressionNode::new().unparse(unparser, arrow); + }, + Expression::ClassExpression(class) => { + ClassExpressionNode::new().unparse(unparser, class); + }, + Expression::TaggedTemplateExpression(tagged) => { + self.unparse_tagged_template(unparser, tagged); + }, + Expression::MemberExpression(member) => { + self.unparse_member_expression(unparser, member); + }, + Expression::SuperExpression(super_expr) => { + SuperExpressionNode::new().unparse(unparser, super_expr); + }, + Expression::MetaProperty(meta) => { + self.unparse_meta_property(unparser, meta); + }, + Expression::NewExpression(new_expr) => { + self.unparse_new_expression(unparser, new_expr); + }, + Expression::CallExpression(call) => { + self.unparse_call_expression(unparser, call); + }, + Expression::UpdateExpression(update) => { + self.unparse_update_expression(unparser, update); + }, + Expression::AwaitExpression(await_expr) => { + AwaitExpressionNode::new().unparse(unparser, await_expr); + }, + Expression::UnaryExpression(unary) => { + self.unparse_unary_expression(unparser, unary); + }, + Expression::BinaryExpression(binary) => { + self.unparse_binary_expression(unparser, binary); + }, + Expression::LogicalExpression(logical) => { + self.unparse_logical_expression(unparser, logical); + }, + Expression::ConditionalExpression(cond) 
=> { + self.unparse_conditional_expression(unparser, cond); + }, + Expression::YieldExpression(yield_expr) => { + YieldExpressionNode::new().unparse(unparser, yield_expr); + }, + Expression::AssignmentExpression(assign) => { + self.unparse_assignment_expression(unparser, assign); + }, + Expression::SequenceExpression(seq) => { + self.unparse_sequence_expression(unparser, seq); + }, + Expression::ThisExpression(this) => { + ThisExpressionNode::new().unparse(unparser, this); + }, + // TODO implement +// Expression::TemplateLiteral(template) => { +// self.unparse_template_literal(unparser, template); +// }, + //_ => { + // Fallback for any expression types not explicitly handled + // unparser.write_str("/* unsupported expression */"); + //} + } + } +} + +// Helper methods for ExpressionNode +impl ExpressionNode { + fn unparse_literal(&self, unparser: &mut Unparser, lit: &Literal) { + match lit { + Literal::StringLiteral(s) => { + unparser.write_char('"'); + unparser.write_str(&s.value); + unparser.write_char('"'); + }, + Literal::NumericLiteral(n) => { + unparser.write_str(&n.value.to_string()); + }, + Literal::BooleanLiteral(b) => { + unparser.write_str(if b.value { "true" } else { "false" }); + }, + Literal::NullLiteral(_) => { + unparser.write_str("null"); + }, + Literal::UndefinedLiteral(_) => { + unparser.undefined(); + }, + Literal::RegExpLiteral(r) => { + unparser.write_char('/'); + unparser.write_str(&r.pattern); + unparser.write_char('/'); + unparser.write_str(&r.flags); + }, + Literal::BigIntLiteral(b) => { + unparser.write_str(&b.value); + unparser.write_char('n'); + } + } + } + + fn unparse_tagged_template(&self, unparser: &mut Unparser, tagged: &TaggedTemplateExpression) { + // Unparse the tag + self.unparse(unparser, &tagged.tag); + + // Unparse the template literal + self.unparse_template_literal(unparser, &tagged.quasi); + } + + fn unparse_template_literal(&self, unparser: &mut Unparser, template: &TemplateLiteral) { + unparser.write_char('`'); + + for 
(i, elem) in template.quasis.iter().enumerate() { + // Write the template string part + unparser.write_str(&elem.value.raw); + + // If there's an expression after this quasi, write it + if i < template.expressions.len() { + unparser.write_str("${"); + self.unparse(unparser, &template.expressions[i]); + unparser.write_char('}'); + } + } + + unparser.write_char('`'); + } + + fn unparse_member_expression(&self, unparser: &mut Unparser, member: &MemberExpression) { + // Unparse the object + self.unparse(unparser, &member.object); + + // Handle optional chaining + if member.optional { + unparser.write_str("?."); + } + + // Unparse the property + match &member.property { + MemberProperty::Identifier(id) => { + if !member.optional { + unparser.write_char('.'); + } + unparser.write_str(&id.name); + }, + MemberProperty::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + MemberProperty::Expression(expr) => { + unparser.write_char('['); + self.unparse(unparser, expr); + unparser.write_char(']'); + } + } + } + + fn unparse_meta_property(&self, unparser: &mut Unparser, meta: &MetaProperty) { + unparser.write_str(&meta.meta.name); + unparser.write_char('.'); + unparser.write_str(&meta.property.name); + } + + fn unparse_new_expression(&self, unparser: &mut Unparser, new_expr: &NewExpression) { + unparser.write_str("new "); + + // Unparse the callee + self.unparse(unparser, &new_expr.callee); + + // Unparse the arguments + unparser.write_char('('); + + for (i, arg) in new_expr.arguments.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + self.unparse(unparser, arg); + } + + unparser.write_char(')'); + } + + fn unparse_call_expression(&self, unparser: &mut Unparser, call: &CallExpression) { + // Unparse the callee + self.unparse(unparser, &call.callee); + + // Handle optional chaining + if call.optional { + unparser.write_str("?."); + } + + 
// Unparse the arguments + unparser.write_char('('); + + for (i, arg) in call.arguments.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + self.unparse(unparser, arg); + } + + unparser.write_char(')'); + } + + fn unparse_update_expression(&self, unparser: &mut Unparser, update: &UpdateExpression) { + let operator_str = match update.operator { + UpdateOperator::Increment => "++", + UpdateOperator::Decrement => "--", + }; + + if update.prefix { + unparser.write_str(operator_str); + self.unparse(unparser, &update.argument); + } else { + self.unparse(unparser, &update.argument); + unparser.write_str(operator_str); + } + } + + fn unparse_unary_expression(&self, unparser: &mut Unparser, unary: &UnaryExpression) { + let operator_str = match unary.operator { + UnaryOperator::Plus => "+", + UnaryOperator::Minus => "-", + UnaryOperator::Not => "!", + UnaryOperator::BitwiseNot => "~", + UnaryOperator::Typeof => "typeof ", + UnaryOperator::Void => "void ", + UnaryOperator::Delete => "delete ", + }; + + unparser.write_str(operator_str); + + // Determine if we need parentheses + let needs_parens = matches!(&*unary.argument, + Expression::UnaryExpression(_) | + Expression::BinaryExpression(_) | + Expression::LogicalExpression(_) | + Expression::ConditionalExpression(_) | + Expression::AssignmentExpression(_) + ) && !matches!(unary.operator, UnaryOperator::Typeof | UnaryOperator::Void | UnaryOperator::Delete); + + // Unparse the argument + if needs_parens { + unparser.write_char('('); + self.unparse(unparser, &unary.argument); + unparser.write_char(')'); + } else { + self.unparse(unparser, &unary.argument); + } + } + + + fn unparse_binary_expression(&self, unparser: &mut Unparser, binary: &BinaryExpression) { + let operator_str = match binary.operator { + BinaryOperator::Addition => "+", + BinaryOperator::Subtraction => "-", + BinaryOperator::Multiplication => "*", + BinaryOperator::Division => "/", + BinaryOperator::Remainder => "%", + 
BinaryOperator::Exponentiation => "**", + BinaryOperator::LeftShift => "<<", + BinaryOperator::RightShift => ">>", + BinaryOperator::UnsignedRightShift => ">>>", + BinaryOperator::BitwiseAnd => "&", + BinaryOperator::BitwiseOr => "|", + BinaryOperator::BitwiseXor => "^", + BinaryOperator::Equal => "==", + BinaryOperator::NotEqual => "!=", + BinaryOperator::StrictEqual => "===", + BinaryOperator::StrictNotEqual => "!==", + BinaryOperator::LessThan => "<", + BinaryOperator::LessThanOrEqual => "<=", + BinaryOperator::GreaterThan => ">", + BinaryOperator::GreaterThanOrEqual => ">=", + BinaryOperator::In => " in ", + BinaryOperator::InstanceOf => " instanceof ", + }; + + // Determine if we need parentheses based on operator precedence + let left_needs_parens = self.needs_parentheses(&binary.left, &binary.operator, true); + let right_needs_parens = self.needs_parentheses(&binary.right, &binary.operator, false); + + // Unparse left operand + if left_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &binary.left); + unparser.write_char(')'); + } else { + self.unparse(unparser, &binary.left); + } + + // Add space before operator for readability + if !matches!(binary.operator, BinaryOperator::In | BinaryOperator::InstanceOf) { + unparser.space(); + } + + // Write the operator + unparser.write_str(operator_str); + + // Add space after operator for readability + if !matches!(binary.operator, BinaryOperator::In | BinaryOperator::InstanceOf) { + unparser.space(); + } + + // Unparse right operand + if right_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &binary.right); + unparser.write_char(')'); + } else { + self.unparse(unparser, &binary.right); + } + } + + fn unparse_logical_expression(&self, unparser: &mut Unparser, logical: &LogicalExpression) { + let operator_str = match logical.operator { + LogicalOperator::And => "&&", + LogicalOperator::Or => "||", + LogicalOperator::NullishCoalescing => "??", + }; + + // Determine if we need 
parentheses based on operator precedence + let left_needs_parens = self.needs_logical_parentheses(&logical.left, &logical.operator, true); + let right_needs_parens = self.needs_logical_parentheses(&logical.right, &logical.operator, false); + + // Unparse left operand + if left_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &logical.left); + unparser.write_char(')'); + } else { + self.unparse(unparser, &logical.left); + } + + // Add space before operator + unparser.space(); + + // Write the operator + unparser.write_str(operator_str); + + // Add space after operator + unparser.space(); + + // Unparse right operand + if right_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &logical.right); + unparser.write_char(')'); + } else { + self.unparse(unparser, &logical.right); + } + } + + fn unparse_conditional_expression(&self, unparser: &mut Unparser, cond: &ConditionalExpression) { + // Determine if test needs parentheses + let test_needs_parens = matches!(&*cond.test, + Expression::AssignmentExpression(_) | + Expression::ConditionalExpression(_) | + Expression::SequenceExpression(_) + ); + + // Unparse test expression + if test_needs_parens { + unparser.write_char('('); + self.unparse(unparser, &cond.test); + unparser.write_char(')'); + } else { + self.unparse(unparser, &cond.test); + } + + // Write the question mark + unparser.space(); + unparser.write_char('?'); + unparser.space(); + + // Unparse consequent expression + self.unparse(unparser, &cond.consequent); + + // Write the colon + unparser.space(); + unparser.write_char(':'); + unparser.space(); + + // Unparse alternate expression + self.unparse(unparser, &cond.alternate); + } + + fn unparse_assignment_expression(&self, unparser: &mut Unparser, assign: &AssignmentExpression) { + // Unparse the left side + match &assign.left { + AssignmentLeft::Pattern(pattern) => { + PatternNode::new().unparse(unparser, pattern); + }, + AssignmentLeft::Expression(expr) => { + 
self.unparse(unparser, expr); + } + } + + // Write the operator + unparser.space(); + match assign.operator { + AssignmentOperator::Assign => unparser.write_char('='), + AssignmentOperator::PlusAssign => unparser.write_str("+="), + AssignmentOperator::MinusAssign => unparser.write_str("-="), + AssignmentOperator::MultiplyAssign => unparser.write_str("*="), + AssignmentOperator::DivideAssign => unparser.write_str("/="), + AssignmentOperator::RemainderAssign => unparser.write_str("%="), + AssignmentOperator::ExponentiationAssign => unparser.write_str("**="), + AssignmentOperator::LeftShiftAssign => unparser.write_str("<<="), + AssignmentOperator::RightShiftAssign => unparser.write_str(">>="), + AssignmentOperator::UnsignedRightShiftAssign => unparser.write_str(">>>="), + AssignmentOperator::BitwiseAndAssign => unparser.write_str("&="), + AssignmentOperator::BitwiseOrAssign => unparser.write_str("|="), + AssignmentOperator::BitwiseXorAssign => unparser.write_str("^="), + AssignmentOperator::LogicalAndAssign => unparser.write_str("&&="), + AssignmentOperator::LogicalOrAssign => unparser.write_str("||="), + AssignmentOperator::NullishCoalescingAssign => unparser.write_str("??="), + } + unparser.space(); + + // Unparse the right side + self.unparse(unparser, &assign.right); + } + + fn unparse_sequence_expression(&self, unparser: &mut Unparser, seq: &SequenceExpression) { + for (i, expr) in seq.expressions.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + self.unparse(unparser, expr); + } + } + + // Helper method to determine if parentheses are needed for binary expressions + fn needs_parentheses(&self, expr: &Expression, parent_op: &BinaryOperator, is_left: bool) -> bool { + match expr { + Expression::BinaryExpression(binary) => { + let child_precedence = self.get_binary_precedence(&binary.operator); + let parent_precedence = self.get_binary_precedence(parent_op); + + // If the child has lower precedence, we need parentheses + if 
child_precedence < parent_precedence { + return true; + } + + // If they have the same precedence, we need parentheses for right-associative operators + // or for the right operand of left-associative operators + if child_precedence == parent_precedence { + // Handle right-associative operators (currently only **) + if matches!(parent_op, BinaryOperator::Exponentiation) { + return is_left; + } + // For left-associative operators, need parentheses on right side when precedences are equal + return !is_left; + } + + false + }, + Expression::LogicalExpression(_) | + Expression::ConditionalExpression(_) | + Expression::AssignmentExpression(_) | + Expression::SequenceExpression(_) => true, + _ => false, + } + } + + // Helper method to determine if parentheses are needed for logical expressions + fn needs_logical_parentheses(&self, expr: &Expression, parent_op: &LogicalOperator, is_left: bool) -> bool { + match expr { + Expression::LogicalExpression(logical) => { + let child_precedence = self.get_logical_precedence(&logical.operator); + let parent_precedence = self.get_logical_precedence(parent_op); + + // If the child has lower precedence, we need parentheses + if child_precedence < parent_precedence { + return true; + } + + // If they have the same precedence, we need parentheses for the right operand + // of left-associative operators (all logical operators are left-associative) + if child_precedence == parent_precedence && !is_left { + return true; + } + + false + }, + Expression::ConditionalExpression(_) | + Expression::AssignmentExpression(_) | + Expression::SequenceExpression(_) => true, + _ => false, + } + } + + // Helper method to get binary operator precedence + fn get_binary_precedence(&self, op: &BinaryOperator) -> u8 { + match op { + BinaryOperator::Exponentiation => 14, + BinaryOperator::Multiplication | BinaryOperator::Division | BinaryOperator::Remainder => 13, + BinaryOperator::Addition | BinaryOperator::Subtraction => 12, + BinaryOperator::LeftShift | 
BinaryOperator::RightShift | BinaryOperator::UnsignedRightShift => 11, + BinaryOperator::LessThan | BinaryOperator::LessThanOrEqual | + BinaryOperator::GreaterThan | BinaryOperator::GreaterThanOrEqual | + BinaryOperator::In | BinaryOperator::InstanceOf => 10, + BinaryOperator::Equal | BinaryOperator::NotEqual | + BinaryOperator::StrictEqual | BinaryOperator::StrictNotEqual => 9, + BinaryOperator::BitwiseAnd => 8, + BinaryOperator::BitwiseXor => 7, + BinaryOperator::BitwiseOr => 6, + //_ => 0, // Should not happen + } + } + + // Helper method to get logical operator precedence + fn get_logical_precedence(&self, op: &LogicalOperator) -> u8 { + match op { + LogicalOperator::And => 5, + LogicalOperator::Or => 4, + LogicalOperator::NullishCoalescing => 3, + } + } +} diff --git a/src/grammar/function.rs b/src/grammar/function.rs index c4d9462..383a734 100644 --- a/src/grammar/function.rs +++ b/src/grammar/function.rs @@ -1,21 +1,24 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::pattern::*; use super::expression::*; use super::statement::*; -/// Parser for function expressions -pub struct FunctionExpressionParser; +pub struct FunctionExpressionNode; -impl FunctionExpressionParser { +impl FunctionExpressionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for FunctionExpressionParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { +impl ParserCombinator for FunctionExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check if this is an async function + let async_function = parser.consume(&Token::Async); + // Consume the 'function' keyword parser.assert_consume(&Token::Function, "Expected 'function'")?; @@ -24,7 +27,7 @@ impl ParserCombinator for FunctionExpressionParser { // Parse the function name if present let id = if matches!(parser.peek(), Token::Identifier(_)) { - Some(IdentifierParser::new().parse(parser)?) + Some(IdentifierNode::new().parse(parser)?) 
} else { None }; @@ -36,18 +39,18 @@ impl ParserCombinator for FunctionExpressionParser { if !parser.check(&Token::RightParen) { // Parse the first parameter - params.push(PatternParser::new().parse(parser)?); + params.push(PatternNode::new().parse(parser)?); // Parse additional parameters while parser.consume(&Token::Comma) { - params.push(PatternParser::new().parse(parser)?); + params.push(PatternNode::new().parse(parser)?); } } parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; - let body = parser.with_context(LexicalContext::FunctionBody { allow_await: false, allow_yield: generator }, |p| { - BlockStatementParser::new().parse(p) + let body = parser.with_context(LexicalContext::FunctionBody { allow_await: async_function, allow_yield: generator }, |p| { + BlockStatementNode::new().parse(p) })?; Ok(FunctionExpression { @@ -55,79 +58,67 @@ impl ParserCombinator for FunctionExpressionParser { params, body, generator, - async_function: false, + async_function, }) } } -/// Parser for async function expressions -pub struct AsyncFunctionExpressionParser; - -impl AsyncFunctionExpressionParser { - pub fn new() -> Self { - Self - } -} -impl ParserCombinator for AsyncFunctionExpressionParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { - // Consume the 'async' keyword - parser.assert_consume(&Token::Async, "Expected 'async'")?; +impl UnparserCombinator for FunctionExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &FunctionExpression) { + // Write async if it's an async function + if node.async_function { + unparser.write_str("async"); + unparser.write_char(' '); + } - // Consume the 'function' keyword - parser.assert_consume(&Token::Function, "Expected 'function' after 'async'")?; + // Write the function keyword + unparser.write_str("function"); - // Check if this is a generator function - let generator = parser.consume(&Token::Star); + // Write * if it's a generator function + if node.generator { + 
unparser.write_char('*'); + } - // Parse the function name if present - let id = if matches!(parser.peek(), Token::Identifier(_)) { - Some(IdentifierParser::new().parse(parser)?) - } else { - None - }; - - // Parse the parameter list - parser.assert_consume(&Token::LeftParen, "Expected '(' after function name")?; + // Write the function name if present + if let Some(id) = &node.id { + //unparser.space(); + unparser.write_char(' '); + unparser.write_str(&id.name); + } - let mut params = Vec::new(); + // Write the parameter list + unparser.write_char('('); - if !parser.check(&Token::RightParen) { - // Parse the first parameter - params.push(PatternParser::new().parse(parser)?); + // Write parameters + if !node.params.is_empty() { + PatternNode::new().unparse(unparser, &node.params[0]); - // Parse additional parameters - while parser.consume(&Token::Comma) { - params.push(PatternParser::new().parse(parser)?); + for param in &node.params[1..] { + unparser.write_char(','); + unparser.space(); + PatternNode::new().unparse(unparser, param); } } - parser.assert_consume(&Token::RightParen, "Expected ')' after function parameters")?; - - let body = parser.with_context(LexicalContext::FunctionBody { allow_await: true, allow_yield: generator }, |p| { - BlockStatementParser::new().parse(p) - })?; - - Ok(FunctionExpression { - id, - params, - body, - generator, - async_function: true, - }) + unparser.write_char(')'); + unparser.space(); + + // Write the function body + BlockStatementNode::new().unparse(unparser, &node.body); } } /// Parser for arrow function expressions -pub struct ArrowFunctionExpressionParser; +pub struct ArrowFunctionExpressionNode; -impl ArrowFunctionExpressionParser { +impl ArrowFunctionExpressionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ArrowFunctionExpressionParser { +impl ParserCombinator for ArrowFunctionExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Check for async arrow function let async_function = 
parser.consume(&Token::Async); @@ -139,7 +130,7 @@ impl ParserCombinator for ArrowFunctionExpressionParser // Multiple parameters in parentheses if !parser.check(&Token::RightParen) { // Parse the first parameter - params.push(PatternParser::new().parse(parser)?); + params.push(PatternNode::new().parse(parser)?); // Parse additional parameters while parser.consume(&Token::Comma) { @@ -149,14 +140,14 @@ impl ParserCombinator for ArrowFunctionExpressionParser } // Parse the next parameter - params.push(PatternParser::new().parse(parser)?); + params.push(PatternNode::new().parse(parser)?); } } parser.assert_consume(&Token::RightParen, "Expected ')' after arrow function parameters")?; } else { // Single parameter without parentheses - params.push(PatternParser::new().parse(parser)?); + params.push(PatternNode::new().parse(parser)?); } // Consume the arrow @@ -166,10 +157,10 @@ impl ParserCombinator for ArrowFunctionExpressionParser let body = parser.with_context(LexicalContext::FunctionBody { allow_await: async_function, allow_yield: false }, |p| { if p.check(&Token::LeftBrace) { - let block = BlockStatementParser::new().parse(p)?; + let block = BlockStatementNode::new().parse(p)?; Ok(ArrowFunctionBody::BlockStatement(block)) } else { - let expr = ExpressionParser::new().parse(p)?; + let expr = ExpressionNode::new().parse(p)?; Ok(ArrowFunctionBody::Expression(Box::new(expr))) } })?; @@ -183,60 +174,48 @@ impl ParserCombinator for ArrowFunctionExpressionParser } } - -/// Parser for await expressions -pub struct AwaitExpressionParser; - -impl AwaitExpressionParser { - pub fn new() -> Self { - Self - } -} - -impl ParserCombinator for AwaitExpressionParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { - // Consume the 'await' keyword - parser.assert_consume(&Token::Await, "Expected 'await'")?; +impl UnparserCombinator for ArrowFunctionExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ArrowFunctionExpression) { + // Write async if it's an 
async arrow function + if node.async_function { + unparser.write_str("async"); + unparser.space(); + } - // Parse the argument - let argument = Box::new(ExpressionParser::new().parse(parser)?); - - Ok(AwaitExpression { - argument, - }) - } -} - -/// Parser for yield expressions -pub struct YieldExpressionParser; - -impl YieldExpressionParser { - pub fn new() -> Self { - Self - } -} - -impl ParserCombinator for YieldExpressionParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { - // Consume the 'yield' keyword - parser.assert_consume(&Token::Yield, "Expected 'yield'")?; - - // Check for delegate (yield*) - let delegate = parser.consume(&Token::Star); - - // Parse the argument if present - let argument = if parser.check(&Token::Semicolon) || parser.is_at_end() || - parser.check(&Token::RightBrace) || parser.check(&Token::Comma) || - parser.check(&Token::RightParen) || parser.check(&Token::RightBracket) || - parser.check(&Token::Colon) || parser.previous_line_terminator() { - None + // Write the parameter list + if node.params.len() == 1 && node.expression { + // Single parameter without parentheses for expression body arrow functions + PatternNode::new().unparse(unparser, &node.params[0]); } else { - Some(Box::new(ExpressionParser::new().parse(parser)?)) - }; + // Multiple parameters or block body requires parentheses + unparser.write_char('('); + + if !node.params.is_empty() { + PatternNode::new().unparse(unparser, &node.params[0]); + + for param in &node.params[1..] 
{ + unparser.write_char(','); + unparser.space(); + PatternNode::new().unparse(unparser, param); + } + } + + unparser.write_char(')'); + } - Ok(YieldExpression { - argument, - delegate, - }) + // Write the arrow + unparser.space(); + unparser.write_str("=>"); + unparser.space(); + + // Write the function body + match &node.body { + ArrowFunctionBody::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + ArrowFunctionBody::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + } + } } } diff --git a/src/grammar/literal.rs b/src/grammar/literal.rs index caeb232..9ed2f2e 100644 --- a/src/grammar/literal.rs +++ b/src/grammar/literal.rs @@ -1,60 +1,75 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; -pub struct LiteralParser; +pub struct LiteralNode; -impl LiteralParser { +impl LiteralNode { pub fn new() -> Self { Self } + + fn escape_string(&self, s: &str) -> String { + let mut result = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '\n' => result.push_str("\\n"), + '\r' => result.push_str("\\r"), + '\t' => result.push_str("\\t"), + '\\' => result.push_str("\\\\"), + '"' => result.push_str("\\\""), + '\'' => result.push_str("\\'"), + '\0' => result.push_str("\\0"), + '\u{08}' => result.push_str("\\b"), // backspace + '\u{0C}' => result.push_str("\\f"), // form feed + c if c.is_control() => { + // Use Unicode escape sequence for other control characters + let code = c as u32; + result.push_str(&format!("\\u{:04x}", code)); + }, + _ => result.push(c), + } + } + result + } } -impl ParserCombinator for LiteralParser { +impl ParserCombinator for LiteralNode { fn parse(&self, parser: &mut Parser) -> ParseResult { match parser.peek() { Token::StringLiteral(value) => { let value_clone = value.clone(); parser.advance(); - - return Ok(Literal::StringLiteral(StringLiteral { - value: value_clone.into_boxed_str(), - })); + return Ok(Literal::StringLiteral(StringLiteral 
{ value: value_clone.into_boxed_str() })); }, Token::NumberLiteral(value) => { let value_copy = *value; parser.advance(); - return Ok(Literal::NumericLiteral(NumericLiteral { - value: value_copy, - })); + return Ok(Literal::NumericLiteral(NumericLiteral { value: value_copy })); }, Token::BigIntLiteral(value) => { let value_clone = value.clone(); parser.advance(); - return Ok(Literal::BigIntLiteral(BigIntLiteral { - value: value_clone.into_boxed_str(), - })); + return Ok(Literal::BigIntLiteral(BigIntLiteral { value: value_clone.into_boxed_str() })); }, Token::RegExpLiteral(pattern, flags) => { let pattern_clone = pattern.clone(); let flags_clone = flags.clone(); parser.advance(); - return Ok(Literal::RegExpLiteral(RegExpLiteral { - pattern: pattern_clone.into_boxed_str(), - flags: flags_clone.into_boxed_str(), - })); + return Ok(Literal::RegExpLiteral(RegExpLiteral { pattern: pattern_clone.into_boxed_str(), flags: flags_clone.into_boxed_str() })); }, Token::True => { parser.advance(); - return Ok(Literal::BooleanLiteral(BooleanLiteral { - value: true, - })); + return Ok(Literal::BooleanLiteral(BooleanLiteral { value: true })); }, Token::False => { parser.advance(); - return Ok(Literal::BooleanLiteral(BooleanLiteral { - value: false, - })); + return Ok(Literal::BooleanLiteral(BooleanLiteral { value: false })); + }, + Token::Undefined => { + parser.advance(); + return Ok(Literal::UndefinedLiteral(UndefinedLiteral {})); }, Token::Null => { parser.advance(); @@ -64,3 +79,40 @@ impl ParserCombinator for LiteralParser { }; } } + +impl UnparserCombinator for LiteralNode { + fn unparse(&self, unparser: &mut Unparser, expr: &Expression) { + if let Expression::Literal(lit) = expr { + match lit { + Literal::StringLiteral(value) => { + let escaped = self.escape_string(&value.value); + unparser.write_char('"'); + unparser.write_str(&escaped); + unparser.write_char('"'); + }, + Literal::NumericLiteral(value) => { + unparser.write_str(&value.value.to_string()); + }, + 
Literal::BooleanLiteral(value) => { + unparser.write_str(if value.value { "true" } else { "false" }); + }, + Literal::NullLiteral(_) => { + unparser.write_str("null"); + }, + Literal::UndefinedLiteral(_) => { + unparser.undefined(); + }, + Literal::RegExpLiteral(value) => { + unparser.write_char('/'); + unparser.write_str(&value.pattern); + unparser.write_char('/'); + unparser.write_str(&value.flags); + }, + Literal::BigIntLiteral(value) => { + unparser.write_str(&value.value); + unparser.write_char('n'); + }, + } + } + } +} \ No newline at end of file diff --git a/src/grammar/member.rs b/src/grammar/member.rs deleted file mode 100644 index 15b9265..0000000 --- a/src/grammar/member.rs +++ /dev/null @@ -1,67 +0,0 @@ -use crate::ast::*; -use crate::lexer::*; -use crate::parser::*; -use super::expression::*; -use super::declaration::*; -use super::pattern::*; -use super::call::*; - -pub struct MemberExpressionParser; - -impl MemberExpressionParser { - pub fn new() -> Self { - Self - } - - /// Parse a member expression with a given object - pub fn parse_with_object(&self, parser: &mut Parser, object: Expression) -> ParseResult { - // Check if this is an optional chain - // Note: In many cases, the QuestionDot has already been consumed by the caller - let optional = parser.consume(&Token::QuestionDot); - - // Parse the property access - let (property, computed) = if parser.consume(&Token::LeftBracket) { - // Computed property access: obj[expr] or obj?.[expr] - let expr = ExpressionParser::new().parse(parser)?; - parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property")?; - (MemberProperty::Expression(Box::new(expr)), true) - } else if parser.check(&Token::LeftParen) && optional { - // Optional function call: obj?.(args) - // Let the call expression parser handle it - return Err(parser.error_at_current("Expected property name after optional chaining operator")); - } else if optional || parser.consume(&Token::Dot) { - // Static property access: 
obj.prop or obj?.prop - // At this point, we should be directly at the identifier - if let Token::Identifier(_) = parser.peek() { - let ident = IdentifierParser::new().parse(parser)?; - (MemberProperty::Identifier(ident), false) - } else { - return Err(parser.error_at_current("Expected identifier after '.' or '?.'")); - } - } else { - // If we're here, we're expecting a direct property access without a dot - // This happens when the caller has already consumed the QuestionDot - if let Token::Identifier(_) = parser.peek() { - let ident = IdentifierParser::new().parse(parser)?; - (MemberProperty::Identifier(ident), false) - } else { - return Err(parser.error_at_current("Expected '.' or '[' in member expression")); - } - }; - - Ok(MemberExpression { - object: Box::new(object), - property, - computed, - optional, - }) - } - -} - -impl ParserCombinator for MemberExpressionParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { - let object = ExpressionParser::new().parse(parser)?; - self.parse_with_object(parser, object) - } -} diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index cb13d5d..9203c7e 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -1,5 +1,4 @@ mod expression; -mod member; mod pattern; mod statement; mod declaration; @@ -8,13 +7,15 @@ mod function; mod class; mod array; mod object; -mod call; +mod property; mod module; +mod script; mod this; mod new; +mod await_expression; +mod yield_expression; pub use self::expression::*; -pub use self::member::*; pub use self::pattern::*; pub use self::statement::*; pub use self::declaration::*; @@ -23,7 +24,10 @@ pub use self::class::*; pub use self::array::*; pub use self::function::*; pub use self::object::*; -pub use self::call::*; +pub use self::property::*; pub use self::module::*; +pub use self::script::*; pub use self::this::*; -pub use self::new::*; \ No newline at end of file +pub use self::new::*; +pub use self::await_expression::*; +pub use self::yield_expression::*; diff --git 
a/src/grammar/module.rs b/src/grammar/module.rs index 4b799e1..9704851 100644 --- a/src/grammar/module.rs +++ b/src/grammar/module.rs @@ -1,51 +1,35 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::statement::*; -pub struct ModuleParser; +pub struct ModuleNode; -impl ModuleParser { +impl ModuleNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ModuleParser { - +impl ParserCombinator for ModuleNode { fn parse(&self, parser: &mut Parser) -> ParseResult { - let body = parser.with_context(LexicalContext::ModuleBody { allow_await: true }, |p| { let mut result = Vec::new(); while !p.is_at_end() { - let statement = StatementParser::new().parse(p)?; - result.push(statement); + result.push(StatementNode::new().parse(p)?); } Ok(result) })?; - Ok(Program { source_type: SourceType::Module, body }) } } -pub struct ScriptParser; - -impl ScriptParser { - pub fn new() -> Self { - Self - } -} - -impl ParserCombinator for ScriptParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { - let mut body = Vec::new(); - - while !parser.is_at_end() { - let statement = StatementParser::new().parse(parser)?; - body.push(statement); +impl UnparserCombinator for ModuleNode { + fn unparse(&self, unparser: &mut Unparser, program: &Program) { + for stmt in &program.body { + StatementNode::new().unparse(unparser, stmt); + unparser.newline(); } - - Ok(Program { source_type: SourceType::Script, body }) } -} - +} \ No newline at end of file diff --git a/src/grammar/new.rs b/src/grammar/new.rs index 6f049cb..1c01fee 100644 --- a/src/grammar/new.rs +++ b/src/grammar/new.rs @@ -1,44 +1,53 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::expression::*; -pub struct NewExpressionParser; +pub struct NewExpressionNode; -impl NewExpressionParser { +impl NewExpressionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for NewExpressionParser { +impl ParserCombinator for 
NewExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::New, "Expected 'new'")?; - - // Parse the callee with appropriate precedence - // Use a higher precedence than Call to ensure we don't consume too much - let callee = Box::new(ExpressionParser::new().parse_with_precedence(parser, Precedence::Call)?); - + let callee = Box::new(ExpressionNode::new().parse_with_precedence(parser, Precedence::Call)?); let mut arguments = Vec::new(); - - // Only parse arguments if there are parentheses if parser.check(&Token::LeftParen) { parser.assert_consume(&Token::LeftParen, "Expected '(' after new expression")?; - if !parser.check(&Token::RightParen) { - arguments.push(ExpressionParser::new().parse(parser)?); - + arguments.push(ExpressionNode::new().parse(parser)?); while parser.consume(&Token::Comma) && !parser.check(&Token::RightParen) { - arguments.push(ExpressionParser::new().parse(parser)?); + arguments.push(ExpressionNode::new().parse(parser)?); } } - parser.assert_consume(&Token::RightParen, "Expected ')' after new expression arguments")?; } - - Ok(NewExpression { - callee, - arguments, - }) + Ok(NewExpression { callee, arguments }) } } + +impl UnparserCombinator for NewExpressionNode { + fn unparse(&self, unparser: &mut Unparser, expr: &Expression) { + if let Expression::NewExpression(new_expr) = expr { + unparser.write_str("new"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, &new_expr.callee); + unparser.write_char('('); + if !new_expr.arguments.is_empty() { + for (i, arg) in new_expr.arguments.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + ExpressionNode::new().unparse(unparser, arg); + } + } + unparser.write_char(')'); + } + } +} \ No newline at end of file diff --git a/src/grammar/object.rs b/src/grammar/object.rs index 5f40332..4497c0a 100644 --- a/src/grammar/object.rs +++ b/src/grammar/object.rs @@ -1,31 +1,24 @@ use crate::ast::*; use 
crate::lexer::*; use crate::parser::*; -use super::expression::*; -use super::function::*; -use super::literal::*; +use crate::unparser::*; +use super::property::*; -/// Parser for object expressions -pub struct ObjectExpressionParser; +pub struct ObjectExpressionNode; -impl ObjectExpressionParser { +impl ObjectExpressionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ObjectExpressionParser { +impl ParserCombinator for ObjectExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { - parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of object expression")?; - let mut properties = Vec::new(); - while !parser.check(&Token::RightBrace) && !parser.is_at_end() { - - let property = PropertyParser::new().parse(parser)?; + let property = PropertyNode::new().parse(parser)?; properties.push(property.clone()); - if parser.consume(&Token::Comma) { if parser.check(&Token::RightBrace) { break; @@ -34,133 +27,33 @@ impl ParserCombinator for ObjectExpressionParser { break; } } - parser.assert_consume(&Token::RightBrace, "Expected '}' at the end of object expression")?; - Ok(ObjectExpression { properties }) } } -/// Parser for object properties -pub struct PropertyParser; - -impl PropertyParser { - pub fn new() -> Self { - Self - } - - fn parse_property_value(&self, parser: &mut Parser) -> ParseResult { - ExpressionParser::new().parse_with_precedence(parser, Precedence::Assignment) - } - -} - - -/// Parser for object properties -impl ParserCombinator for PropertyParser { - fn parse(&self, parser: &mut Parser) -> ParseResult { - // Check for special property types (getter, setter, async, generator) - let mut method = false; - let mut kind = PropertyKind::Init; - - // Parse property modifiers - if parser.consume(&Token::Get) { - kind = PropertyKind::Get; - } else if parser.consume(&Token::Set) { - kind = PropertyKind::Set; - } else if parser.consume(&Token::Async) { - method = true; - } else if parser.consume(&Token::Star) { - method = 
true; - } - - // Parse the property key (computed or not) - let computed = parser.consume(&Token::LeftBracket); - - let key = if computed { - // Computed property key: [expr] - let expr = ExpressionParser::new().parse(parser)?; - parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property key")?; - PropertyKey::Expression(Box::new(expr)) - } else { - // Regular property key: identifier, string, or number - match parser.peek() { - Token::StringLiteral(_) | - Token::NumberLiteral(_) => { - // String or number literal as key - let literal = LiteralParser::new().parse(parser)?; - PropertyKey::Literal(literal) - }, - Token::Identifier(_) => { - // Identifier as key - let name = match parser.peek() { - Token::Identifier(ident) => ident.clone(), - _ => unreachable!() - }; - parser.advance(); // Consume the identifier - PropertyKey::Identifier(Identifier { name: name.into() }) - }, - _ => return Err(parser.error_at_current("Expected property name")) +impl UnparserCombinator for ObjectExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &ObjectExpression) { + unparser.write_char('{'); + if !node.properties.is_empty() { + let multiline = node.properties.len() > 1; + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + PropertyNode::new().unparse(u, &node.properties[0]); + for property in &node.properties[1..] 
{ + u.write_char(','); + u.newline(); + PropertyNode::new().unparse(u, property); + } + }); + unparser.newline(); + } else { + unparser.space(); + PropertyNode::new().unparse(unparser, &node.properties[0]); + unparser.space(); } - }; - - // Check if this is a method (has parentheses after the key) - if parser.check(&Token::LeftParen) { - method = true; } - - // Check if this is a shorthand property (no colon after key) - let shorthand = !computed && !method && !parser.check(&Token::Colon) && - matches!(kind, PropertyKind::Init); - - // Parse the property value - let value = if shorthand { - // Shorthand property: { x } is equivalent to { x: x } - match &key { - PropertyKey::Identifier(ident) => { - Box::new(Expression::Identifier(Identifier { name: ident.name.clone() })) - }, - _ => return Err(parser.error_at_current("Invalid shorthand property")) - } - } else if method { - // Method definition: { method() { ... } } - parser.assert_consume(&Token::LeftParen, "Expected '(' after method name")?; - let func_expr = FunctionExpressionParser::new().parse(parser)?; - Box::new(Expression::FunctionExpression(func_expr)) - } else { - // Regular property: { key: value } - parser.assert_consume(&Token::Colon, "Expected ':' after property key")?; - - // Check for arrow function - let pos = parser.save_position(); - if matches!(parser.peek(), Token::Identifier(_)) && - parser.peek_next(1) == &Token::Arrow { - // This might be an arrow function - if let Ok(arrow_func) = ArrowFunctionExpressionParser::new().parse(parser) { - return Ok(Property { - key, - value: Box::new(Expression::ArrowFunctionExpression(arrow_func)), - kind, - method, - shorthand, - computed, - }); - } - parser.restore_position(pos); - } - - // Parse the value as an expression - let expr = self.parse_property_value(parser)?; - Box::new(expr) - }; - - Ok(Property { - key, - value, - kind, - method, - shorthand, - computed, - }) + unparser.write_char('}'); } } diff --git a/src/grammar/pattern.rs 
b/src/grammar/pattern.rs index 28ff5da..cdb5ef0 100644 --- a/src/grammar/pattern.rs +++ b/src/grammar/pattern.rs @@ -1,48 +1,47 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::literal::*; use super::expression::*; -use super::member::*; -/// Parser for JavaScript patterns (destructuring) -pub struct PatternParser; +pub struct PatternNode; -impl PatternParser { +impl PatternNode { pub fn new() -> Self { Self } } -impl ParserCombinator for PatternParser { +impl ParserCombinator for PatternNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Try to parse as object pattern if parser.check(&Token::LeftBrace) { - return ObjectPatternParser::new().parse(parser).map(Pattern::ObjectPattern); + return ObjectPatternNode::new().parse(parser).map(Pattern::ObjectPattern); } // Try to parse as array pattern if parser.check(&Token::LeftBracket) { - return ArrayPatternParser::new().parse(parser).map(Pattern::ArrayPattern); + return ArrayPatternNode::new().parse(parser).map(Pattern::ArrayPattern); } // Try to parse as rest element if parser.check(&Token::Ellipsis) { - return RestElementParser::new().parse(parser).map(Pattern::RestElement); + return RestElementNode::new().parse(parser).map(Pattern::RestElement); } // Try to parse as identifier or assignment pattern // First parse an identifier let pos = parser.save_position(); - if let Ok(ident) = IdentifierParser::new().parse(parser) { + if let Ok(ident) = IdentifierNode::new().parse(parser) { // Check if this is an assignment pattern if parser.check(&Token::Equal) { // Consume the equals sign parser.assert_consume(&Token::Equal, "Expected '=' in assignment pattern")?; // Parse the right side (must be a valid expression) - let right = ExpressionParser::new().parse(parser)?; + let right = ExpressionNode::new().parse(parser)?; return Ok(Pattern::AssignmentPattern(AssignmentPattern { left: Box::new(Pattern::Identifier(ident)), @@ -56,9 +55,9 @@ impl ParserCombinator for 
PatternParser { // Restore position after failed identifier attempt parser.restore_position(pos); - + // Try to parse as member expression (only valid in some contexts) - let result = MemberExpressionParser::new().parse(parser); + let result = MemberPatternNode::new().parse(parser); if result.is_ok() { return result.map(Pattern::MemberExpression); } @@ -68,16 +67,74 @@ impl ParserCombinator for PatternParser { } } + +pub struct MemberPatternNode; + +impl MemberPatternNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for MemberPatternNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let object = ExpressionNode::new().parse(parser)?; + + // Parse the property access (without optional chaining) + let (property, computed) = if parser.consume(&Token::LeftBracket) { + // Computed property access: obj[expr] + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property")?; + (MemberProperty::Expression(Box::new(expr)), true) + } else if parser.consume(&Token::Dot) { + // Static property access: obj.prop + // At this point, we should be directly at the identifier + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierNode::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else if let Token::Default = parser.peek() { + // Special case for 'default' as property name + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + (MemberProperty::Identifier(Identifier { name }), false) + } else { + return Err(parser.error_at_current("Expected identifier after '.'")); + } + } else { + // If we're here, we're expecting a direct property access without a dot + if let Token::Identifier(_) = parser.peek() { + let ident = IdentifierNode::new().parse(parser)?; + (MemberProperty::Identifier(ident), false) + } else if let Token::Default = parser.peek() { + // Special case for 'default' as property 
name + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + (MemberProperty::Identifier(Identifier { name }), false) + } else { + return Err(parser.error_at_current("Expected '.' or '[' in member pattern")); + } + }; + + Ok(MemberExpression { + object: Box::new(object), + property, + computed, + optional: false, + }) + } +} + + /// Parser for object patterns -pub struct ObjectPatternParser; +pub struct ObjectPatternNode; -impl ObjectPatternParser { +impl ObjectPatternNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ObjectPatternParser { +impl ParserCombinator for ObjectPatternNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Consume the opening brace parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of object pattern")?; @@ -88,20 +145,15 @@ impl ParserCombinator for ObjectPatternParser { while !parser.check(&Token::RightBrace) && !parser.is_at_end() { // Check for rest element if parser.check(&Token::Ellipsis) { - let rest = RestElementParser::new().parse(parser)?; + let rest = RestElementNode::new().parse(parser)?; properties.push(ObjectPatternProperty::RestElement(rest)); - // After rest element, we can only have a closing brace - if !parser.check(&Token::RightBrace) { - // Consume comma if present - if parser.consume(&Token::Comma) { - return Err(parser.error_at_current("Rest element must be the last element in an object pattern")); - } - } + // After rest element, allow a trailing comma (ES2018+) + parser.consume(&Token::Comma); break; } else { // Parse regular property - let property = ObjectPropertyParser::new().parse(parser)?; + let property = ObjectPropertyNode::new().parse(parser)?; properties.push(ObjectPatternProperty::Property(property)); // If there's no comma, we should be at the end @@ -121,15 +173,15 @@ impl ParserCombinator for ObjectPatternParser { } /// Parser for object pattern properties -pub struct ObjectPropertyParser; +pub struct 
ObjectPropertyNode; -impl ObjectPropertyParser { +impl ObjectPropertyNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ObjectPropertyParser { +impl ParserCombinator for ObjectPropertyNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Check if we have a computed property let computed = parser.consume(&Token::LeftBracket); @@ -137,7 +189,7 @@ impl ParserCombinator for ObjectPropertyParser { // Parse the key let key = if computed { // Parse expression inside brackets - let expr = ExpressionParser::new().parse(parser)?; + let expr = ExpressionNode::new().parse(parser)?; parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property key")?; PropertyKey::Expression(Box::new(expr)) } else { @@ -145,11 +197,17 @@ impl ParserCombinator for ObjectPropertyParser { match parser.peek() { Token::StringLiteral(_) | Token::NumberLiteral(_) => { - let literal = LiteralParser::new().parse(parser)?; + let literal = LiteralNode::new().parse(parser)?; PropertyKey::Literal(literal) }, + Token::Default => { + // Special case for 'default' as property key + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + PropertyKey::Identifier(Identifier { name }) + }, _ => { - let ident = IdentifierParser::new().parse(parser)?; + let ident = IdentifierNode::new().parse(parser)?; PropertyKey::Identifier(ident) } } @@ -174,7 +232,7 @@ impl ParserCombinator for ObjectPropertyParser { parser.assert_consume(&Token::Colon, "Expected ':' after property key in object pattern")?; // Parse the pattern - PatternParser::new().parse(parser)? + PatternNode::new().parse(parser)? 
}; Ok(ObjectProperty { @@ -187,15 +245,15 @@ impl ParserCombinator for ObjectPropertyParser { } /// Parser for array patterns -pub struct ArrayPatternParser; +pub struct ArrayPatternNode; -impl ArrayPatternParser { +impl ArrayPatternNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ArrayPatternParser { +impl ParserCombinator for ArrayPatternNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::LeftBracket, "Expected '[' at start of array pattern")?; @@ -207,7 +265,7 @@ impl ParserCombinator for ArrayPatternParser { elements.push(None); } else if parser.consume(&Token::Ellipsis) { // Handle rest element - let argument = Box::new(PatternParser::new().parse(parser)?); + let argument = Box::new(PatternNode::new().parse(parser)?); elements.push(Some(Pattern::RestElement(RestElement { argument }))); // Rest element must be the last one @@ -217,7 +275,7 @@ impl ParserCombinator for ArrayPatternParser { break; } else { // Parse regular element - let element = PatternParser::new().parse(parser)?; + let element = PatternNode::new().parse(parser)?; elements.push(Some(element)); // If there's no comma, we should be at the end @@ -235,21 +293,21 @@ impl ParserCombinator for ArrayPatternParser { /// Parser for rest elements -pub struct RestElementParser; +pub struct RestElementNode; -impl RestElementParser { +impl RestElementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for RestElementParser { +impl ParserCombinator for RestElementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Consume the ellipsis parser.assert_consume(&Token::Ellipsis, "Expected '...' 
for rest element")?; // Parse the argument pattern - let argument = PatternParser::new().parse(parser)?; + let argument = PatternNode::new().parse(parser)?; Ok(RestElement { argument: Box::new(argument), @@ -258,15 +316,15 @@ impl ParserCombinator for RestElementParser { } /// Parser for identifiers -pub struct IdentifierParser; +pub struct IdentifierNode; -impl IdentifierParser { +impl IdentifierNode { pub fn new() -> Self { Self } } -impl ParserCombinator for IdentifierParser { +impl ParserCombinator for IdentifierNode { fn parse(&self, parser: &mut Parser) -> ParseResult { let name = match parser.peek() { @@ -283,3 +341,262 @@ impl ParserCombinator for IdentifierParser { }) } } + +// Main pattern unparser +impl UnparserCombinator for PatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &Pattern) { + match node { + Pattern::Identifier(ident) => { + unparser.write_str(&ident.name); + }, + Pattern::ObjectPattern(pattern) => { + ObjectPatternNode::new().unparse(unparser, pattern); + }, + Pattern::ArrayPattern(pattern) => { + ArrayPatternNode::new().unparse(unparser, pattern); + }, + Pattern::RestElement(rest) => { + RestElementNode::new().unparse(unparser, rest); + }, + Pattern::AssignmentPattern(pattern) => { + // Left side (typically an identifier) + match &*pattern.left { + Pattern::Identifier(ident) => { + unparser.write_str(&ident.name); + }, + _ => { + PatternNode::new().unparse(unparser, &pattern.left); + } + } + + // Equals sign and default value + unparser.space(); + unparser.write_char('='); + unparser.space(); + ExpressionNode::new().unparse(unparser, &pattern.right); + }, + Pattern::MemberExpression(expr) => { + MemberPatternNode::new().unparse(unparser, expr); + } + } + } +} + +// Member expression pattern unparser +impl UnparserCombinator for MemberPatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &MemberExpression) { + // Unparse the object part + ExpressionNode::new().unparse(unparser, &node.object); + + // Unparse the 
property access + match &node.property { + MemberProperty::Identifier(ident) => { + // Static property access: obj.prop + unparser.write_char('.'); + unparser.write_str(&ident.name); + }, + MemberProperty::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + MemberProperty::Expression(expr) => { + // Computed property access: obj[expr] + unparser.write_char('['); + ExpressionNode::new().unparse(unparser, expr); + unparser.write_char(']'); + } + } + } +} + +// Object pattern unparser +impl UnparserCombinator for ObjectPatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &ObjectPattern) { + unparser.write_char('{'); + + if !node.properties.is_empty() { + let multiline = node.properties.len() > 3; + + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + // Process all properties + for (i, prop) in node.properties.iter().enumerate() { + if i > 0 { + u.write_char(','); + u.newline(); + } + + match prop { + ObjectPatternProperty::Property(property) => { + ObjectPropertyNode::new().unparse(u, property); + }, + ObjectPatternProperty::RestElement(rest) => { + RestElementNode::new().unparse(u, rest); + } + } + } + }); + unparser.newline(); + } else { + // Compact format for few properties + unparser.space(); + + for (i, prop) in node.properties.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + + match prop { + ObjectPatternProperty::Property(property) => { + ObjectPropertyNode::new().unparse(unparser, property); + }, + ObjectPatternProperty::RestElement(rest) => { + RestElementNode::new().unparse(unparser, rest); + } + } + } + + unparser.space(); + } + } + + unparser.write_char('}'); + } +} + +// Object property pattern unparser +impl UnparserCombinator for ObjectPropertyNode { + fn unparse(&self, unparser: &mut Unparser, node: &ObjectProperty) { + // Handle the property key + if node.computed { + 
unparser.write_char('['); + match &node.key { + PropertyKey::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys in computed properties + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + } + } + unparser.write_char(']'); + } else { + match &node.key { + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys + match lit { + Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Expression(_) => { + // This shouldn't happen for non-computed properties + unparser.write_str("\"error\""); + } + } + } + + // Handle the property value if not shorthand + if !node.shorthand { + unparser.write_char(':'); + unparser.space(); + PatternNode::new().unparse(unparser, &node.value); + } + } +} + +// Array pattern unparser +impl UnparserCombinator for ArrayPatternNode { + fn unparse(&self, unparser: &mut Unparser, node: &ArrayPattern) { + unparser.write_char('['); + + if !node.elements.is_empty() { + let multiline = node.elements.len() > 5; + + if multiline { + unparser.newline(); + unparser.with_indent(|u| { + // Process all elements + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + 
u.write_char(','); + u.newline(); + } + + match elem { + Some(pattern) => { + PatternNode::new().unparse(u, pattern); + }, + None => { + // Empty slot (elision) + } + } + } + }); + unparser.newline(); + } else { + // Compact format for few elements + unparser.space(); + + for (i, elem) in node.elements.iter().enumerate() { + if i > 0 { + unparser.write_char(','); + unparser.space(); + } + + match elem { + Some(pattern) => { + PatternNode::new().unparse(unparser, pattern); + }, + None => { + // Empty slot (elision) + } + } + } + + unparser.space(); + } + } + + unparser.write_char(']'); + } +} + +// Rest element unparser +impl UnparserCombinator for RestElementNode { + fn unparse(&self, unparser: &mut Unparser, node: &RestElement) { + unparser.write_str("..."); + PatternNode::new().unparse(unparser, &node.argument); + } +} + +// Identifier unparser +impl UnparserCombinator for IdentifierNode { + fn unparse(&self, unparser: &mut Unparser, node: &Identifier) { + unparser.write_str(&node.name); + } +} diff --git a/src/grammar/property.rs b/src/grammar/property.rs new file mode 100644 index 0000000..74c93f6 --- /dev/null +++ b/src/grammar/property.rs @@ -0,0 +1,246 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; +use super::function::*; +use super::literal::*; + +pub struct PropertyNode; + +impl PropertyNode { + pub fn new() -> Self { + Self + } +} + +/// Parser for object properties +impl ParserCombinator for PropertyNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + // Check for special property types (getter, setter, async, generator) + let mut method = false; + let mut kind = PropertyKind::Init; + + // Parse property modifiers + if parser.consume(&Token::Get) { + kind = PropertyKind::Get; + } else if parser.consume(&Token::Set) { + kind = PropertyKind::Set; + } else if parser.consume(&Token::Async) { + method = true; + } else if parser.consume(&Token::Star) { + method = true; + } + 
+ // Parse the property key (computed or not) + let computed = parser.consume(&Token::LeftBracket); + + let key = if computed { + // Computed property key: [expr] + let expr = ExpressionNode::new().parse(parser)?; + parser.assert_consume(&Token::RightBracket, "Expected ']' after computed property key")?; + PropertyKey::Expression(Box::new(expr)) + } else { + // Regular property key: identifier, string, or number + match parser.peek() { + Token::StringLiteral(_) | + Token::NumberLiteral(_) => { + let literal = LiteralNode::new().parse(parser)?; + PropertyKey::Literal(literal) + }, + Token::Identifier(_) => { + // Identifier as key + let name = match parser.peek() { + Token::Identifier(ident) => ident.clone(), + _ => unreachable!() + }; + parser.advance(); // Consume the identifier + PropertyKey::Identifier(Identifier { name: name.into() }) + }, + Token::Default => { + // Special case for 'default' as property key + parser.advance(); // Consume the 'default' token + let name = "default".to_string().into_boxed_str(); + PropertyKey::Identifier(Identifier { name }) + }, + _ => return Err(parser.error_at_current("Expected property name")) + } + }; + + // Check if this is a method (has parentheses after the key) + if parser.check(&Token::LeftParen) { + method = true; + } + + // Check if this is a shorthand property (no colon after key) + let shorthand = !computed && !method && !parser.check(&Token::Colon) && + matches!(kind, PropertyKind::Init); + + // Parse the property value + let value = if shorthand { + // Shorthand property: { x } is equivalent to { x: x } + match &key { + PropertyKey::Identifier(ident) => { + Box::new(Expression::Identifier(Identifier { name: ident.name.clone() })) + }, + _ => return Err(parser.error_at_current("Invalid shorthand property")) + } + } else if method { + // Method definition: { method() { ... 
} } + parser.assert_consume(&Token::LeftParen, "Expected '(' after method name")?; + let func_expr = FunctionExpressionNode::new().parse(parser)?; + Box::new(Expression::FunctionExpression(func_expr)) + } else { + // Regular property: { key: value } + parser.assert_consume(&Token::Colon, "Expected ':' after property key")?; + + // Check for arrow function + let pos = parser.save_position(); + if matches!(parser.peek(), Token::Identifier(_)) && + parser.peek_next(1) == &Token::Arrow { + // This might be an arrow function + if let Ok(arrow_func) = ArrowFunctionExpressionNode::new().parse(parser) { + return Ok(Property { + key, + value: Box::new(Expression::ArrowFunctionExpression(arrow_func)), + kind, + method, + shorthand, + computed, + }); + } + parser.restore_position(pos); + } + + // Parse the value as an expression + let expr = ExpressionNode::new().parse_with_precedence(parser, Precedence::Assignment)?; + + Box::new(expr) + }; + + Ok(Property { + key, + value, + kind, + method, + shorthand, + computed, + }) + } +} + + +impl UnparserCombinator for PropertyNode { + fn unparse(&self, unparser: &mut Unparser, node: &Property) { + // Handle property modifiers (get, set, async, generator) + match node.kind { + PropertyKind::Get => { + unparser.write_str("get"); + unparser.write_char(' '); + }, + PropertyKind::Set => { + unparser.write_str("set"); + unparser.write_char(' '); + }, + PropertyKind::Init => { + // For async methods + if node.method && !node.computed { + if let PropertyKey::Identifier(id) = &node.key { + if id.name.starts_with("async") && id.name.len() > 5 { + unparser.write_str("async"); + unparser.write_char(' '); + // Continue with the rest of the method name later + } + } + } + + // For generator methods + if node.method && !node.computed { + if let PropertyKey::Identifier(id) = &node.key { + if id.name.starts_with("*") { + unparser.write_char('*'); + // Continue with the rest of the method name later + } + } + } + } + } + + // Handle the property key 
+ if node.computed { + unparser.write_char('['); + match &node.key { + PropertyKey::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Identifier(id) => { + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys in computed properties + match lit { + crate::ast::Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + crate::ast::Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + crate::ast::Literal::BooleanLiteral(b) => unparser.write_str(if b.value { "true" } else { "false" }), + crate::ast::Literal::BigIntLiteral(b) => { + unparser.write_str(&b.value); + unparser.write_char('n'); + }, + crate::ast::Literal::NullLiteral(_) => unparser.write_str("null"), + crate::ast::Literal::UndefinedLiteral(_) => unparser.undefined(), + crate::ast::Literal::RegExpLiteral(r) => { + unparser.write_char('/'); + unparser.write_str(&r.pattern); + unparser.write_char('/'); + unparser.write_str(&r.flags); + }, + } + } + } + unparser.write_char(']'); + } else { + match &node.key { + PropertyKey::Identifier(id) => { + // For regular identifiers + unparser.write_str(&id.name); + }, + PropertyKey::PrivateIdentifier(id) => { + // Handle private identifiers (class private fields/methods) + unparser.write_char('#'); + unparser.write_str(&id.name); + }, + PropertyKey::Literal(lit) => { + // Handle literal keys + match lit { + crate::ast::Literal::StringLiteral(s) => unparser.write_str(&format!("\"{}\"", s.value)), + crate::ast::Literal::NumericLiteral(n) => unparser.write_str(&n.value.to_string()), + _ => unparser.write_str("\"unknown\""), + } + }, + PropertyKey::Expression(_) => { + // This shouldn't happen for non-computed properties + unparser.write_str("\"error\""); + } + } + } + + // Handle the 
property value + if node.shorthand { + // Shorthand property: { x } instead of { x: x } + // No need to write anything else + } else if node.method { + // Method definition: { method() { ... } } + ExpressionNode::new().unparse(unparser, &node.value); + } else { + // Regular property: { key: value } + unparser.write_char(':'); + unparser.space(); + ExpressionNode::new().unparse(unparser, &node.value); + } + } +} diff --git a/src/grammar/script.rs b/src/grammar/script.rs new file mode 100644 index 0000000..a5b348a --- /dev/null +++ b/src/grammar/script.rs @@ -0,0 +1,31 @@ +use crate::ast::*; +use crate::parser::*; +use crate::unparser::*; +use super::statement::*; + +pub struct ScriptNode; + +impl ScriptNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for ScriptNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + let mut body = Vec::new(); + while !parser.is_at_end() { + body.push(StatementNode::new().parse(parser)?); + } + Ok(Program { source_type: SourceType::Script, body }) + } +} + +impl UnparserCombinator for ScriptNode { + fn unparse(&self, unparser: &mut Unparser, program: &Program) { + for stmt in &program.body { + StatementNode::new().unparse(unparser, stmt); + unparser.newline(); + } + } +} \ No newline at end of file diff --git a/src/grammar/statement.rs b/src/grammar/statement.rs index ad158e0..c63d834 100644 --- a/src/grammar/statement.rs +++ b/src/grammar/statement.rs @@ -1,22 +1,66 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; use super::expression::*; use super::declaration::*; use super::pattern::*; use super::class::*; use super::object::*; -/// Parser for JavaScript statements -pub struct StatementParser; +pub struct StatementNode; -impl StatementParser { +impl StatementNode { pub fn new() -> Self { Self } + + fn determine_for_loop_type(&self, parser: &mut Parser) -> ForLoopType { + let pos = parser.save_position(); + + // Skip the variable declaration or pattern + if 
parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { + parser.advance(); + + while !parser.check(&Token::Semicolon) && + !parser.check(&Token::In) && + !parser.check(&Token::Of) && + !parser.check(&Token::RightParen) && + !parser.is_at_end() { + parser.advance(); + } + } else { + while !parser.check(&Token::Semicolon) && + !parser.check(&Token::In) && + !parser.check(&Token::Of) && + !parser.check(&Token::RightParen) && + !parser.is_at_end() { + parser.advance(); + } + } + + let loop_type = match parser.peek() { + Token::In => ForLoopType::ForIn, + Token::Of => ForLoopType::ForOf, + _ => ForLoopType::Standard, + }; + + // Restore position + parser.restore_position(pos); + + loop_type + } + +} + +// Enum to represent the different types of for loops +enum ForLoopType { + Standard, // for (init; test; update) + ForIn, // for (left in right) + ForOf, // for (left of right) } -impl ParserCombinator for StatementParser { +impl ParserCombinator for StatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { match parser.peek() { // Special case for object literals at the start of a statement @@ -25,7 +69,7 @@ impl ParserCombinator for StatementParser { let pos = parser.save_position(); // Attempt to parse as an object literal - match ObjectExpressionParser::new().parse(parser) { + match ObjectExpressionNode::new().parse(parser) { Ok(obj_expr) => { // Successfully parsed as object expression @@ -39,17 +83,51 @@ impl ParserCombinator for StatementParser { Err(_) => { // Failed to parse as object expression, restore position and try as block statement parser.restore_position(pos); - return BlockStatementParser::new().parse(parser).map(Statement::BlockStatement); + return BlockStatementNode::new().parse(parser).map(Statement::BlockStatement); } } }, Token::Var | Token::Let | Token::Const => { - VariableDeclarationParser::new().parse(parser).map(|decl| + VariableDeclarationNode::new().parse(parser).map(|decl| 
Statement::Declaration(Declaration::VariableDeclaration(decl)) ) }, + Token::Async => { + // Check if this is an async function declaration + let pos = parser.save_position(); + parser.advance(); // Skip 'async' + + if parser.check(&Token::Function) { + // This is an async function declaration or expression + parser.restore_position(pos); + + // Try to parse as function declaration first + let pos2 = parser.save_position(); + parser.advance(); // Skip 'async' + parser.advance(); // Skip 'function' + + // Check for generator function + let _is_generator = parser.consume(&Token::Star); + + // If the next token is an identifier, this is a function declaration + if let Token::Identifier(_) = parser.peek() { + parser.restore_position(pos); + FunctionDeclarationParser::new().parse(parser).map(|decl| + Statement::Declaration(Declaration::FunctionDeclaration(decl)) + ) + } else { + // Otherwise, it's a function expression statement + parser.restore_position(pos); + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + } else { + // Not a function, treat as regular expression statement + parser.restore_position(pos); + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) + } + }, Token::Function => { // Check if this is a function declaration (has an identifier) let pos = parser.save_position(); @@ -65,7 +143,7 @@ impl ParserCombinator for StatementParser { } else { // Otherwise, it's a function expression statement parser.restore_position(pos); - ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) } }, Token::Class => { @@ -76,11 +154,11 @@ impl ParserCombinator for StatementParser { // If the next token is an identifier, this is a class declaration if let Token::Identifier(_) = parser.peek() { parser.restore_position(pos); - ClassDeclarationParser::new().parse(parser).map(|decl| 
Statement::Declaration(Declaration::ClassDeclaration(decl))) + ClassDeclarationNode::new().parse(parser).map(|decl| Statement::Declaration(Declaration::ClassDeclaration(decl))) } else { // Otherwise, it's a class expression statement parser.restore_position(pos); - ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) } }, Token::Import => { @@ -98,53 +176,64 @@ impl ParserCombinator for StatementParser { }) }, Token::If => { - IfStatementParser::new().parse(parser).map(Statement::IfStatement) + IfStatementNode::new().parse(parser).map(Statement::IfStatement) }, Token::Switch => { - SwitchStatementParser::new().parse(parser).map(Statement::SwitchStatement) + SwitchStatementNode::new().parse(parser).map(Statement::SwitchStatement) }, Token::For => { - // Try to parse as for statement let pos = parser.save_position(); - match ForStatementParser::new().parse(parser) { - Ok(stmt) => Ok(Statement::ForStatement(stmt)), - Err(_) => { - // Try to parse as for-in statement - parser.restore_position(pos); - match ForInStatementParser::new().parse(parser) { - Ok(stmt) => Ok(Statement::ForInStatement(stmt)), - Err(_) => { - // Try to parse as for-of statement - parser.restore_position(pos); - ForOfStatementParser::new().parse(parser).map(Statement::ForOfStatement) - } - } - } + + // Consume the 'for' token + parser.advance(); + + // Expect opening parenthesis + if !parser.consume(&Token::LeftParen) { + parser.restore_position(pos); + return Err(parser.error_at_current("Expected '(' after 'for'")); + } + + // Look ahead to determine the type of for loop + let loop_type = self.determine_for_loop_type(parser); + + // Restore position to start parsing the full statement + parser.restore_position(pos); + + match loop_type { + ForLoopType::Standard => { + ForStatementNode::new().parse(parser).map(Statement::ForStatement) + }, + ForLoopType::ForIn => { + 
ForInStatementNode::new().parse(parser).map(Statement::ForInStatement) + }, + ForLoopType::ForOf => { + ForOfStatementNode::new().parse(parser).map(Statement::ForOfStatement) + }, } }, Token::While => { - WhileStatementParser::new().parse(parser).map(Statement::WhileStatement) + WhileStatementNode::new().parse(parser).map(Statement::WhileStatement) }, Token::Do => { - DoWhileStatementParser::new().parse(parser).map(Statement::DoWhileStatement) + DoWhileStatementNode::new().parse(parser).map(Statement::DoWhileStatement) }, Token::Try => { - TryStatementParser::new().parse(parser).map(Statement::TryStatement) + TryStatementNode::new().parse(parser).map(Statement::TryStatement) }, Token::With => { - WithStatementParser::new().parse(parser).map(Statement::WithStatement) + WithStatementNode::new().parse(parser).map(Statement::WithStatement) }, Token::Throw => { - ThrowStatementParser::new().parse(parser).map(Statement::ThrowStatement) + ThrowStatementNode::new().parse(parser).map(Statement::ThrowStatement) }, Token::Return => { - ReturnStatementParser::new().parse(parser).map(Statement::ReturnStatement) + ReturnStatementNode::new().parse(parser).map(Statement::ReturnStatement) }, Token::Break => { - BreakStatementParser::new().parse(parser).map(Statement::BreakStatement) + BreakStatementNode::new().parse(parser).map(Statement::BreakStatement) }, Token::Continue => { - ContinueStatementParser::new().parse(parser).map(Statement::ContinueStatement) + ContinueStatementNode::new().parse(parser).map(Statement::ContinueStatement) }, Token::Debugger => { parser.advance(); // Consume 'debugger' @@ -158,7 +247,7 @@ impl ParserCombinator for StatementParser { // Check for labeled statements (identifier followed by colon) Token::Identifier(_) => { let pos = parser.save_position(); - let ident = IdentifierParser::new().parse(parser)?; + let ident = IdentifierNode::new().parse(parser)?; if parser.consume(&Token::Colon) { // This is a labeled statement @@ -167,27 +256,27 @@ impl 
ParserCombinator for StatementParser { } else { // Not a labeled statement, restore position and parse as expression statement parser.restore_position(pos); - ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) } }, // Default to expression statement _ => { - ExpressionStatementParser::new().parse(parser).map(Statement::ExpressionStatement) + ExpressionStatementNode::new().parse(parser).map(Statement::ExpressionStatement) } } } } /// Parser for block statements -pub struct BlockStatementParser; +pub struct BlockStatementNode; -impl BlockStatementParser { +impl BlockStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for BlockStatementParser { +impl ParserCombinator for BlockStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::LeftBrace, "Expected '{' at the start of block statement")?; @@ -195,7 +284,7 @@ impl ParserCombinator for BlockStatementParser { while !parser.check(&Token::RightBrace) && !parser.is_at_end() { // Parse a statement - let statement = StatementParser::new().parse(parser)?; + let statement = StatementNode::new().parse(parser)?; body.push(statement); } @@ -205,21 +294,39 @@ impl ParserCombinator for BlockStatementParser { } } +impl UnparserCombinator for BlockStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &BlockStatement) { + unparser.write_char('{'); + unparser.newline(); + + if !node.body.is_empty() { + unparser.with_indent(|u| { + for stmt in &node.body { + StatementNode::new().unparse(u, stmt); + u.newline(); + } + }); + } + + unparser.write_char('}'); + } +} + /// Parser for expression statements -pub struct ExpressionStatementParser; +pub struct ExpressionStatementNode; -impl ExpressionStatementParser { +impl ExpressionStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ExpressionStatementParser { +impl ParserCombinator 
for ExpressionStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { // Check for directive prologue (string literals that might be "use strict") if let Token::StringLiteral(_) = parser.peek() { let pos = parser.save_position(); - let expr = ExpressionParser::new().parse(parser)?; + let expr = ExpressionNode::new().parse(parser)?; // If this is followed by a semicolon or end of block, it's a directive if parser.check(&Token::Semicolon) || parser.check(&Token::RightBrace) || parser.is_at_end() { @@ -249,7 +356,7 @@ impl ParserCombinator for ExpressionStatementParser { let pos = parser.save_position(); // Try to parse as object expression - match ObjectExpressionParser::new().parse(parser) { + match ObjectExpressionNode::new().parse(parser) { Ok(obj_expr) => { // Successfully parsed as object expression // Consume the semicolon if present @@ -268,7 +375,7 @@ impl ParserCombinator for ExpressionStatementParser { } // Regular expression statement parsing - let expr = ExpressionParser::new().parse(parser)?; + let expr = ExpressionNode::new().parse(parser)?; // Consume the semicolon if present (ASI rules apply) // In JavaScript, semicolons are optional in many cases due to Automatic Semicolon Insertion (ASI) @@ -291,27 +398,34 @@ impl ParserCombinator for ExpressionStatementParser { } /// Parser for if statements -pub struct IfStatementParser; +pub struct IfStatementNode; -impl IfStatementParser { +impl IfStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for IfStatementParser { +impl ParserCombinator for IfStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::If, "Expected 'if'")?; parser.assert_consume(&Token::LeftParen, "Expected '(' after 'if'")?; - let test = Box::new(ExpressionParser::new().parse(parser)?); + let test = Box::new(ExpressionNode::new().parse(parser)?); parser.assert_consume(&Token::RightParen, "Expected ')' after if condition")?; - let consequent = 
Box::new(StatementParser::new().parse(parser)?); + // Check if the consequent starts with a left brace + let consequent = if parser.check(&Token::LeftBrace) { + // Force parsing as a block statement + Box::new(BlockStatementNode::new().parse(parser).map(Statement::BlockStatement)?) + } else { + // For other statement types, use the general statement parser + Box::new(StatementNode::new().parse(parser)?) + }; let alternate = if parser.consume(&Token::Else) { - Some(Box::new(StatementParser::new().parse(parser)?)) + Some(Box::new(StatementNode::new().parse(parser)?)) } else { None }; @@ -324,21 +438,22 @@ impl ParserCombinator for IfStatementParser { } } + /// Parser for switch statements -pub struct SwitchStatementParser; +pub struct SwitchStatementNode; -impl SwitchStatementParser { +impl SwitchStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for SwitchStatementParser { +impl ParserCombinator for SwitchStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Switch, "Expected 'switch'")?; parser.assert_consume(&Token::LeftParen, "Expected '(' after 'switch'")?; - let discriminant = Box::new(ExpressionParser::new().parse(parser)?); + let discriminant = Box::new(ExpressionNode::new().parse(parser)?); parser.assert_consume(&Token::RightParen, "Expected ')' after switch expression")?; parser.assert_consume(&Token::LeftBrace, "Expected '{' to start switch body")?; @@ -350,7 +465,7 @@ impl ParserCombinator for SwitchStatementParser { while !p.check(&Token::RightBrace) && !p.is_at_end() { if p.consume(&Token::Case) { // Case clause - let test = Some(Box::new(ExpressionParser::new().parse(p)?)); + let test = Some(Box::new(ExpressionNode::new().parse(p)?)); p.assert_consume(&Token::Colon, "Expected ':' after case value")?; let mut consequent = Vec::new(); @@ -358,7 +473,7 @@ impl ParserCombinator for SwitchStatementParser { !p.check(&Token::Default) && !p.check(&Token::RightBrace) && !p.is_at_end() { - 
consequent.push(StatementParser::new().parse(p)?); + consequent.push(StatementNode::new().parse(p)?); } result.push(SwitchCase { test, consequent }); @@ -371,7 +486,7 @@ impl ParserCombinator for SwitchStatementParser { !p.check(&Token::Default) && !p.check(&Token::RightBrace) && !p.is_at_end() { - consequent.push(StatementParser::new().parse(p)?); + consequent.push(StatementNode::new().parse(p)?); } result.push(SwitchCase { test: None, consequent }); @@ -394,25 +509,32 @@ impl ParserCombinator for SwitchStatementParser { } /// Parser for while statements -pub struct WhileStatementParser; +pub struct WhileStatementNode; -impl WhileStatementParser { +impl WhileStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for WhileStatementParser { +impl ParserCombinator for WhileStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::While, "Expected 'while'")?; parser.assert_consume(&Token::LeftParen, "Expected '(' after 'while'")?; - let test = Box::new(ExpressionParser::new().parse(parser)?); + let test = Box::new(ExpressionNode::new().parse(parser)?); parser.assert_consume(&Token::RightParen, "Expected ')' after while condition")?; let body = parser.with_context(LexicalContext::LoopBody, |p| { - StatementParser::new().parse(p) + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } })?; Ok(WhileStatement { @@ -423,26 +545,33 @@ impl ParserCombinator for WhileStatementParser { } /// Parser for do-while statements -pub struct DoWhileStatementParser; +pub struct DoWhileStatementNode; -impl DoWhileStatementParser { +impl DoWhileStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for DoWhileStatementParser { +impl ParserCombinator for DoWhileStatementNode 
{ fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Do, "Expected 'do'")?; let body = parser.with_context(LexicalContext::LoopBody, |p| { - StatementParser::new().parse(p) + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } })?; parser.assert_consume(&Token::While, "Expected 'while' after do block")?; parser.assert_consume(&Token::LeftParen, "Expected '(' after 'while'")?; - let test = Box::new(ExpressionParser::new().parse(parser)?); + let test = Box::new(ExpressionNode::new().parse(parser)?); parser.assert_consume(&Token::RightParen, "Expected ')' after while condition")?; parser.assert_consume(&Token::Semicolon, "Expected ';' after while condition")?; @@ -455,15 +584,15 @@ impl ParserCombinator for DoWhileStatementParser { } /// Parser for for statements -pub struct ForStatementParser; +pub struct ForStatementNode; -impl ForStatementParser { +impl ForStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ForStatementParser { +impl ParserCombinator for ForStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::For, "Expected 'for'")?; parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; @@ -473,12 +602,24 @@ impl ParserCombinator for ForStatementParser { None } else if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { // Variable declaration - let decl = VariableDeclarationParser::new().parse(parser)?; + let decl = VariableDeclarationNode::new().parse(parser)?; + + // Check if this is a for-in or for-of loop + if parser.check(&Token::In) || parser.check(&Token::Of) { + return Err(parser.error_at_current("Expected ';' after for initialization")); + } + 
parser.assert_consume(&Token::Semicolon, "Expected ';' after for initialization")?; Some(ForInit::VariableDeclaration(decl)) } else { // Expression - let expr = ExpressionParser::new().parse(parser)?; + let expr = ExpressionNode::new().parse(parser)?; + + // Check if this is a for-in or for-of loop + if parser.check(&Token::In) || parser.check(&Token::Of) { + return Err(parser.error_at_current("Expected ';' after for initialization")); + } + parser.assert_consume(&Token::Semicolon, "Expected ';' after for initialization")?; Some(ForInit::Expression(Box::new(expr))) }; @@ -487,7 +628,7 @@ impl ParserCombinator for ForStatementParser { let test = if parser.consume(&Token::Semicolon) { None } else { - let expr = ExpressionParser::new().parse(parser)?; + let expr = ExpressionNode::new().parse(parser)?; parser.assert_consume(&Token::Semicolon, "Expected ';' after for condition")?; Some(Box::new(expr)) }; @@ -496,13 +637,20 @@ impl ParserCombinator for ForStatementParser { let update = if parser.consume(&Token::RightParen) { None } else { - let expr = ExpressionParser::new().parse(parser)?; + let expr = ExpressionNode::new().parse(parser)?; parser.assert_consume(&Token::RightParen, "Expected ')' after for clauses")?; Some(Box::new(expr)) }; let body = parser.with_context(LexicalContext::LoopBody, |p| { - StatementParser::new().parse(p) + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } })?; Ok(ForStatement { @@ -514,16 +662,17 @@ impl ParserCombinator for ForStatementParser { } } + /// Parser for for-in statements -pub struct ForInStatementParser; +pub struct ForInStatementNode; -impl ForInStatementParser { +impl ForInStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ForInStatementParser { +impl 
ParserCombinator for ForInStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::For, "Expected 'for'")?; parser.assert_consume(&Token::LeftParen, "Expected '(' after 'for'")?; @@ -531,24 +680,36 @@ impl ParserCombinator for ForInStatementParser { // Parse left side (variable declaration or pattern) let left = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { // Variable declaration - let decl = VariableDeclarationParser::new().parse(parser)?; + let decl = VariableDeclarationNode::new().parse(parser)?; ForInOf::VariableDeclaration(decl) } else { // Pattern - let pattern = PatternParser::new().parse(parser)?; + let pattern = PatternNode::new().parse(parser)?; ForInOf::Pattern(pattern) }; + // Check for 'in' keyword - fail early if not found + if !parser.check(&Token::In) { + return Err(parser.error_at_current("Expected 'in' in for-in statement")); + } + // Expect 'in' keyword parser.assert_consume(&Token::In, "Expected 'in' in for-in statement")?; // Parse right side (expression) - let right = Box::new(ExpressionParser::new().parse(parser)?); + let right = Box::new(ExpressionNode::new().parse(parser)?); parser.assert_consume(&Token::RightParen, "Expected ')' after for-in clauses")?; let body = parser.with_context(LexicalContext::LoopBody, |p| { - StatementParser::new().parse(p) + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } })?; Ok(ForInStatement { @@ -559,16 +720,78 @@ impl ParserCombinator for ForInStatementParser { } } +impl UnparserCombinator for ForInStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ForInStatement) { + unparser.write_str("for"); + unparser.space(); + unparser.write_char('('); + + match 
&node.left { + ForInOf::VariableDeclaration(decl) => { + // Special handling for variable declarations in for-in loops + // Write the variable kind (var, let, const) + match decl.kind { + VariableKind::Var => unparser.write_str("var"), + VariableKind::Let => unparser.write_str("let"), + VariableKind::Const => unparser.write_str("const"), + } + + unparser.write_char(' '); + + // Write the declarations without semicolon + if !decl.declarations.is_empty() { + // First declaration + VariableDeclaratorNode::new().unparse(unparser, &decl.declarations[0]); + + // Remaining declarations + for d in &decl.declarations[1..] { + unparser.write_char(','); + unparser.space(); + VariableDeclaratorNode::new().unparse(unparser, d); + } + } + // No semicolon here! + }, + ForInOf::Pattern(pattern) => { + PatternNode::new().unparse(unparser, pattern); + } + } + + unparser.write_char(' '); + unparser.write_str("in"); + unparser.write_char(' '); + + ExpressionNode::new().unparse(unparser, &node.right); + + unparser.write_char(')'); + + match &*node.body { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + + /// Parser for for-of statements -pub struct ForOfStatementParser; +pub struct ForOfStatementNode; -impl ForOfStatementParser { +impl ForOfStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ForOfStatementParser { +/// Parser for for-of statements +impl ParserCombinator for ForOfStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::For, "Expected 'for'")?; @@ -580,24 +803,37 @@ impl ParserCombinator for ForOfStatementParser { // Parse left side (variable declaration or pattern) let left = if parser.check(&Token::Var) || parser.check(&Token::Let) || parser.check(&Token::Const) { // Variable declaration - let decl = 
VariableDeclarationParser::new().parse(parser)?; + let decl = VariableDeclarationNode::new().parse(parser)?; ForInOf::VariableDeclaration(decl) } else { // Pattern - let pattern = PatternParser::new().parse(parser)?; + let pattern = PatternNode::new().parse(parser)?; ForInOf::Pattern(pattern) }; + // Check for 'of' keyword - fail early if not found + if !parser.check(&Token::Of) { + return Err(parser.error_at_current("Expected 'of' in for-of statement")); + } + // Expect 'of' keyword parser.assert_consume(&Token::Of, "Expected 'of' in for-of statement")?; // Parse right side (expression) - let right = Box::new(ExpressionParser::new().parse(parser)?); + let right = Box::new(ExpressionNode::new().parse(parser)?); parser.assert_consume(&Token::RightParen, "Expected ')' after for-of clauses")?; + // Parse the body with special handling for block statements let body = parser.with_context(LexicalContext::LoopBody, |p| { - StatementParser::new().parse(p) + // Check if the body starts with a left brace + if p.check(&Token::LeftBrace) { + // Force parsing as a block statement + BlockStatementNode::new().parse(p).map(Statement::BlockStatement) + } else { + // For other statement types, use the general statement parser + StatementNode::new().parse(p) + } })?; Ok(ForOfStatement { @@ -609,16 +845,84 @@ impl ParserCombinator for ForOfStatementParser { } } + +impl UnparserCombinator for ForOfStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ForOfStatement) { + unparser.write_str("for"); + + if node.await_token { + unparser.write_char(' '); + unparser.write_str("await"); + } + + unparser.space(); + unparser.write_char('('); + + match &node.left { + ForInOf::VariableDeclaration(decl) => { + // Special handling for variable declarations in for-of loops + // Write the variable kind (var, let, const) + match decl.kind { + VariableKind::Var => unparser.write_str("var"), + VariableKind::Let => unparser.write_str("let"), + VariableKind::Const => 
unparser.write_str("const"), + } + + unparser.write_char(' '); + + // Write the declarations without semicolon + if !decl.declarations.is_empty() { + // First declaration + VariableDeclaratorNode::new().unparse(unparser, &decl.declarations[0]); + + // Remaining declarations + for d in &decl.declarations[1..] { + unparser.write_char(','); + unparser.space(); + VariableDeclaratorNode::new().unparse(unparser, d); + } + } + // No semicolon here! + }, + ForInOf::Pattern(pattern) => { + PatternNode::new().unparse(unparser, pattern); + } + } + + unparser.write_char(' '); + unparser.write_str("of"); + unparser.write_char(' '); + + ExpressionNode::new().unparse(unparser, &node.right); + + unparser.write_char(')'); + + match &*node.body { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + + /// Parser for break statements -pub struct BreakStatementParser; +pub struct BreakStatementNode; -impl BreakStatementParser { +impl BreakStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for BreakStatementParser { +impl ParserCombinator for BreakStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Break, "Expected 'break'")?; @@ -629,7 +933,7 @@ impl ParserCombinator for BreakStatementParser { // Check for label let label = if !parser.previous_line_terminator() && matches!(parser.peek(), Token::Identifier(_)) { - Some(IdentifierParser::new().parse(parser)?) + Some(IdentifierNode::new().parse(parser)?) 
} else { None }; @@ -642,15 +946,15 @@ impl ParserCombinator for BreakStatementParser { } /// Parser for continue statements -pub struct ContinueStatementParser; +pub struct ContinueStatementNode; -impl ContinueStatementParser { +impl ContinueStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ContinueStatementParser { +impl ParserCombinator for ContinueStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Continue, "Expected 'continue'")?; @@ -661,7 +965,7 @@ impl ParserCombinator for ContinueStatementParser { // Check for label let label = if !parser.previous_line_terminator() && matches!(parser.peek(), Token::Identifier(_)) { - Some(IdentifierParser::new().parse(parser)?) + Some(IdentifierNode::new().parse(parser)?) } else { None }; @@ -673,16 +977,29 @@ impl ParserCombinator for ContinueStatementParser { } } +impl UnparserCombinator for ContinueStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ContinueStatement) { + unparser.write_str("continue"); + + if let Some(label) = &node.label { + unparser.space(); + unparser.write_str(&label.name); + } + + unparser.write_char(';'); + } +} + /// Parser for return statements -pub struct ReturnStatementParser; +pub struct ReturnStatementNode; -impl ReturnStatementParser { +impl ReturnStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ReturnStatementParser { +impl ParserCombinator for ReturnStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Return, "Expected 'return'")?; @@ -698,7 +1015,7 @@ impl ParserCombinator for ReturnStatementParser { parser.is_at_end() { None } else { - Some(Box::new(ExpressionParser::new().parse(parser)?)) + Some(Box::new(ExpressionNode::new().parse(parser)?)) }; // Consume semicolon if present @@ -708,25 +1025,38 @@ impl ParserCombinator for ReturnStatementParser { } } +impl UnparserCombinator for ReturnStatementNode { + fn unparse(&self, 
unparser: &mut Unparser, node: &ReturnStatement) { + unparser.write_str("return"); + + if let Some(argument) = &node.argument { + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, argument); + } + + unparser.write_char(';'); + } +} + /// Parser for with statements -pub struct WithStatementParser; +pub struct WithStatementNode; -impl WithStatementParser { +impl WithStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for WithStatementParser { +impl ParserCombinator for WithStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::With, "Expected 'with'")?; parser.assert_consume(&Token::LeftParen, "Expected '(' after 'with'")?; - let object = Box::new(ExpressionParser::new().parse(parser)?); + let object = Box::new(ExpressionNode::new().parse(parser)?); parser.assert_consume(&Token::RightParen, "Expected ')' after with object")?; - let body = Box::new(StatementParser::new().parse(parser)?); + let body = Box::new(StatementNode::new().parse(parser)?); Ok(WithStatement { object, @@ -735,58 +1065,103 @@ impl ParserCombinator for WithStatementParser { } } +impl UnparserCombinator for WithStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &WithStatement) { + unparser.write_str("with"); + unparser.write_char(' '); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.object); + unparser.write_char(')'); + + match &*node.body { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + /// Parser for throw statements -pub struct ThrowStatementParser; +pub struct ThrowStatementNode; -impl ThrowStatementParser { +impl ThrowStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ThrowStatementParser { +impl ParserCombinator for ThrowStatementNode { fn 
parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Throw, "Expected 'throw'")?; - // Line terminator not allowed between throw and expression if parser.previous_line_terminator() { return Err(parser.error_at_current("Line terminator not allowed after 'throw'")); } - let argument = Box::new(ExpressionParser::new().parse(parser)?); - - // Consume semicolon if present + let argument = Box::new(ExpressionNode::new().parse(parser)?); + parser.consume(&Token::Semicolon); Ok(ThrowStatement { argument }) } } +impl UnparserCombinator for ThrowStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ThrowStatement) { + unparser.write_str("throw"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, &node.argument); + unparser.write_char(';'); + } +} + /// Parser for try statements -pub struct TryStatementParser; +pub struct TryStatementNode; -impl TryStatementParser { +impl TryStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for TryStatementParser { +impl ParserCombinator for TryStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::Try, "Expected 'try'")?; - let block = BlockStatementParser::new().parse(parser)?; + let block = BlockStatementNode::new().parse(parser)?; // Parse catch clause if present let handler = if parser.consume(&Token::Catch) { - Some(self.parse_catch_clause(parser)?) + // Parse parameter if present + let param = if parser.consume(&Token::LeftParen) { + let pattern = PatternNode::new().parse(parser)?; + parser.assert_consume(&Token::RightParen, "Expected ')' after catch parameter")?; + Some(pattern) + } else { + None + }; + + let body = BlockStatementNode::new().parse(parser)?; + + Some(CatchClause { + param, + body, + }) + } else { None }; // Parse finally clause if present let finalizer = if parser.consume(&Token::Finally) { - Some(BlockStatementParser::new().parse(parser)?) + Some(BlockStatementNode::new().parse(parser)?) 
} else { None }; @@ -804,47 +1179,58 @@ impl ParserCombinator for TryStatementParser { } } -impl TryStatementParser { - fn parse_catch_clause(&self, parser: &mut Parser) -> ParseResult { - // The 'catch' keyword has already been consumed - - // Parse parameter if present - let param = if parser.consume(&Token::LeftParen) { - let pattern = PatternParser::new().parse(parser)?; - parser.assert_consume(&Token::RightParen, "Expected ')' after catch parameter")?; - Some(pattern) - } else { - None - }; - - let body = BlockStatementParser::new().parse(parser)?; +impl UnparserCombinator for TryStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &TryStatement) { + unparser.write_str("try"); + unparser.space(); + BlockStatementNode::new().unparse(unparser, &node.block); + + // Handle catch clause if present + if let Some(handler) = &node.handler { + unparser.space(); + unparser.write_str("catch"); + + // Handle catch parameter if present + if let Some(param) = &handler.param { + unparser.space(); + unparser.write_char('('); + PatternNode::new().unparse(unparser, param); + unparser.write_char(')'); + } + + unparser.space(); + BlockStatementNode::new().unparse(unparser, &handler.body); + } - Ok(CatchClause { - param, - body, - }) + // Handle finally clause if present + if let Some(finalizer) = &node.finalizer { + unparser.space(); + unparser.write_str("finally"); + unparser.space(); + BlockStatementNode::new().unparse(unparser, finalizer); + } } } /// Parser for labeled statements -pub struct LabeledStatementParser; +pub struct LabeledStatementNode; -impl LabeledStatementParser { +impl LabeledStatementNode { pub fn new() -> Self { Self } } -impl ParserCombinator for LabeledStatementParser { +impl ParserCombinator for LabeledStatementNode { fn parse(&self, parser: &mut Parser) -> ParseResult { - let label = IdentifierParser::new().parse(parser)?; + let label = IdentifierNode::new().parse(parser)?; parser.assert_consume(&Token::Colon, "Expected ':' after label")?; 
// Add label to context //parser.add_label(label.name.clone()); - let body = Box::new(StatementParser::new().parse(parser)?); + let body = Box::new(StatementNode::new().parse(parser)?); // Remove label from context //parser.remove_label(&label.name); @@ -855,3 +1241,270 @@ impl ParserCombinator for LabeledStatementParser { }) } } + +impl UnparserCombinator for LabeledStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &LabeledStatement) { + unparser.write_str(&node.label.name); + unparser.write_char(':'); + unparser.space(); + StatementNode::new().unparse(unparser, &node.body); + } +} + +// Main statement unparser +impl UnparserCombinator for StatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &Statement) { + match node { + Statement::BlockStatement(stmt) => BlockStatementNode::new().unparse(unparser, stmt), + Statement::BreakStatement(stmt) => BreakStatementNode::new().unparse(unparser, stmt), + Statement::ContinueStatement(stmt) => ContinueStatementNode::new().unparse(unparser, stmt), + Statement::DebuggerStatement => { + // TODO its own unparser + unparser.write_str("debugger"); + unparser.write_char(';'); + }, + Statement::DoWhileStatement(stmt) => DoWhileStatementNode::new().unparse(unparser, stmt), + //Statement::EmptyStatement => unparser.write_char(';'), + Statement::EmptyStatement => {}, + Statement::ExpressionStatement(stmt) => ExpressionStatementNode::new().unparse(unparser, stmt), + Statement::ForStatement(stmt) => ForStatementNode::new().unparse(unparser, stmt), + Statement::ForInStatement(stmt) => ForInStatementNode::new().unparse(unparser, stmt), + Statement::ForOfStatement(stmt) => ForOfStatementNode::new().unparse(unparser, stmt), + Statement::IfStatement(stmt) => IfStatementNode::new().unparse(unparser, stmt), + Statement::LabeledStatement(stmt) => LabeledStatementNode::new().unparse(unparser, stmt), + Statement::ReturnStatement(stmt) => ReturnStatementNode::new().unparse(unparser, stmt), + 
Statement::SwitchStatement(stmt) => SwitchStatementNode::new().unparse(unparser, stmt), + Statement::ThrowStatement(stmt) => ThrowStatementNode::new().unparse(unparser, stmt), + Statement::TryStatement(stmt) => TryStatementNode::new().unparse(unparser, stmt), + Statement::WhileStatement(stmt) => WhileStatementNode::new().unparse(unparser, stmt), + Statement::WithStatement(stmt) => WithStatementNode::new().unparse(unparser, stmt), + Statement::Declaration(decl) => { + match decl { + Declaration::ClassDeclaration(decl) => ClassDeclarationNode::new().unparse(unparser, decl), + Declaration::FunctionDeclaration(decl) => FunctionDeclarationParser::new().unparse(unparser, decl), + Declaration::VariableDeclaration(decl) => VariableDeclarationNode::new().unparse(unparser, decl), + Declaration::ImportDeclaration(decl) => ImportDeclarationParser::new().unparse(unparser, decl), + Declaration::ExportNamedDeclaration(decl) => ExportNamedDeclarationParser::new().unparse(unparser, decl), + Declaration::ExportDefaultDeclaration(decl) => ExportDefaultDeclarationParser::new().unparse(unparser, decl), + Declaration::ExportAllDeclaration(decl) => ExportAllDeclarationParser::new().unparse(unparser, decl), + } + } + } + } +} + +// Expression statement unparser +impl UnparserCombinator for ExpressionStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ExpressionStatement) { + ExpressionNode::new().unparse(unparser, &node.expression); + unparser.write_char(';'); + } +} + +// If statement unparser +impl UnparserCombinator for IfStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &IfStatement) { + unparser.write_str("if"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.test); + unparser.write_char(')'); + + // Handle consequent + match &*node.consequent { + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + 
unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.consequent); + }); + } + } + + // Handle alternate (else branch) + if let Some(alt) = &node.alternate { + unparser.space(); + unparser.write_str("else"); + + match &**alt { + Statement::IfStatement(_) => { + // For else if, keep on same line + unparser.space(); + StatementNode::new().unparse(unparser, alt); + }, + Statement::BlockStatement(block) => { + unparser.space(); + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, alt); + }); + } + } + } + } +} + +// Switch statement unparser +impl UnparserCombinator for SwitchStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &SwitchStatement) { + unparser.write_str("switch"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.discriminant); + unparser.write_char(')'); + unparser.space(); + unparser.write_char('{'); + unparser.newline(); + + for case in &node.cases { + if let Some(test) = &case.test { + unparser.write_str("case"); + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, test); + unparser.write_char(':'); + } else { + unparser.write_str("default:"); + } + + if !case.consequent.is_empty() { + unparser.newline(); + + unparser.with_indent(|u| { + for stmt in &case.consequent { + StatementNode::new().unparse(u, stmt); + u.newline(); + } + }); + } else { + unparser.newline(); + } + } + + unparser.write_char('}'); + } +} + +// While statement unparser +impl UnparserCombinator for WhileStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &WhileStatement) { + unparser.write_str("while"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.test); + unparser.write_char(')'); + unparser.space(); + + match &*node.body { + Statement::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + _ => 
{ + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + +// Do-while statement unparser +impl UnparserCombinator for DoWhileStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &DoWhileStatement) { + unparser.write_str("do"); + unparser.space(); + + match &*node.body { + Statement::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + unparser.newline(); + } + } + + unparser.space(); + unparser.write_str("while"); + unparser.space(); + unparser.write_char('('); + ExpressionNode::new().unparse(unparser, &node.test); + unparser.write_str(");"); + } +} + +// For statement unparser +impl UnparserCombinator for ForStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &ForStatement) { + unparser.write_str("for"); + unparser.space(); + unparser.write_char('('); + + // Initialization + if let Some(init) = &node.init { + match init { + ForInit::VariableDeclaration(decl) => { + VariableDeclarationNode::new().unparse(unparser, decl); + }, + ForInit::Expression(expr) => { + ExpressionNode::new().unparse(unparser, expr); + unparser.write_char(';'); + } + } + } else { + unparser.write_char(';'); + } + + // Test condition + unparser.space(); + if let Some(test) = &node.test { + ExpressionNode::new().unparse(unparser, test); + } + unparser.write_char(';'); + + // Update expression + unparser.space(); + if let Some(update) = &node.update { + ExpressionNode::new().unparse(unparser, update); + } + + unparser.write_char(')'); + unparser.space(); + + match &*node.body { + Statement::BlockStatement(block) => { + BlockStatementNode::new().unparse(unparser, block); + }, + _ => { + unparser.newline(); + unparser.with_indent(|u| { + StatementNode::new().unparse(u, &node.body); + }); + } + } + } +} + + +// Break statement unparser +impl UnparserCombinator 
for BreakStatementNode { + fn unparse(&self, unparser: &mut Unparser, node: &BreakStatement) { + unparser.write_str("break"); + + if let Some(label) = &node.label { + unparser.space(); + unparser.write_str(&label.name); + } + + unparser.write_char(';'); + } +} diff --git a/src/grammar/this.rs b/src/grammar/this.rs index cfb0628..e347eeb 100644 --- a/src/grammar/this.rs +++ b/src/grammar/this.rs @@ -1,18 +1,25 @@ use crate::ast::*; use crate::lexer::*; use crate::parser::*; +use crate::unparser::*; -pub struct ThisExpressionParser; +pub struct ThisExpressionNode; -impl ThisExpressionParser { +impl ThisExpressionNode { pub fn new() -> Self { Self } } -impl ParserCombinator for ThisExpressionParser { +impl ParserCombinator for ThisExpressionNode { fn parse(&self, parser: &mut Parser) -> ParseResult { parser.assert_consume(&Token::This, "Expected 'this'")?; Ok(ThisExpression {}) } -} \ No newline at end of file +} + +impl UnparserCombinator for ThisExpressionNode { + fn unparse(&self, unparser: &mut Unparser, _node: &ThisExpression) { + unparser.write_str("this"); + } +} diff --git a/src/grammar/yield_expression.rs b/src/grammar/yield_expression.rs new file mode 100644 index 0000000..48b6a32 --- /dev/null +++ b/src/grammar/yield_expression.rs @@ -0,0 +1,52 @@ +use crate::ast::*; +use crate::lexer::*; +use crate::parser::*; +use crate::unparser::*; +use super::expression::*; + +pub struct YieldExpressionNode; + +impl YieldExpressionNode { + pub fn new() -> Self { + Self + } +} + +impl ParserCombinator for YieldExpressionNode { + fn parse(&self, parser: &mut Parser) -> ParseResult { + if !parser.allows_yield() { + return Err(parser.error_at_current("'yield' expressions are only allowed within generator functions")); + } + + parser.assert_consume(&Token::Yield, "Expected 'yield'")?; + + let delegate = parser.consume(&Token::Star); + + let argument = if parser.check(&Token::Semicolon) || parser.is_at_end() || + parser.check(&Token::RightBrace) || 
parser.check(&Token::Comma) || + parser.check(&Token::RightParen) || parser.check(&Token::RightBracket) || + parser.check(&Token::Colon) || parser.previous_line_terminator() { + None + } else { + Some(Box::new(ExpressionNode::new().parse(parser)?)) + }; + + Ok(YieldExpression { + argument, + delegate, + }) + } +} + +impl UnparserCombinator for YieldExpressionNode { + fn unparse(&self, unparser: &mut Unparser, node: &YieldExpression) { + unparser.write_str("yield"); + if node.delegate { + unparser.write_char('*'); + } + if let Some(argument) = &node.argument { + unparser.write_char(' '); + ExpressionNode::new().unparse(unparser, argument); + } + } +} \ No newline at end of file diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 810cca1..10c8f6d 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -63,7 +63,8 @@ impl<'a> Lexer<'a> { } else if b >= 128 { // Found a non-ASCII byte is_all_ascii = false; - // Process it with the regular advance method + + // Use advance() which properly handles UTF-8 characters self.advance(); continue; } @@ -71,121 +72,125 @@ impl<'a> Lexer<'a> { // If we reach here, either we're at the end or the next character // is not an identifier character - if !self.is_at_end() && self.is_alphanumeric(self.peek()) { - let c = self.advance(); - // Check if we just processed a non-ASCII character - if !c.is_ascii() { - is_all_ascii = false; + if !self.is_at_end() { + let c = self.peek(); + if self.is_alphanumeric(c) { + self.advance(); + if !c.is_ascii() { + is_all_ascii = false; + } + continue; } - } else { - break; } + + // Not a valid identifier character or end of source + break; } // Calculate the length of the identifier let length = self.current - self.start; // Only check for keywords if the identifier is within the length range of keywords - // and is all ASCII (since all keywords are ASCII) - let token_type = if is_all_ascii && length >= 2 && length <= 10 { - // For ASCII identifiers, we can do direct byte comparisons - let 
bytes = &self.bytes[self.start..self.current]; - - // First check by length for faster matching - match bytes.len() { - 2 => match bytes { - b"do" => Token::Do, - b"if" => Token::If, - b"in" => Token::In, - b"of" => Token::Of, - b"as" => Token::As, - _ => self.create_identifier_token(), - }, - 3 => match bytes { - b"for" => Token::For, - b"let" => Token::Let, - b"new" => Token::New, - b"try" => Token::Try, - b"var" => Token::Var, - b"get" => Token::Get, - b"set" => Token::Set, - _ => self.create_identifier_token(), - }, - 4 => match bytes { - b"case" => Token::Case, - b"else" => Token::Else, - b"enum" => Token::Enum, - b"from" => Token::From, - b"null" => Token::Null, - b"this" => Token::This, - b"true" => Token::True, - b"void" => Token::Void, - b"with" => Token::With, - b"eval" => Token::Eval, - _ => self.create_identifier_token(), - }, - 5 => match bytes { - b"async" => Token::Async, - b"await" => Token::Await, - b"break" => Token::Break, - b"catch" => Token::Catch, - b"class" => Token::Class, - b"const" => Token::Const, - b"false" => Token::False, - b"super" => Token::Super, - b"throw" => Token::Throw, - b"while" => Token::While, - b"yield" => Token::Yield, - _ => self.create_identifier_token(), - }, - 6 => match bytes { - b"delete" => Token::Delete, - b"export" => Token::Export, - b"import" => Token::Import, - b"public" => Token::Public, - b"return" => Token::Return, - b"static" => Token::Static, - b"switch" => Token::Switch, - b"target" => Token::Target, - b"typeof" => Token::Typeof, - _ => self.create_identifier_token(), - }, - 7 => match bytes { - b"default" => Token::Default, - b"extends" => Token::Extends, - b"finally" => Token::Finally, - b"package" => Token::Package, - b"private" => Token::Private, - _ => self.create_identifier_token(), - }, - 8 => match bytes { - b"continue" => Token::Continue, - b"debugger" => Token::Debugger, - b"function" => Token::Function, - _ => self.create_identifier_token(), - }, - 9 => match bytes { - b"arguments" => 
Token::Arguments, - b"interface" => Token::Interface, - b"protected" => Token::Protected, - b"undefined" => Token::Undefined, - _ => self.create_identifier_token(), - }, - 10 => match bytes { - b"instanceof" => Token::InstanceOf, - b"implements" => Token::Implements, - b"constructor" => Token::Constructor, + // and is all ASCII (since all keywords are ASCII) + let token_type = if is_all_ascii && length >= 2 && length <= 10 { + // For ASCII identifiers, we can do direct byte comparisons + let bytes = &self.bytes[self.start..self.current]; + + // First check by length for faster matching + match bytes.len() { + 2 => match bytes { + b"do" => Token::Do, + b"if" => Token::If, + b"in" => Token::In, + b"of" => Token::Of, + b"as" => Token::As, + _ => self.create_identifier_token(), + }, + 3 => match bytes { + b"for" => Token::For, + b"let" => Token::Let, + b"new" => Token::New, + b"try" => Token::Try, + b"var" => Token::Var, + b"get" => Token::Get, + b"set" => Token::Set, + _ => self.create_identifier_token(), + }, + 4 => match bytes { + b"case" => Token::Case, + b"else" => Token::Else, + b"enum" => Token::Enum, + b"from" => Token::From, + b"null" => Token::Null, + b"this" => Token::This, + b"true" => Token::True, + b"void" => Token::Void, + b"with" => Token::With, + b"eval" => Token::Eval, + _ => self.create_identifier_token(), + }, + 5 => match bytes { + b"async" => Token::Async, + b"await" => Token::Await, + b"break" => Token::Break, + b"catch" => Token::Catch, + b"class" => Token::Class, + b"const" => Token::Const, + b"false" => Token::False, + b"super" => Token::Super, + b"throw" => Token::Throw, + b"while" => Token::While, + b"yield" => Token::Yield, + _ => self.create_identifier_token(), + }, + 6 => match bytes { + b"delete" => Token::Delete, + b"export" => Token::Export, + b"import" => Token::Import, + b"public" => Token::Public, + b"return" => Token::Return, + b"static" => Token::Static, + b"switch" => Token::Switch, + b"target" => Token::Target, + b"typeof" => 
Token::Typeof, + _ => self.create_identifier_token(), + }, + 7 => match bytes { + b"default" => Token::Default, + b"extends" => Token::Extends, + b"finally" => Token::Finally, + b"package" => Token::Package, + b"private" => Token::Private, + _ => self.create_identifier_token(), + }, + 8 => match bytes { + b"continue" => Token::Continue, + b"debugger" => Token::Debugger, + b"function" => Token::Function, + _ => self.create_identifier_token(), + }, + 9 => match bytes { + b"arguments" => Token::Arguments, + b"interface" => Token::Interface, + b"protected" => Token::Protected, + b"undefined" => Token::Undefined, + _ => self.create_identifier_token(), + }, + 10 => match bytes { + b"instanceof" => Token::InstanceOf, + b"implements" => Token::Implements, + b"constructor" => Token::Constructor, + _ => self.create_identifier_token(), + }, _ => self.create_identifier_token(), - }, - _ => self.create_identifier_token(), - } - } else { - // For non-ASCII identifiers or identifiers with lengths outside keyword range - self.create_identifier_token() - }; - - // Add the token - emit_token!(self, token_type); + } + } else { + // For non-ASCII identifiers or identifiers with lengths outside keyword range + self.create_identifier_token() + }; + + // Add the token + emit_token!(self, token_type); } // Helper method to create an identifier token @@ -496,76 +501,82 @@ impl<'a> Lexer<'a> { /// Parses a regular expression literal fn regexp(&mut self) -> Result<(), LexerError> { - let start_column = self.column - 1; - let mut pattern = String::with_capacity(16); + let start_column = self.column - 1; + let mut pattern = String::with_capacity(16); + let mut in_character_class = false; + + // Parse the pattern + while !self.is_at_end() && (in_character_class || self.peek() != '/') { + let c = self.peek(); - // Parse the pattern - while !self.is_at_end() && self.peek() != '/' { - if self.peek() == '\\' { - pattern.push(self.advance()); // Add the escape character - - if self.is_at_end() { - 
return Err(LexerError::new( - "Unterminated regular expression: escape sequence not completed", - self.line, - start_column - )); - } - - // Add the escaped character (whatever it is) - pattern.push(self.advance()); - } else if self.peek() == '\n' { + if c == '[' && !in_character_class { + // Start of character class + in_character_class = true; + pattern.push(self.advance()); + } else if c == ']' && in_character_class { + // End of character class + in_character_class = false; + pattern.push(self.advance()); + } else if c == '\\' { + // Handle escape sequences + pattern.push(self.advance()); // Add the backslash + + if self.is_at_end() { return Err(LexerError::new( - "Unterminated regular expression: newline in pattern", + "Unterminated regular expression: escape sequence not completed", self.line, start_column )); - } else { - pattern.push(self.advance()); } - } - - if self.is_at_end() { + + // Add the escaped character (whatever it is) + pattern.push(self.advance()); + } else if c == '\n' { return Err(LexerError::new( - "Unterminated regular expression", + "Unterminated regular expression: newline in pattern", self.line, start_column )); + } else { + pattern.push(self.advance()); } - - // Consume the closing slash - self.advance(); - - // Parse flags - let mut flags = String::with_capacity(4); - while !self.is_at_end() && self.is_regexp_flag(self.peek()) { - flags.push(self.advance()); - } - - // Validate flags (no duplicates, only valid flags) - let mut seen_flags = HashSet::with_capacity(flags.len()); - for flag in flags.chars() { - if !seen_flags.insert(flag) { - return Err(LexerError::new( - &format!("Duplicate flag '{}' in regular expression", flag), - self.line, - self.column - 1 - )); - } - - if !matches!(flag, 'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd') { - return Err(LexerError::new( - &format!("Invalid regular expression flag '{}'", flag), - self.line, - self.column - 1 - )); - } + } + + if self.is_at_end() { + return Err(LexerError::new( + "Unterminated 
regular expression", + self.line, + start_column + )); + } + + // Consume the closing slash + self.advance(); + + // Parse flags + let mut flags = String::new(); + while !self.is_at_end() && self.is_regexp_flag(self.peek()) { + flags.push(self.advance()); + } + + // Validate flags (no duplicates) + let mut seen_flags = HashSet::new(); + for flag in flags.chars() { + if !seen_flags.insert(flag) { + return Err(LexerError::new( + &format!("Duplicate flag '{}' in regular expression", flag), + self.line, + self.column - 1 + )); } - - emit_token!(self, Token::RegExpLiteral(pattern, flags)); - - Ok(()) } + + // Emit the token + emit_token!(self, Token::RegExpLiteral(pattern, flags)); + + Ok(()) +} + #[inline(always)] fn is_regexp_flag(&self, c: char) -> bool { @@ -1257,7 +1268,16 @@ impl<'a> Lexer<'a> { #[inline(always)] fn is_alpha(&self, c: char) -> bool { - (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$' + // Include $ character which is valid in JavaScript identifiers + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '_' || + c == '$' || + // For non-ASCII characters, use a simple heuristic + // This covers most Unicode letters that would be valid in JS identifiers + (c > '\x7F' && !c.is_whitespace() && !c.is_control()) || + // Zero-width characters allowed in JS identifiers + c == '\u{200C}' || c == '\u{200D}' } #[inline(always)] diff --git a/src/lexer/token.rs b/src/lexer/token.rs index 3198cfc..f0986db 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -227,18 +227,140 @@ impl Token { pub fn to_string(&self) -> String { match self { + // Literals Token::Identifier(name) => name.clone(), Token::StringLiteral(s) => format!("\"{}\"", s), Token::NumberLiteral(n) => n.to_string(), Token::BigIntLiteral(b) => format!("{}n", b), Token::RegExpLiteral(pattern, flags) => format!("/{}/{}", pattern, flags), Token::TemplateLiteral(_) => "`...`".to_string(), - _ => match self.keyword_text() { - Some(text) => text.to_string(), - None 
=> format!("{:?}", self), - }, + + // Boolean literals and null + Token::True => "true".to_string(), + Token::False => "false".to_string(), + Token::Null => "null".to_string(), + Token::Undefined => "undefined".to_string(), + + // Keywords + Token::Var => "var".to_string(), + Token::Let => "let".to_string(), + Token::With => "with".to_string(), + Token::Const => "const".to_string(), + Token::Function => "function".to_string(), + Token::Return => "return".to_string(), + Token::If => "if".to_string(), + Token::Else => "else".to_string(), + Token::While => "while".to_string(), + Token::For => "for".to_string(), + Token::Break => "break".to_string(), + Token::Continue => "continue".to_string(), + Token::This => "this".to_string(), + Token::Super => "super".to_string(), + Token::New => "new".to_string(), + Token::Delete => "delete".to_string(), + Token::Typeof => "typeof".to_string(), + Token::Void => "void".to_string(), + Token::In => "in".to_string(), + Token::InstanceOf => "instanceof".to_string(), + Token::Try => "try".to_string(), + Token::Catch => "catch".to_string(), + Token::Finally => "finally".to_string(), + Token::Throw => "throw".to_string(), + Token::Switch => "switch".to_string(), + Token::Case => "case".to_string(), + Token::Default => "default".to_string(), + Token::Await => "await".to_string(), + Token::Async => "async".to_string(), + Token::Do => "do".to_string(), + Token::Enum => "enum".to_string(), + Token::Of => "of".to_string(), + Token::Target => "target".to_string(), + Token::Implements => "implements".to_string(), + Token::Interface => "interface".to_string(), + Token::Package => "package".to_string(), + Token::Private => "private".to_string(), + Token::Protected => "protected".to_string(), + Token::Public => "public".to_string(), + Token::Arguments => "arguments".to_string(), + Token::Eval => "eval".to_string(), + Token::Debugger => "debugger".to_string(), + Token::Class => "class".to_string(), + Token::Extends => "extends".to_string(), + 
Token::Constructor => "constructor".to_string(), + Token::Static => "static".to_string(), + Token::Get => "get".to_string(), + Token::Set => "set".to_string(), + Token::Yield => "yield".to_string(), + Token::Import => "import".to_string(), + Token::Export => "export".to_string(), + Token::From => "from".to_string(), + Token::As => "as".to_string(), + + // Characters and operators + Token::LeftParen => "(".to_string(), + Token::RightParen => ")".to_string(), + Token::LeftBrace => "{".to_string(), + Token::RightBrace => "}".to_string(), + Token::LeftBracket => "[".to_string(), + Token::RightBracket => "]".to_string(), + Token::Comma => ",".to_string(), + Token::Dot => ".".to_string(), + Token::Semicolon => ";".to_string(), + Token::Colon => ":".to_string(), + Token::Question => "?".to_string(), + Token::Arrow => "=>".to_string(), + Token::Hash => "#".to_string(), + Token::Plus => "+".to_string(), + Token::PlusPlus => "++".to_string(), + Token::PlusEqual => "+=".to_string(), + Token::Minus => "-".to_string(), + Token::MinusMinus => "--".to_string(), + Token::MinusEqual => "-=".to_string(), + Token::Star => "*".to_string(), + Token::StarStar => "**".to_string(), + Token::StarEqual => "*=".to_string(), + Token::StarStarEqual => "**=".to_string(), + Token::Slash => "/".to_string(), + Token::SlashEqual => "/=".to_string(), + Token::Percent => "%".to_string(), + Token::PercentEqual => "%=".to_string(), + Token::Equal => "=".to_string(), + Token::EqualEqual => "==".to_string(), + Token::EqualEqualEqual => "===".to_string(), + Token::Bang => "!".to_string(), + Token::BangEqual => "!=".to_string(), + Token::BangEqualEqual => "!==".to_string(), + Token::Greater => ">".to_string(), + Token::GreaterEqual => ">=".to_string(), + Token::GreaterGreater => ">>".to_string(), + Token::GreaterGreaterEqual => ">>=".to_string(), + Token::GreaterGreaterGreater => ">>>".to_string(), + Token::GreaterGreaterGreaterEqual => ">>>=".to_string(), + Token::Less => "<".to_string(), + 
Token::LessEqual => "<=".to_string(), + Token::LessLess => "<<".to_string(), + Token::LessLessEqual => "<<=".to_string(), + Token::Ampersand => "&".to_string(), + Token::AmpersandEqual => "&=".to_string(), + Token::AmpersandAmpersand => "&&".to_string(), + Token::AmpersandAmpersandEqual => "&&=".to_string(), + Token::Pipe => "|".to_string(), + Token::PipeEqual => "|=".to_string(), + Token::PipePipe => "||".to_string(), + Token::PipePipeEqual => "||=".to_string(), + Token::Caret => "^".to_string(), + Token::CaretEqual => "^=".to_string(), + Token::Tilde => "~".to_string(), + Token::Ellipsis => "...".to_string(), + Token::QuestionQuestion => "??".to_string(), + Token::QuestionQuestionEqual => "??=".to_string(), + Token::QuestionDot => "?.".to_string(), + + // Sentinel + Token::EOS => "".to_string(), } } + } #[derive(Debug, Clone, PartialEq)] diff --git a/src/main.rs b/src/main.rs index ceb9574..34cb7c5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ mod ast; mod lexer; mod parser; +mod unparser; mod grammar; use std::path::PathBuf; @@ -10,7 +11,7 @@ use std::process; use lexer::Lexer; use parser::Parser; - +use unparser::{Unparser, FormatStyle}; fn main() { let args: Vec = env::args().collect(); @@ -24,7 +25,7 @@ fn main() { match run::(entry_file) { Ok(_) => { - println!("Successfully parsed {}", entry_file); + println!("Successfully parsed and unparsed {}", entry_file); }, Err(error) => { eprintln!("Error: {}", error); @@ -34,20 +35,35 @@ fn main() { } fn run(file: &str) -> Result<(), Box> where str: AsRef { - let path = PathBuf::from(file); - - let source = fs::read_to_string(path)?; + let source = fs::read_to_string(path)?; let mut lexer = Lexer::new(&source); let tokens = lexer.scan_tokens()?; - + + println!("Tokens: {:#?}", tokens); let mut parser = Parser::new(&tokens); parser.attach_source(&source); let ast = parser.parse_script()?; println!("AST: {:#?}", ast); + + let mut pretty_unparser = Unparser::new(FormatStyle::Pretty { indent_size: 2 }); + + 
let pretty_code = pretty_unparser.unparse_script(&ast); + + let mut compact_unparser = Unparser::new(FormatStyle::Compact); + + let compact_code = compact_unparser.unparse_script(&ast); + + println!("\nPretty JavaScript code:"); + println!("{}", pretty_code); + + println!("\nCompact JavaScript code:"); + println!("{}", compact_code); + + println!("\n\n"); Ok(()) } diff --git a/src/parser/asi.rs b/src/parser/asi.rs index bbf759e..a513cc1 100644 --- a/src/parser/asi.rs +++ b/src/parser/asi.rs @@ -2,6 +2,7 @@ use crate::lexer::Token; use super::error::ParseResult; use super::parser::Parser; +// TODO remove? impl<'a> Parser<'a> { pub fn consume_semicolon(&mut self, message: &str) -> ParseResult<&Token> { if self.consume(&Token::Semicolon) { diff --git a/src/parser/context.rs b/src/parser/context.rs index db99b0b..fd1d432 100644 --- a/src/parser/context.rs +++ b/src/parser/context.rs @@ -1,12 +1,11 @@ use std::collections::HashSet; use crate::lexer::LexicalContext; -/// Maintains parser state and context information pub struct ParserContext { + // TODO to lexical context pub in_strict_mode: bool, pub labels: HashSet>, pub context_stack: Vec, - pub comments: Vec, } impl ParserContext { @@ -15,14 +14,9 @@ impl ParserContext { in_strict_mode: false, labels: HashSet::new(), context_stack: vec![LexicalContext::Default], - comments: Vec::new(), } } - pub fn current_context(&self) -> &LexicalContext { - self.context_stack.last().unwrap_or(&LexicalContext::Default) - } - pub fn push_context(&mut self, context: LexicalContext) { self.context_stack.push(context); } @@ -33,23 +27,20 @@ impl ParserContext { } } - pub fn has_context(&self, predicate: F) -> bool - where - F: Fn(&LexicalContext) -> bool - { - self.context_stack.iter().any(predicate) + fn current_context(&self) -> &LexicalContext { + self.context_stack.last().unwrap_or(&LexicalContext::Default) } pub fn is_in_loop_body(&self) -> bool { - self.has_context(|ctx| matches!(ctx, LexicalContext::LoopBody)) + 
matches!(self.current_context(), LexicalContext::LoopBody) } pub fn is_in_switch(&self) -> bool { - self.has_context(|ctx| matches!(ctx, LexicalContext::SwitchBody)) + matches!(self.current_context(), LexicalContext::SwitchBody) } pub fn is_in_function(&self) -> bool { - self.has_context(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) + self.context_stack.iter().any(|ctx| matches!(ctx, LexicalContext::FunctionBody { .. })) } pub fn allows_yield(&self) -> bool { diff --git a/src/parser/error.rs b/src/parser/error.rs index 67ab01b..8e7504f 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -10,7 +10,7 @@ pub struct ParserError { pub source_line: Option, pub source_span: Option<(usize, usize)>, pub context_stack: Vec, - pub current_token: Token, + pub token_stack: Vec, } impl ParserError { @@ -18,6 +18,8 @@ impl ParserError { pub fn new(parser: &Parser, message: &str) -> Self { let context_stack = parser.get_context_stack_info(); + + let token_stack = parser.get_token_stack_info(); let token = parser.peek(); @@ -191,7 +193,7 @@ impl ParserError { source_line: Some(source_line), source_span: Some((adjusted_column, adjusted_span_end)), context_stack, - current_token: token.clone(), + token_stack, } } @@ -243,12 +245,13 @@ impl fmt::Display for ParserError { writeln!(f, "at line {}, column {}", self.line, self.column)?; } - // Print current token information if available - if !matches!(self.current_token, Token::EOS) { - writeln!(f, "\nCurrent token: {:#?}", self.current_token)?; + if !self.token_stack.is_empty() { + writeln!(f, "\nToken stack:")?; + for (i, token) in self.token_stack.iter().enumerate() { + writeln!(f, " {}: {}", i, token)?; + } } - - // Print context stack information if available + if !self.context_stack.is_empty() { writeln!(f, "\nLexical context stack:")?; for (i, context) in self.context_stack.iter().enumerate() { @@ -286,7 +289,7 @@ impl From for ParserError { source_line: None, source_span: None, context_stack: Vec::new(), - 
current_token: Token::EOS, + token_stack: Vec::new(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ce8ff84..0e8814a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,7 +1,3 @@ -//! JavaScript parser module -//! -//! This module provides a composable, extensible parser for JavaScript code. - mod asi; mod error; mod stream; @@ -9,7 +5,6 @@ mod context; mod parser; mod combinator; -// Public exports pub use self::parser::Parser; pub use self::combinator::ParserCombinator; pub use self::error::{ParserError, ParseResult}; \ No newline at end of file diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 225755f..a0f7342 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -6,6 +6,7 @@ use super::combinator::ParserCombinator; use super::context::ParserContext; use crate::grammar::*; +/* use std::borrow::Cow; pub struct TokenAccess<'a> { @@ -18,9 +19,8 @@ impl<'a> std::ops::Deref for TokenAccess<'a> { fn deref(&self) -> &Self::Target { self.token.as_ref() } -} +}*/ -/// JavaScript parser pub struct Parser<'a> { stream: TokenStream<'a>, context: ParserContext, @@ -36,19 +36,19 @@ impl<'a> Parser<'a> { // Main parsing methods pub fn parse_module(&mut self) -> ParseResult { - ModuleParser::new().parse(self) + ModuleNode::new().parse(self) } pub fn parse_script(&mut self) -> ParseResult { - ScriptParser::new().parse(self) + ScriptNode::new().parse(self) } pub fn parse_expression(&mut self) -> ParseResult { - ExpressionParser::new().parse(self) + ExpressionNode::new().parse(self) } pub fn parse_statement(&mut self) -> ParseResult { - StatementParser::new().parse(self) + StatementNode::new().parse(self) } // Source handling @@ -101,10 +101,6 @@ impl<'a> Parser<'a> { pub fn peek_next(&self, offset: usize) -> &Token { self.stream.peek_next(offset) } - - //pub fn peek_position(&self) -> [usize; 2] { - // self.stream.peek_position() - //} pub fn advance(&mut self) -> bool { self.stream.advance() @@ -141,19 +137,16 @@ impl<'a> 
Parser<'a> { Err(self.error_at_current(message)) } } - + + pub fn get_token_stack_info(&self) -> Vec { + self.stream.get_token_stack_info() + } + // ParserContext delegations pub fn get_context_stack_info(&self) -> Vec { self.context.get_context_stack_info() } - pub fn has_context(&self, predicate: F) -> bool - where - F: Fn(&LexicalContext) -> bool - { - self.context.has_context(predicate) - } - pub fn is_in_function(&self) -> bool { self.context.is_in_function() } @@ -194,6 +187,7 @@ impl<'a> Parser<'a> { } } */ + pub fn with_context(&mut self, context: LexicalContext, f: F) -> ParseResult where F: FnOnce(&mut Self) -> ParseResult, diff --git a/src/parser/stream.rs b/src/parser/stream.rs index dad0e72..fa3138f 100644 --- a/src/parser/stream.rs +++ b/src/parser/stream.rs @@ -91,4 +91,31 @@ impl<'a> TokenStream<'a> { pub fn restore_position(&mut self, position: usize) { self.current = position; } + + pub fn get_token_stack_info(&self) -> Vec { + + let count = 10; + + let mut history = Vec::with_capacity(count); + + // Start from the current position and go backwards + let start_pos = if self.current >= count - 1 { + self.current - (count - 1) + } else { + 0 + }; + + // Add tokens from start_pos up to and including the current position + for i in start_pos..=self.current { + if i < self.tokens.len() { + history.push(self.tokens[i].0.clone()); + } + } + + history + .iter() + .rev() + .map(|token| format!("{}", token.to_string())) + .collect() + } } diff --git a/src/unparser/combinator.rs b/src/unparser/combinator.rs new file mode 100644 index 0000000..473aa3e --- /dev/null +++ b/src/unparser/combinator.rs @@ -0,0 +1,5 @@ +use super::unparser::Unparser; + +pub trait UnparserCombinator { + fn unparse(&self, unparser: &mut Unparser, node: &T); +} diff --git a/src/unparser/formatter.rs b/src/unparser/formatter.rs new file mode 100644 index 0000000..a6603c6 --- /dev/null +++ b/src/unparser/formatter.rs @@ -0,0 +1,109 @@ +use std::fmt::Write; + +#[derive(Debug, Clone)] 
+pub enum FormatStyle { + Compact, + Pretty { + indent_size: usize, + }, +} + +pub struct Formatter { + buffer: String, + style: FormatStyle, + current_indent: usize, + line_start: bool, +} + +impl Formatter { + pub fn new(style: FormatStyle) -> Self { + Self { + buffer: String::new(), + style, + current_indent: 0, + line_start: true, + } + } + + pub fn write_str(&mut self, s: &str) { + if self.line_start { + match &self.style { + FormatStyle::Pretty { indent_size } => { + for _ in 0..self.current_indent * indent_size { + self.buffer.push(' '); + } + }, + FormatStyle::Compact => {} + } + self.line_start = false; + } + self.buffer.push_str(s); + } + + pub fn write_char(&mut self, c: char) { + if self.line_start { + match &self.style { + FormatStyle::Pretty { indent_size } => { + for _ in 0..self.current_indent * indent_size { + self.buffer.push(' '); + } + }, + FormatStyle::Compact => {} + } + self.line_start = false; + } + self.buffer.push(c); + } + + pub fn newline(&mut self) { + match self.style { + FormatStyle::Pretty { .. } => { + self.buffer.push('\n'); + self.line_start = true; + }, + FormatStyle::Compact => { + } + } + } + + pub fn space(&mut self) { + match self.style { + FormatStyle::Pretty { .. } => { + self.buffer.push(' '); + }, + FormatStyle::Compact => {} + } + } + + pub fn undefined(&mut self) { + match self.style { + FormatStyle::Pretty { .. 
} => { + self.write_str("undefined"); + }, + FormatStyle::Compact => { + self.write_str("void 0"); + } + } + } + + pub fn indent(&mut self) { + self.current_indent += 1; + } + + pub fn dedent(&mut self) { + if self.current_indent > 0 { + self.current_indent -= 1; + } + } + + pub fn as_str(&self) -> &str { + &self.buffer + } +} + +impl Write for Formatter { + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.write_str(s); + Ok(()) + } +} diff --git a/src/unparser/mod.rs b/src/unparser/mod.rs new file mode 100644 index 0000000..dca0ef3 --- /dev/null +++ b/src/unparser/mod.rs @@ -0,0 +1,7 @@ +mod formatter; +mod unparser; +mod combinator; + +pub use self::unparser::Unparser; +pub use self::combinator::UnparserCombinator; +pub use self::formatter::FormatStyle; \ No newline at end of file diff --git a/src/unparser/unparser.rs b/src/unparser/unparser.rs new file mode 100644 index 0000000..a2bbdf1 --- /dev/null +++ b/src/unparser/unparser.rs @@ -0,0 +1,79 @@ +use crate::ast::*; +use super::formatter::{Formatter, FormatStyle}; +use super::combinator::UnparserCombinator; + +use crate::grammar::*; + +pub struct Unparser { + formatter: Formatter, +} + +impl Unparser { + pub fn new(style: FormatStyle) -> Self { + Self { + formatter: Formatter::new(style), + } + } + + // Main unparse methods + pub fn unparse_module(&mut self, program: &Program) -> &str { + // TODO generic + ModuleNode::new().unparse(self, program); + self.formatter.as_str() + } + + pub fn unparse_script(&mut self, program: &Program) -> &str { + ScriptNode::new().unparse(self, program); + self.formatter.as_str() + } + + /* + pub fn unparse_expression(&mut self, expr: &Expression) -> String { + ExpressionNode::new().unparse(self, expr); + self.formatter.into_string() + } + + pub fn unparse_statement(&mut self, stmt: &Statement) -> String { + StatementNode::new().unparse(self, stmt); + self.formatter.into_string() + } + */ + + // Formatter delegations + pub fn write_str(&mut self, s: &str) { + 
self.formatter.write_str(s); + } + + pub fn write_char(&mut self, c: char) { + self.formatter.write_char(c); + } + + pub fn newline(&mut self) { + self.formatter.newline(); + } + + pub fn undefined(&mut self) { + self.formatter.undefined(); + } + + pub fn space(&mut self) { + self.formatter.space(); + } + + pub fn indent(&mut self) { + self.formatter.indent(); + } + + pub fn dedent(&mut self) { + self.formatter.dedent(); + } + + pub fn with_indent(&mut self, f: F) + where + F: FnOnce(&mut Self) + { + self.indent(); + f(self); + self.dedent(); + } +}