From e66328c4c4d946047f0a9595daf2071f8950cc48 Mon Sep 17 00:00:00 2001 From: LunaStev Date: Fri, 23 Jan 2026 18:25:30 +0900 Subject: [PATCH] feat: implement Linux x86_64 syscall suite and networking stdlib modules This commit establishes a comprehensive low-level system interface for the Wave language on Linux x86_64. It includes a full suite of syscall wrappers, foundational system modules (FS, Memory, Process, Time), and high-level networking abstractions (TCP/UDP). Changes: - **Lexer Enhancements**: - Added support for hex escape sequences (`\xHH`) in string literals. - Unknown escape sequences in string literals are now rejected with a panic instead of being passed through verbatim. - Unified whitespace and comment skipping into a new `skip_trivia` method, ensuring robust tokenization across multi-line comments. - Refactored `next_token` for better control flow and readability. - **Parser & Backend**: - Added support for the `input` statement in the parser. - Implemented implicit coercion between `Pointer` and `Int64` types specifically for functions starting with `syscall`, facilitating the passing of raw addresses to system registers. - Updated `scanf` format generation to correctly map 8-bit integers to `%c`. - **System Standard Library (`std::sys::linux`)**: - **`syscall`**: Implemented `syscall0` through `syscall6` using inline assembly following the x86_64 calling convention. - **`fs`**: Added raw wrappers for `open`, `close`, `read`, `write`, `lseek`, `stat`/`fstat`, `unlink`, and the directory operations `mkdir`/`rmdir`. - **`socket`**: Defined core socket constants (AF_INET, SOCK_STREAM, etc.) and raw wrappers for the Linux socket API. - **`memory`**: Added `mmap`, `munmap`, and `brk` for manual virtual memory management. - **`process`**: Added `exit`, `getpid`, `getppid`, `fork`, `execve`, `waitpid` (implemented via `wait4`), and `kill`. - **`time`**: Added `nanosleep` and `clock_gettime` with `TimeSpec` support. - **Networking Standard Library (`std::net`)**: - **`tcp`**: Implemented `TcpListener` and `TcpStream` providing a synchronous, stream-based networking API. - **`udp`**: Implemented `UdpSocket` for datagram-based communication. 
These additions provide Wave with the necessary primitives to perform low-level systems programming and network communication directly. --- examples/game.wave | 59 ++ front/lexer/src/lexer/literals.rs | 16 +- front/lexer/src/lexer/scan.rs | 698 +++++++++--------- front/lexer/src/lexer/trivia.rs | 28 + front/parser/src/parser/stmt.rs | 4 + .../llvm_temporary/expression/rvalue/calls.rs | 22 + .../src/llvm_temporary/llvm_codegen/format.rs | 8 +- std/net/tcp.wave | 133 ++++ std/net/udp.wave | 140 +++- std/sys/linux/fs.wave | 104 +++ std/sys/linux/memory.wave | 42 ++ std/sys/linux/process.wave | 71 ++ std/sys/linux/socket.wave | 93 +++ std/sys/linux/syscall.wave | 142 +++- std/sys/linux/time.wave | 40 + 15 files changed, 1227 insertions(+), 373 deletions(-) create mode 100644 examples/game.wave create mode 100644 std/net/tcp.wave create mode 100644 std/sys/linux/fs.wave create mode 100644 std/sys/linux/memory.wave create mode 100644 std/sys/linux/process.wave create mode 100644 std/sys/linux/socket.wave create mode 100644 std/sys/linux/time.wave diff --git a/examples/game.wave b/examples/game.wave new file mode 100644 index 00000000..3596feee --- /dev/null +++ b/examples/game.wave @@ -0,0 +1,59 @@ +struct Player { + x: i32; + y: i32; +} + +struct Game { + p: Player; + running: bool; +} + +fun render(g: Game) { + var y: i32 = 0; + while (y < 5) { + var x: i32 = 0; + while (x < 5) { + if (x == g.p.x && y == g.p.y) { + print("@"); + } else { + print("."); + } + x += 1; + } + println(""); + y += 1; + } +} + +fun update(g: ptr, key: i8) { + if (key == 'w' && deref g.p.y > 0) { deref g.p.y -= 1; } + if (key == 's' && deref g.p.y < 4) { deref g.p.y += 1; } + if (key == 'a' && deref g.p.x > 0) { deref g.p.x -= 1; } + if (key == 'd' && deref g.p.x < 4) { deref g.p.x += 1; } + if (key == 'q') { deref g.running = false; } +} + +fun clear_screen() { + print("\x1b[2J\x1b[H"); +} + + +fun main() { + var game: Game = Game { + p: Player { x: 2, y: 2 }, + running: true + }; + + var key: 
i8; + + println("WASD to move, q to quit"); + + while (game.running) { + clear_screen(); + render(game); + input("{}", key); + update(&game, key); + } + + println("Game Over"); +} diff --git a/front/lexer/src/lexer/literals.rs b/front/lexer/src/lexer/literals.rs index d45349f0..c2ae773a 100644 --- a/front/lexer/src/lexer/literals.rs +++ b/front/lexer/src/lexer/literals.rs @@ -18,12 +18,22 @@ impl<'a> Lexer<'a> { 'r' => string_literal.push('\r'), '\\' => string_literal.push('\\'), '"' => string_literal.push('"'), + 'x' => { + let h1 = self.advance(); + let h2 = self.advance(); + + let hex = format!("{}{}", h1, h2); + let value = u8::from_str_radix(&hex, 16) + .unwrap_or_else(|_| panic!("Invalid hex escape: \\x{}", hex)); + + string_literal.push(value as char); + } _ => { - string_literal.push('\\'); - string_literal.push(next); + panic!("Unknown escape sequence: \\{}", next); } } - } else { + } + else { string_literal.push(c); } } diff --git a/front/lexer/src/lexer/scan.rs b/front/lexer/src/lexer/scan.rs index d32d25c6..b3c44699 100644 --- a/front/lexer/src/lexer/scan.rs +++ b/front/lexer/src/lexer/scan.rs @@ -3,412 +3,400 @@ use super::{Lexer, Token}; impl<'a> Lexer<'a> { pub fn next_token(&mut self) -> Token { - self.skip_whitespace(); + loop { + self.skip_trivia(); - if self.is_at_end() { - return Token { token_type: TokenType::Eof, lexeme: String::new(), line: self.line }; - } + if self.is_at_end() { + return Token { token_type: TokenType::Eof, lexeme: String::new(), line: self.line }; + } - let c = self.advance(); + let c = self.advance(); - match c { - '+' => { - if self.match_next('+') { - Token { - token_type: TokenType::Increment, - lexeme: "++".to_string(), - line: self.line, - } - } else if self.match_next('=') { - Token { - token_type: TokenType::PlusEq, - lexeme: "+=".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Plus, - lexeme: "+".to_string(), - line: self.line, + match c { + '+' => { + if self.match_next('+') { + 
return Token { + token_type: TokenType::Increment, + lexeme: "++".to_string(), + line: self.line, + } + } else if self.match_next('=') { + return Token { + token_type: TokenType::PlusEq, + lexeme: "+=".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Plus, + lexeme: "+".to_string(), + line: self.line, + } } } - } - '-' => { - if self.match_next('-') { - Token { - token_type: TokenType::Decrement, - lexeme: "--".to_string(), - line: self.line, - } - } else if self.match_next('>') { - Token { - token_type: TokenType::Arrow, - lexeme: "->".to_string(), - line: self.line, - } - } else if self.match_next('=') { - Token { - token_type: TokenType::MinusEq, - lexeme: "-=".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Minus, - lexeme: "-".to_string(), - line: self.line, + '-' => { + if self.match_next('-') { + return Token { + token_type: TokenType::Decrement, + lexeme: "--".to_string(), + line: self.line, + } + } else if self.match_next('>') { + return Token { + token_type: TokenType::Arrow, + lexeme: "->".to_string(), + line: self.line, + } + } else if self.match_next('=') { + return Token { + token_type: TokenType::MinusEq, + lexeme: "-=".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Minus, + lexeme: "-".to_string(), + line: self.line, + } } } - } - '*' => { - if self.match_next('=') { - Token { - token_type: TokenType::StarEq, - lexeme: "*=".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Star, - lexeme: "*".to_string(), - line: self.line, + '*' => { + if self.match_next('=') { + return Token { + token_type: TokenType::StarEq, + lexeme: "*=".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Star, + lexeme: "*".to_string(), + line: self.line, + } } } - } - '.' 
=> Token { - token_type: TokenType::Dot, - lexeme: ".".to_string(), - line: self.line, - }, - '/' => { - if self.match_next('/') { - self.skip_comment(); - self.next_token() - } else if self.match_next('*') { - self.skip_multiline_comment(); - self.next_token() - } else if self.match_next('=') { - Token { - token_type: TokenType::DivEq, - lexeme: "/=".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Div, - lexeme: "/".to_string(), - line: self.line, + '.' => return Token { + token_type: TokenType::Dot, + lexeme: ".".to_string(), + line: self.line, + }, + '/' => { + if self.match_next('=') { + return Token { token_type: TokenType::DivEq, lexeme: "/=".to_string(), line: self.line }; + } else { + return Token { token_type: TokenType::Div, lexeme: "/".to_string(), line: self.line }; } } - } - '%' => { - if self.match_next('=') { - Token { - token_type: TokenType::RemainderEq, - lexeme: "%=".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Remainder, - lexeme: "%".to_string(), - line: self.line, + '%' => { + if self.match_next('=') { + return Token { + token_type: TokenType::RemainderEq, + lexeme: "%=".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Remainder, + lexeme: "%".to_string(), + line: self.line, + } } } - } - ';' => Token { - token_type: TokenType::SemiColon, - lexeme: ";".to_string(), - line: self.line, - }, - ':' => Token { - token_type: TokenType::Colon, - lexeme: ":".to_string(), - line: self.line, - }, - '<' => { - if self.match_next('<') { - Token { - token_type: TokenType::Rol, - lexeme: "<<".to_string(), - line: self.line, - } - } else if self.match_next('=') { - Token { - token_type: TokenType::LchevrEq, - lexeme: "<=".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Lchevr, - lexeme: "<".to_string(), - line: self.line, + ';' => return Token { + token_type: TokenType::SemiColon, + lexeme: ";".to_string(), + line: 
self.line, + }, + ':' => return Token { + token_type: TokenType::Colon, + lexeme: ":".to_string(), + line: self.line, + }, + '<' => { + if self.match_next('<') { + return Token { + token_type: TokenType::Rol, + lexeme: "<<".to_string(), + line: self.line, + } + } else if self.match_next('=') { + return Token { + token_type: TokenType::LchevrEq, + lexeme: "<=".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Lchevr, + lexeme: "<".to_string(), + line: self.line, + } } } - } - '>' => { - if self.match_next('>') { - Token { - token_type: TokenType::Ror, - lexeme: ">>".to_string(), - line: self.line, - } - } else if self.match_next('=') { - Token { - token_type: TokenType::RchevrEq, - lexeme: ">=".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Rchevr, - lexeme: ">".to_string(), - line: self.line, + '>' => { + if self.match_next('>') { + return Token { + token_type: TokenType::Ror, + lexeme: ">>".to_string(), + line: self.line, + } + } else if self.match_next('=') { + return Token { + token_type: TokenType::RchevrEq, + lexeme: ">=".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Rchevr, + lexeme: ">".to_string(), + line: self.line, + } } } - } - '(' => Token { - token_type: TokenType::Lparen, - lexeme: "(".to_string(), - line: self.line, - }, - ')' => Token { - token_type: TokenType::Rparen, - lexeme: ")".to_string(), - line: self.line, - }, - '{' => Token { - token_type: TokenType::Lbrace, - lexeme: "{".to_string(), - line: self.line, - }, - '}' => Token { - token_type: TokenType::Rbrace, - lexeme: "}".to_string(), - line: self.line, - }, - '[' => Token { - token_type: TokenType::Lbrack, - lexeme: "[".to_string(), - line: self.line, - }, - ']' => Token { - token_type: TokenType::Rbrack, - lexeme: "]".to_string(), - line: self.line, - }, - '=' => { - if self.match_next('=') { - Token { - token_type: TokenType::EqualTwo, - lexeme: "==".to_string(), - line: 
self.line, - } - } else { - Token { - token_type: TokenType::Equal, - lexeme: "=".to_string(), - line: self.line, + '(' => return Token { + token_type: TokenType::Lparen, + lexeme: "(".to_string(), + line: self.line, + }, + ')' => return Token { + token_type: TokenType::Rparen, + lexeme: ")".to_string(), + line: self.line, + }, + '{' => return Token { + token_type: TokenType::Lbrace, + lexeme: "{".to_string(), + line: self.line, + }, + '}' => return Token { + token_type: TokenType::Rbrace, + lexeme: "}".to_string(), + line: self.line, + }, + '[' => return Token { + token_type: TokenType::Lbrack, + lexeme: "[".to_string(), + line: self.line, + }, + ']' => return Token { + token_type: TokenType::Rbrack, + lexeme: "]".to_string(), + line: self.line, + }, + '=' => { + if self.match_next('=') { + return Token { + token_type: TokenType::EqualTwo, + lexeme: "==".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Equal, + lexeme: "=".to_string(), + line: self.line, + } } } - } - '&' => { - if self.match_next('&') { - Token { - token_type: TokenType::LogicalAnd, - lexeme: "&&".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::AddressOf, - lexeme: "&".to_string(), - line: self.line, + '&' => { + if self.match_next('&') { + return Token { + token_type: TokenType::LogicalAnd, + lexeme: "&&".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::AddressOf, + lexeme: "&".to_string(), + line: self.line, + } } } - } - '|' => { - if self.match_next('|') { - Token { - token_type: TokenType::LogicalOr, - lexeme: "||".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::BitwiseOr, - lexeme: "|".to_string(), - line: self.line, + '|' => { + if self.match_next('|') { + return Token { + token_type: TokenType::LogicalOr, + lexeme: "||".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::BitwiseOr, + lexeme: 
"|".to_string(), + line: self.line, + } } } - } - '!' => { - if self.match_next('=') { - Token { - token_type: TokenType::NotEqual, - lexeme: "!=".to_string(), - line: self.line, - } - } else if self.match_next('&') { - Token { - token_type: TokenType::Nand, - lexeme: "!&".to_string(), - line: self.line, - } - } else if self.match_next('|') { - Token { - token_type: TokenType::Nor, - lexeme: "!|".to_string(), - line: self.line, - } - } else { - Token { - token_type: TokenType::Not, - lexeme: "!".to_string(), - line: self.line, + '!' => { + if self.match_next('=') { + return Token { + token_type: TokenType::NotEqual, + lexeme: "!=".to_string(), + line: self.line, + } + } else if self.match_next('&') { + return Token { + token_type: TokenType::Nand, + lexeme: "!&".to_string(), + line: self.line, + } + } else if self.match_next('|') { + return Token { + token_type: TokenType::Nor, + lexeme: "!|".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Not, + lexeme: "!".to_string(), + line: self.line, + } } } - } - '^' => Token { - token_type: TokenType::Xor, - lexeme: "^".to_string(), - line: self.line, - }, - '~' => { - if self.match_next('^') { - Token { - token_type: TokenType::Xnor, - lexeme: "~^".to_string(), - line: self.line, + '^' => return Token { + token_type: TokenType::Xor, + lexeme: "^".to_string(), + line: self.line, + }, + '~' => { + if self.match_next('^') { + return Token { + token_type: TokenType::Xnor, + lexeme: "~^".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::BitwiseNot, + lexeme: "~".to_string(), + line: self.line, + } } - } else { - Token { - token_type: TokenType::BitwiseNot, - lexeme: "~".to_string(), - line: self.line, + } + '?' 
=> { + if self.match_next('?') { + return Token { + token_type: TokenType::NullCoalesce, + lexeme: "??".to_string(), + line: self.line, + } + } else { + return Token { + token_type: TokenType::Condition, + lexeme: "?".to_string(), + line: self.line, + } } } - } - '?' => { - if self.match_next('?') { - Token { - token_type: TokenType::NullCoalesce, - lexeme: "??".to_string(), + ',' => return Token { + token_type: TokenType::Comma, + lexeme: ",".to_string(), + line: self.line, + }, + '\'' => { + let value = self.char_literal(); + return Token { + token_type: TokenType::CharLiteral(value), + lexeme: format!("'{}'", value), line: self.line, } - } else { - Token { - token_type: TokenType::Condition, - lexeme: "?".to_string(), + }, + '"' => { + let string_value = self.string(); + return Token { + token_type: TokenType::String(string_value.clone()), + lexeme: format!("\"{}\"", string_value), line: self.line, } } - } - ',' => Token { - token_type: TokenType::Comma, - lexeme: ",".to_string(), - line: self.line, - }, - '\'' => { - let value = self.char_literal(); - Token { - token_type: TokenType::CharLiteral(value), - lexeme: format!("'{}'", value), - line: self.line, - } - }, - '"' => { - let string_value = self.string(); - Token { - token_type: TokenType::String(string_value.clone()), - lexeme: format!("\"{}\"", string_value), - line: self.line, + + 'a'..='z' | 'A'..='Z' | '_' => { + let ident = self.identifier(); + return self.keyword_or_ident_token(ident) } - } - 'a'..='z' | 'A'..='Z' | '_' => { - let ident = self.identifier(); - self.keyword_or_ident_token(ident) - } + '0'..='9' => { + if c == '0' && (self.peek() == 'b' || self.peek() == 'B') { + self.advance(); // consume 'b' or 'B' - '0'..='9' => { - if c == '0' && (self.peek() == 'b' || self.peek() == 'B') { - self.advance(); // consume 'b' or 'B' + let mut bin_str = String::new(); + while self.peek() == '0' || self.peek() == '1' { + bin_str.push(self.advance()); + } - let mut bin_str = String::new(); - while 
self.peek() == '0' || self.peek() == '1' { - bin_str.push(self.advance()); + let value = i64::from_str_radix(&bin_str, 2).unwrap_or(0); + + return Token { + token_type: TokenType::IntLiteral(format!("0b{}", bin_str)), + lexeme: format!("0b{}", bin_str), + line: self.line, + }; } - let value = i64::from_str_radix(&bin_str, 2).unwrap_or(0); + if c == '0' && (self.peek() == 'x' || self.peek() == 'X') { + self.advance(); // consume 'x' or 'X' - return Token { - token_type: TokenType::IntLiteral(format!("0b{}", bin_str)), - lexeme: format!("0b{}", bin_str), - line: self.line, - }; - } + let mut hex_str = String::new(); + while self.peek().is_ascii_hexdigit() { + hex_str.push(self.advance()); + } - if c == '0' && (self.peek() == 'x' || self.peek() == 'X') { - self.advance(); // consume 'x' or 'X' + let value = i64::from_str_radix(&hex_str, 16).unwrap_or(0); - let mut hex_str = String::new(); - while self.peek().is_ascii_hexdigit() { - hex_str.push(self.advance()); + return Token { + token_type: TokenType::IntLiteral(format!("0x{}", hex_str)), + lexeme: format!("0x{}", hex_str), + line: self.line, + }; } - let value = i64::from_str_radix(&hex_str, 16).unwrap_or(0); - - return Token { - token_type: TokenType::IntLiteral(format!("0x{}", hex_str)), - lexeme: format!("0x{}", hex_str), - line: self.line, - }; - } - - let mut num_str = c.to_string(); - while self.peek().is_ascii_digit() { - num_str.push(self.advance()); - } - - let is_float = if self.peek() == '.' { - num_str.push('.'); - self.advance(); + let mut num_str = c.to_string(); while self.peek().is_ascii_digit() { num_str.push(self.advance()); } - true - } else { - false - }; - let token_type = if is_float { - num_str.parse::().map(TokenType::Float).unwrap() - } else { - TokenType::IntLiteral(num_str.clone()) - }; + let is_float = if self.peek() == '.' 
{ + num_str.push('.'); + self.advance(); + while self.peek().is_ascii_digit() { + num_str.push(self.advance()); + } + true + } else { + false + }; - Token { - token_type, - lexeme: num_str, - line: self.line, + let token_type = if is_float { + num_str.parse::().map(TokenType::Float).unwrap() + } else { + TokenType::IntLiteral(num_str.clone()) + }; + + return Token { + token_type, + lexeme: num_str, + line: self.line, + } } - } - _ => { - if c == '\0' { - eprintln!("[eprintln] Null character encountered — likely unintended"); - panic!("[panic] Null character (`\\0`) is not allowed in source"); - } else if c == '\\' { - eprintln!("[eprintln] Unexpected backslash outside of string"); - panic!("[panic] Unexpected character: '\\' outside of string"); - } else { - eprintln!( - "[eprintln] Unexpected character: {:?} (code: {})", - c, c as u32 - ); - panic!("[panic] Unexpected character: {:?}", c); + _ => { + if c == '\0' { + eprintln!("[eprintln] Null character encountered — likely unintended"); + panic!("[panic] Null character (`\\0`) is not allowed in source"); + } else if c == '\\' { + eprintln!("[eprintln] Unexpected backslash outside of string"); + panic!("[panic] Unexpected character: '\\' outside of string"); + } else { + eprintln!( + "[eprintln] Unexpected character: {:?} (code: {})", + c, c as u32 + ); + panic!("[panic] Unexpected character: {:?}", c); + } } } } diff --git a/front/lexer/src/lexer/trivia.rs b/front/lexer/src/lexer/trivia.rs index 8622d197..5dd1dc65 100644 --- a/front/lexer/src/lexer/trivia.rs +++ b/front/lexer/src/lexer/trivia.rs @@ -1,6 +1,34 @@ use super::Lexer; impl<'a> Lexer<'a> { + pub(crate) fn skip_trivia(&mut self) { + loop { + self.skip_whitespace(); + + if self.is_at_end() { + break; + } + + // line comment // + if self.peek() == '/' && self.peek_next() == '/' { + self.advance(); // '/' + self.advance(); // '/' + self.skip_comment(); + continue; + } + + // block comment /* */ + if self.peek() == '/' && self.peek_next() == '*' { + 
self.advance(); // '/' + self.advance(); // '*' + self.skip_multiline_comment(); + continue; + } + + break; + } + } + pub(crate) fn skip_whitespace(&mut self) { while !self.is_at_end() { let c = self.peek(); diff --git a/front/parser/src/parser/stmt.rs b/front/parser/src/parser/stmt.rs index 012b1724..bbce904a 100644 --- a/front/parser/src/parser/stmt.rs +++ b/front/parser/src/parser/stmt.rs @@ -174,6 +174,10 @@ pub fn parse_statement(tokens: &mut Peekable>) -> Option { tokens.next(); parse_print(tokens) } + TokenType::Input => { + tokens.next(); + parse_input(tokens) + } TokenType::If => { tokens.next(); parse_if(tokens) diff --git a/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs b/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs index 7b60f709..1816b46a 100644 --- a/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs +++ b/llvm_temporary/src/llvm_temporary/expression/rvalue/calls.rs @@ -254,6 +254,28 @@ fn coerce_to_expected<'ctx, 'a>( } match (got, expected) { + // 0) ptr -> int (ptrtoint) (needed for syscall wrappers that take i64 registers) + (BasicTypeEnum::PointerType(_), BasicTypeEnum::IntType(dst)) + if dst.get_bit_width() == 64 && name.starts_with("syscall") => + { + let pv = val.into_pointer_value(); + env.builder + .build_ptr_to_int(pv, dst, &format!("arg{}_p2i", arg_index)) + .unwrap() + .as_basic_value_enum() + } + + // 0.1) int -> ptr (inttoptr) (useful when passing raw addresses) + (BasicTypeEnum::IntType(src), BasicTypeEnum::PointerType(dst)) + if src.get_bit_width() == 64 && name.starts_with("syscall") => + { + let iv = val.into_int_value(); + env.builder + .build_int_to_ptr(iv, dst, &format!("arg{}_i2p", arg_index)) + .unwrap() + .as_basic_value_enum() + } + // 1) int -> int (BasicTypeEnum::IntType(src), BasicTypeEnum::IntType(dst)) => { let src_bw = src.get_bit_width(); diff --git a/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs b/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs index 
0fdff26e..465393fb 100644 --- a/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs +++ b/llvm_temporary/src/llvm_temporary/llvm_codegen/format.rs @@ -91,7 +91,13 @@ pub fn wave_format_to_scanf(format: &str, arg_types: &[AnyTypeEnum]) -> String { .unwrap_or_else(|| panic!("Missing argument for format")); let fmt = match ty { - AnyTypeEnum::IntType(_) => "%d", + AnyTypeEnum::IntType(int_ty) => { + if int_ty.get_bit_width() == 8 { + "%c" + } else { + "%d" + } + } AnyTypeEnum::FloatType(_) => "%f", AnyTypeEnum::PointerType(_) => { panic!("Cannot input into a pointer type directly") diff --git a/std/net/tcp.wave b/std/net/tcp.wave new file mode 100644 index 00000000..e59fa03e --- /dev/null +++ b/std/net/tcp.wave @@ -0,0 +1,133 @@ +// ======================================================= +// TCP networking for Wave +// ======================================================= +// +// Stream-based TCP API built on top of +// std::sys::linux::socket +// +// Blocking, minimal, synchronous TCP abstraction. 
+// ======================================================= + +import("std::sys::linux::socket"); + + +// ----------------------- +// IPv4 address +// ----------------------- + +struct TcpAddr { + ip: i32; // network byte order + port: i16; // network byte order +} + + +// sockaddr_in (internal) +struct SockAddrIn { + family: i16; // AF_INET + port: i16; + addr: i32; + zero: array; +} + + +// ----------------------- +// TCP types +// ----------------------- + +struct TcpListener { + fd: i64; +} + +struct TcpStream { + fd: i64; +} + + +// ----------------------- +// helpers +// ----------------------- + +fun htons(x: i16) -> i16 { + var a: i32 = x; + return ((a & 255) << 8) | ((a >> 8) & 255); +} + +fun htonl(x: i32) -> i32 { + return ((x & 0x000000FF) << 24) + | ((x & 0x0000FF00) << 8) + | ((x & 0x00FF0000) >> 8) + | ((x & 0xFF000000) >> 24); +} + +fun _to_sockaddr(addr: TcpAddr) -> SockAddrIn { + return SockAddrIn { + family: socket.AF_INET, + port: addr.port, + addr: addr.ip, + zero: [0,0,0,0,0,0,0,0] + }; +} + + +// ----------------------- +// listener +// ----------------------- + +fun tcp_bind(port: i16) -> TcpListener { + let fd: i64 = socket.socket( + socket.AF_INET, + socket.SOCK_STREAM, + socket.IPPROTO_TCP + ); + + let addr: SockAddrIn = SockAddrIn { + family: socket.AF_INET, + port: htons(port), + addr: 0, + zero: [0,0,0,0,0,0,0,0] + }; + + socket.bind(fd, &addr, 16); + socket.listen(fd, 128); + + return TcpListener { fd: fd }; +} + +fun tcp_accept(listener: TcpListener) -> TcpStream { + let fd: i64 = socket.accept(listener.fd, 0, 0); + return TcpStream { fd: fd }; +} + +fun tcp_close_listener(listener: TcpListener) { + socket.shutdown(listener.fd, 2); +} + + +// ----------------------- +// stream (server + client common) +// ----------------------- + +fun tcp_connect(addr: TcpAddr) -> TcpStream { + let fd: i64 = socket.socket( + socket.AF_INET, + socket.SOCK_STREAM, + socket.IPPROTO_TCP + ); + + let sa: i64 = _to_sockaddr(addr); + socket.connect(fd, 
&sa, 16); + + return TcpStream { fd: fd }; +} + +fun tcp_read(stream: TcpStream, buf: ptr, len: i64) -> i64 { + return socket.recv(stream.fd, buf, len, 0); +} + +fun tcp_write(stream: TcpStream, buf: ptr, len: i64) -> i64 { + return socket.send(stream.fd, buf, len, 0); +} + +fun tcp_close(stream: TcpStream) { + socket.shutdown(stream.fd, 2); +} diff --git a/std/net/udp.wave b/std/net/udp.wave index f447347c..afcb6194 100644 --- a/std/net/udp.wave +++ b/std/net/udp.wave @@ -1,8 +1,138 @@ -import("std::sys::linux::syscall"); +// ======================================================= +// UDP networking for Wave +// ======================================================= +// +// Datagram-based UDP API built on top of +// std::sys::linux::socket +// +// No connection abstraction. +// No async. +// No buffering. +// ======================================================= -const AF_INET: i64 = 2; -const SOCK_DGRAM: i64 = 2; +import("std::sys::linux::socket"); -fun udp_socket() -> i64 { - return sys_socket(AF_INET, SOCK_DGRAM, 0); + +// ----------------------- +// IPv4 address +// ----------------------- + +struct UdpAddr { + ip: i32; // network byte order + port: i16; // network byte order +} + + +// sockaddr_in (internal use) +struct SockAddrIn { + family: i16; // AF_INET + port: i16; + addr: i32; + zero: array; +} + + +// ----------------------- +// UDP socket +// ----------------------- + +struct UdpSocket { + fd: i64; +} + + +// ----------------------- +// helpers +// ----------------------- + +fun htons(x: i16) -> i16 { + var a: i32 = x; + return ((a & 255) << 8) | ((a >> 8) & 255); +} + +fun htonl(x: i32) -> i32 { + return ((x & 0x000000FF) << 24) + | ((x & 0x0000FF00) << 8) + | ((x & 0x00FF0000) >> 8) + | ((x & 0xFF000000) >> 24); +} + +fun _to_sockaddr(addr: UdpAddr) -> SockAddrIn { + return SockAddrIn { + family: socket.AF_INET, + port: addr.port, + addr: addr.ip, + zero: [0,0,0,0,0,0,0,0] + }; +} + + +// ----------------------- +// socket lifecycle +// 
----------------------- + +fun udp_bind(port: i16) -> UdpSocket { + let fd = socket.socket( + socket.AF_INET, + socket.SOCK_DGRAM, + socket.IPPROTO_UDP + ); + + let addr = SockAddrIn { + family: socket.AF_INET, + port: htons(port), + addr: 0, + zero: [0,0,0,0,0,0,0,0] + }; + + socket.bind(fd, &addr, 16); + + return UdpSocket { fd: fd }; +} + +fun udp_close(sock: UdpSocket) { + socket.shutdown(sock.fd, 2); +} + + +// ----------------------- +// send / recv +// ----------------------- + +fun udp_send_to( + sock: UdpSocket, + addr: UdpAddr, + buf: ptr, + len: i64 +) -> i64 { + let sa = _to_sockaddr(addr); + return socket.send( + sock.fd, + buf, + len, + 0 + ); +} + +fun udp_recv_from( + sock: UdpSocket, + buf: ptr, + len: i64, + src: ptr +) -> i64 { + var sa: SockAddrIn; + var salen: i32 = 16; + + let n = socket.recv( + sock.fd, + buf, + len, + 0 + ); + + // NOTE: + // recvfrom source address extraction + // will be added later when needed + + return n; } diff --git a/std/sys/linux/fs.wave b/std/sys/linux/fs.wave new file mode 100644 index 00000000..86f1cd0e --- /dev/null +++ b/std/sys/linux/fs.wave @@ -0,0 +1,104 @@ +// ======================================================= +// Linux x86_64 filesystem syscalls +// ======================================================= +// +// This layer provides minimal filesystem operations +// built directly on top of syscall.wave. +// +// All functions return raw syscall results. +// Negative values indicate -errno. 
+// ======================================================= + +import("std::sys::linux::syscall"); + + +// ----------------------- +// open / close +// ----------------------- + +fun open(path: str, flags: i32, mode: i32) -> i64 { + // syscall: open (2) + return syscall3(2, path, flags, mode); +} + +fun close(fd: i64) -> i64 { + // syscall: close (3) + return syscall1(3, fd); +} + + +// ----------------------- +// read / write +// ----------------------- + +fun read(fd: i64, buf: ptr, len: i64) -> i64 { + // syscall: read (0) + return syscall3(0, fd, buf, len); +} + +fun write(fd: i64, buf: ptr, len: i64) -> i64 { + // syscall: write (1) + return syscall3(1, fd, buf, len); +} + + +// ----------------------- +// seek +// ----------------------- + +fun lseek(fd: i64, offset: i64, whence: i32) -> i64 { + // syscall: lseek (8) + return syscall3(8, fd, offset, whence); +} + + +// ----------------------- +// file operations +// ----------------------- + +fun unlink(path: str) -> i64 { + // syscall: unlink (87) + return syscall1(87, path); +} + +fun mkdir(path: str, mode: i32) -> i64 { + // syscall: mkdir (83) + return syscall2(83, path, mode); +} + +fun rmdir(path: str) -> i64 { + // syscall: rmdir (84) + return syscall1(84, path); +} + + +// ----------------------- +// metadata +// ----------------------- + +struct Stat { + dev: i64; + ino: i64; + nlink: i64; + mode: i32; + uid: i32; + gid: i32; + pad0: i32; + rdev: i64; + size: i64; + blksize: i64; + blocks: i64; + atime: i64; + mtime: i64; + ctime: i64; +} + +fun stat(path: str, st: ptr) -> i64 { + // syscall: stat (4) + return syscall2(4, path, st); +} + +fun fstat(fd: i64, st: ptr) -> i64 { + // syscall: fstat (5) + return syscall2(5, fd, st); +} diff --git a/std/sys/linux/memory.wave b/std/sys/linux/memory.wave new file mode 100644 index 00000000..b4438a83 --- /dev/null +++ b/std/sys/linux/memory.wave @@ -0,0 +1,42 @@ +// ======================================================= +// Linux x86_64 memory syscalls +// 
=======================================================
+//
+// Low-level virtual memory management.
+// All functions return raw syscall values.
+// Negative values indicate -errno (for `ptr` results the error is carried in the address value).
+// =======================================================
+
+import("std::sys::linux::syscall");
+
+
+// -----------------------
+// mmap / munmap
+// -----------------------
+
+fun mmap(
+    addr: ptr,
+    length: i64,
+    prot: i32,
+    flags: i32,
+    fd: i64,
+    offset: i64
+) -> ptr {
+    // syscall: mmap (9); arguments are forwarded in declaration order. Returns the mapped address or -errno.
+    return syscall6(9, addr, length, prot, flags, fd, offset);
+}
+
+fun munmap(addr: ptr, length: i64) -> i64 {
+    // syscall: munmap (11); unmaps `length` bytes starting at `addr`. Returns 0 or -errno.
+    return syscall2(11, addr, length);
+}
+
+
+// -----------------------
+// brk
+// -----------------------
+
+fun brk(addr: ptr) -> ptr {
+    // syscall: brk (12); the raw call returns the resulting program break (unchanged when the request fails).
+    return syscall1(12, addr);
+}
diff --git a/std/sys/linux/process.wave b/std/sys/linux/process.wave
new file mode 100644
index 00000000..36efc819
--- /dev/null
+++ b/std/sys/linux/process.wave
@@ -0,0 +1,71 @@
+// =======================================================
+// Linux x86_64 process syscalls
+// =======================================================
+//
+// Minimal process-related system calls.
+// All return raw syscall values.
+// Negative values indicate -errno.
+// =======================================================
+
+import("std::sys::linux::syscall");
+
+
+// -----------------------
+// process lifecycle
+// -----------------------
+
+fun exit(code: i32) -> ! {
+    // syscall: exit (60); ends the calling thread with status `code` (exit_group, 231, is not used here).
+    syscall1(60, code);
+    while (true) { }   // presumably unreachable; keeps control from falling off a `!` function — confirm
+}
+
+fun getpid() -> i64 {
+    // syscall: getpid (39); returns the caller's process ID.
+    return syscall0(39);
+}
+
+fun getppid() -> i64 {
+    // syscall: getppid (110); returns the process ID of the caller's parent.
+    return syscall0(110);
+}
+
+
+// -----------------------
+// process creation
+// -----------------------
+
+fun fork() -> i64 {
+    // syscall: fork (57); returns 0 in the child, the child's PID in the parent, or -errno.
+    return syscall0(57);
+}
+
+fun execve(
+    path: str,
+    argv: ptr>,   // NOTE(review): "ptr>" (here and on envp) looks truncated — pointee type lost? confirm upstream
+    envp: ptr>
+) -> i64 {
+    // syscall: execve (59); replaces the process image, so it only returns on failure (-errno).
+    return syscall3(59, path, argv, envp);
+}
+
+
+// -----------------------
+// waiting
+// -----------------------
+
+fun waitpid(pid: i64, status: ptr, options: i32) -> i64 {
+    // syscall: wait4 (61); returns the PID of the child that changed state, or -errno.
+    // waitpid is implemented via wait4 with the rusage argument set to 0 (no usage report requested)
+    return syscall4(61, pid, status, options, 0);
+}
+
+
+// -----------------------
+// signals
+// -----------------------
+
+fun kill(pid: i64, sig: i32) -> i64 {
+    // syscall: kill (62); sends signal `sig` to `pid`. Returns 0 or -errno.
+    return syscall2(62, pid, sig);
+}
diff --git a/std/sys/linux/socket.wave b/std/sys/linux/socket.wave
new file mode 100644
index 00000000..092861e7
--- /dev/null
+++ b/std/sys/linux/socket.wave
@@ -0,0 +1,93 @@
+// =======================================================
+// Linux x86_64 socket syscalls
+// =======================================================
+//
+// Raw socket interface built on top of syscall.wave.
+// This layer exposes OS-level socket APIs only.
+// No protocol abstraction (TCP/UDP) here.
+// =======================================================
+
+import("std::sys::linux::syscall");
+
+
+// -----------------------
+// constants (minimal)
+// -----------------------
+
+// address families (the `domain` argument of socket())
+const AF_INET: i32 = 2;
+const AF_INET6: i32 = 10;
+
+// socket types (the `ty` argument of socket())
+const SOCK_STREAM: i32 = 1;
+const SOCK_DGRAM: i32 = 2;
+
+// protocols (the `protocol` argument of socket(); 0 lets the kernel pick the default for the type)
+const IPPROTO_IP: i32 = 0;
+const IPPROTO_TCP: i32 = 6;
+const IPPROTO_UDP: i32 = 17;
+
+// socket options (the `level` and `optname` arguments of setsockopt())
+const SOL_SOCKET: i32 = 1;
+const SO_REUSEADDR: i32 = 2;
+
+
+// -----------------------
+// basic socket syscalls
+// -----------------------
+
+fun socket(domain: i32, ty: i32, protocol: i32) -> i64 {
+    // syscall: socket (41); returns a new socket descriptor or -errno.
+    return syscall3(41, domain, ty, protocol);
+}
+
+fun bind(fd: i64, addr: ptr, len: i32) -> i64 {
+    // syscall: bind (49); `addr` points to a socket address of `len` bytes. Returns 0 or -errno.
+    return syscall3(49, fd, addr, len);
+}
+
+fun listen(fd: i64, backlog: i32) -> i64 {
+    // syscall: listen (50); `backlog` bounds the pending-connection queue. Returns 0 or -errno.
+    return syscall2(50, fd, backlog);
+}
+
+fun accept(fd: i64, addr: ptr, len: ptr) -> i64 {
+    // syscall: accept (43); `len` is in/out (buffer size, then address size). Returns the new fd or -errno.
+    return syscall3(43, fd, addr, len);
+}
+
+fun connect(fd: i64, addr: ptr, len: i32) -> i64 {
+    // syscall: connect (42); `addr` points to the peer address of `len` bytes. Returns 0 or -errno.
+    return syscall3(42, fd, addr, len);
+}
+
+fun shutdown(fd: i64, how: i32) -> i64 {
+    // syscall: shutdown (48); `how` selects the read side, the write side, or both. Returns 0 or -errno.
+    return syscall2(48, fd, how);
+}
+
+fun setsockopt(
+    fd: i64,
+    level: i32,
+    optname: i32,
+    optval: ptr,
+    optlen: i32
+) -> i64 {
+    // syscall: setsockopt (54); `optval` points to `optlen` bytes of option data. Returns 0 or -errno.
+    return syscall5(54, fd, level, optname, optval, optlen);
+}
+
+
+// -----------------------
+// send / recv
+// -----------------------
+
+fun send(fd: i64, buf: ptr, len: i64, flags: i32) -> i64 {
+    // syscall: sendto (44) with the destination address and its length passed as 0. Returns bytes sent or -errno.
+    return syscall6(44, fd, buf, len, flags, 0, 0);
+}
+
+fun recv(fd: i64, buf: ptr, len: i64, flags: i32) -> i64 {
+    // syscall: recvfrom (45) with the source-address outputs passed as 0. Returns bytes received or -errno.
+    return syscall6(45, fd, buf, len, flags, 0, 0);
+}
diff --git a/std/sys/linux/syscall.wave b/std/sys/linux/syscall.wave
index 5024f47b..3bc31270 100644
--- a/std/sys/linux/syscall.wave
+++ b/std/sys/linux/syscall.wave
@@ -1,14 +1,138 @@
-const SYS_SOCKET: i64 = 41;
+// =======================================================
+// Linux x86_64 syscall interface for Wave
+// =======================================================
+// Calling convention:
+//   rax = syscall number
+//   rdi, rsi, rdx, r10, r8, r9 = arguments (a1..a6, in that order)
+//   return value in rax; the `syscall` instruction itself also overwrites rcx and r11
+//   NOTE(review): the asm blocks below list no clobbers — confirm the backend preserves rcx/r11
+// Error handling:
+//   negative return value = -errno (the kernel uses -4095..-1)
+// =======================================================
 
-fun sys_socket(domain: i64, ty: i64, proto: i64) -> i64 {
-    let fd: i64;
+
+// -----------------------
+// syscall with 0 args: only the number is loaded (id -> rax)
+// -----------------------
+fun syscall0(id: i64) -> i64 {
+    var ret: i64;
+    asm {
+        "syscall"
+        in("rax") id
+        out("rax") ret
+    }
+    return ret;
+}
+
+
+// -----------------------
+// syscall with 1 arg: a1 -> rdi
+// -----------------------
+fun syscall1(id: i64, a1: i64) -> i64 {
+    var ret: i64;
+    asm {
+        "syscall"
+        in("rax") id
+        in("rdi") a1
+        out("rax") ret
+    }
+    return ret;
+}
+
+
+// -----------------------
+// syscall with 2 args: a1 -> rdi, a2 -> rsi
+// -----------------------
+fun syscall2(id: i64, a1: i64, a2: i64) -> i64 {
+    var ret: i64;
+    asm {
+        "syscall"
+        in("rax") id
+        in("rdi") a1
+        in("rsi") a2
+        out("rax") ret
+    }
+    return ret;
+}
+
+
+// -----------------------
+// syscall with 3 args: a1 -> rdi, a2 -> rsi, a3 -> rdx
+// -----------------------
+fun syscall3(id: i64, a1: i64, a2: i64, a3: i64) -> i64 {
+    var ret: i64;
+    asm {
+        "syscall"
+        in("rax") id
+        in("rdi") a1
+        in("rsi") a2
+        in("rdx") a3
+        out("rax") ret
+    }
+    return ret;
+}
+
+
+// -----------------------
+// syscall with 4 args: as syscall3, plus a4 -> r10
+// -----------------------
+fun syscall4(id: i64, a1: i64, a2: i64, a3: i64, a4: i64) -> i64 {
+    var ret: i64;
+    asm {
+        "syscall"
+        in("rax") id
+        in("rdi") a1
+        in("rsi") a2
+        in("rdx") a3
+        in("r10") a4
+        out("rax") ret
+    }
+    return ret;
+}
+
+
+// -----------------------
+// syscall with 5 args: as syscall4, plus a5 -> r8
+// -----------------------
+fun syscall5(id: i64, a1: i64, a2: i64, a3: i64, a4: i64, a5: i64) -> i64 {
+    var ret: i64;
+    asm {
+        "syscall"
+        in("rax") id
+        in("rdi") a1
+        in("rsi") a2
+        in("rdx") a3
+        in("r10") a4
+        in("r8") a5
+        out("rax") ret
+    }
+    return ret;
+}
+
+
+// -----------------------
+// syscall with 6 args: as syscall5, plus a6 -> r9
+// -----------------------
+fun syscall6(
+    id: i64,
+    a1: i64,
+    a2: i64,
+    a3: i64,
+    a4: i64,
+    a5: i64,
+    a6: i64
+) -> i64 {
+    var ret: i64;
     asm {
         "syscall"
-        in("rax") SYS_SOCKET
-        in("rdi") domain
-        in("rsi") ty
-        in("rdx") proto
-        out("rax") fd
+        in("rax") id
+        in("rdi") a1
+        in("rsi") a2
+        in("rdx") a3
+        in("r10") a4
+        in("r8") a5
+        in("r9") a6
+        out("rax") ret
     }
-    return fd;
+    return ret;
 }
diff --git a/std/sys/linux/time.wave b/std/sys/linux/time.wave
new file mode 100644
index 00000000..748ebdae
--- /dev/null
+++ b/std/sys/linux/time.wave
@@ -0,0 +1,40 @@
+// =======================================================
+// Linux x86_64 time-related syscalls
+// =======================================================
+//
+// Raw time and sleep syscalls.
+// All functions return raw syscall values.
+// Negative values indicate -errno.
+// =======================================================
+
+import("std::sys::linux::syscall");
+
+
+// -----------------------
+// timespec: two 64-bit words, the record nanosleep and clock_gettime exchange with the kernel
+// -----------------------
+
+struct TimeSpec {
+    sec: i64;    // whole seconds
+    nsec: i64;   // nanoseconds past `sec`
+}
+
+
+// -----------------------
+// nanosleep
+// -----------------------
+
+fun nanosleep(req: ptr, rem: ptr) -> i64 {
+    // syscall: nanosleep (35); sleeps for *req, storing any unslept time in *rem if interrupted. Returns 0 or -errno.
+    return syscall2(35, req, rem);
+}
+
+
+// -----------------------
+// clock_gettime
+// -----------------------
+
+fun clock_gettime(clock_id: i32, tp: ptr) -> i64 {
+    // syscall: clock_gettime (228); stores the current time of clock `clock_id` in *tp. Returns 0 or -errno.
+    return syscall2(228, clock_id, tp);
+}