From 33fe33c6c32976c2f4a3ea349c5f5a83417dd342 Mon Sep 17 00:00:00 2001 From: Victor Sandoval Date: Fri, 27 Feb 2026 23:06:15 +0000 Subject: [PATCH 1/7] chore: add doc comments --- rust-json-parser/src/error.rs | 48 ++++++-- rust-json-parser/src/lib.rs | 8 +- rust-json-parser/src/parser.rs | 81 ++++++++++++++ rust-json-parser/src/python_bindings.rs | 75 ++++++++++++- rust-json-parser/src/tokenizer.rs | 82 +++++++++++--- rust-json-parser/src/value.rs | 143 ++++++++++++++++++++++-- 6 files changed, 402 insertions(+), 35 deletions(-) diff --git a/rust-json-parser/src/error.rs b/rust-json-parser/src/error.rs index db265cc..4295167 100644 --- a/rust-json-parser/src/error.rs +++ b/rust-json-parser/src/error.rs @@ -1,38 +1,39 @@ use std::error::Error; use std::fmt; -/* - * Enum for JsonError kind, for unsuccessful JSON processing. - * Valid variants: - * UnexpectedToken - * UnexpectedEndOfInput - * InvalidNumber - * InvalidEscape - * InvalidUnicode - */ +/// Error type representing all possible failures during JSON parsing and serialization. #[derive(Debug, Clone, PartialEq)] pub enum JsonError { + /// A token was found that does not match what the parser expected at this position. UnexpectedToken { expected: String, found: String, position: usize, }, + /// The input ended before the parser found a required token. UnexpectedEndOfInput { expected: String, position: usize, }, + /// A numeric literal could not be parsed as a valid number. InvalidNumber { value: String, position: usize, }, + /// An unrecognized escape sequence was encountered inside a string. InvalidEscape { char: char, position: usize, }, + /// A `\uXXXX` escape sequence contains an invalid or incomplete hex value. InvalidUnicode { sequence: String, position: usize, }, + /// A file system operation failed (e.g. file not found, permission denied). 
+ Io { + message: String, + }, } impl fmt::Display for JsonError { @@ -73,12 +74,31 @@ impl fmt::Display for JsonError { position, sequence, ) } + JsonError::Io { message } => write!(f, "IO error: {}", message), } } } impl Error for JsonError {} +impl From for JsonError { + fn from(err: std::io::Error) -> Self { + JsonError::Io { + message: err.to_string(), + } + } +} + +/// Creates a [`JsonError::UnexpectedToken`] error with the given context. +/// +/// # Examples +/// +/// ``` +/// use rust_json_parser::error::unexpected_token_error; +/// +/// let err = unexpected_token_error("number", "@", 5); +/// assert_eq!(err.to_string(), "Unexpected token at position 5: expected number, found @"); +/// ``` pub fn unexpected_token_error(expected: &str, found: &str, position: usize) -> JsonError { JsonError::UnexpectedToken { expected: expected.to_string(), @@ -87,6 +107,16 @@ pub fn unexpected_token_error(expected: &str, found: &str, position: usize) -> J } } +/// Creates a [`JsonError::UnexpectedEndOfInput`] error with the given context. +/// +/// # Examples +/// +/// ``` +/// use rust_json_parser::error::unexpected_end_of_input; +/// +/// let err = unexpected_end_of_input("closing quote", 10); +/// assert_eq!(err.to_string(), "Unexpected end of input at position 10: expected closing quote"); +/// ``` pub fn unexpected_end_of_input(expected: &str, position: usize) -> JsonError { JsonError::UnexpectedEndOfInput { expected: expected.to_string(), diff --git a/rust-json-parser/src/lib.rs b/rust-json-parser/src/lib.rs index 2630502..4442fde 100644 --- a/rust-json-parser/src/lib.rs +++ b/rust-json-parser/src/lib.rs @@ -1,3 +1,9 @@ +//! A JSON parser and serializer library implemented in Rust. +//! +//! Provides a tokenizer, recursive descent parser, and value types for +//! parsing JSON strings or files into structured [`JsonValue`] representations, +//! and serializing them back to JSON strings. 
+ pub mod error; pub mod parser; pub mod tokenizer; @@ -7,7 +13,7 @@ pub mod value; // Without this: users write `use my_lib::parser::parse_json` // With this: users write `use my_lib::parse_json` (cleaner!) pub use error::JsonError; -pub use parser::{JsonParser, parse_json}; +pub use parser::{JsonParser, parse_json, parse_json_file}; pub use tokenizer::{Token, Tokenizer}; pub use value::JsonValue; diff --git a/rust-json-parser/src/parser.rs b/rust-json-parser/src/parser.rs index bada34d..2ef76c6 100644 --- a/rust-json-parser/src/parser.rs +++ b/rust-json-parser/src/parser.rs @@ -4,6 +4,7 @@ use crate::JsonResult; use crate::error::{unexpected_end_of_input, unexpected_token_error}; use crate::tokenizer::{Token, Tokenizer}; use crate::value::JsonValue; +use std::fs; /* * Utility function to error upon missing expected comma @@ -86,18 +87,54 @@ fn err_on_unexpected_closing_token( Ok(()) } +/// A recursive descent parser that converts a token stream into a [`JsonValue`] tree. pub struct JsonParser { tokens: Vec, current: usize, } impl JsonParser { + /// Tokenizes the input string and creates a new `JsonParser` ready to parse. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::JsonParser; + /// + /// let parser = JsonParser::new(r#"{"key": "value"}"#)?; + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` + /// + /// # Errors + /// + /// Returns a [`JsonError`](crate::JsonError) if the input contains invalid tokens + /// (see [`Tokenizer::tokenize`](crate::Tokenizer::tokenize)). pub fn new(input: &str) -> JsonResult { let mut tokenizer = Tokenizer::new(input); let tokens = tokenizer.tokenize()?; Ok(Self { current: 0, tokens }) } + /// Parses the token stream and returns the root [`JsonValue`]. 
+ /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::{JsonParser, JsonValue}; + /// + /// let mut parser = JsonParser::new("[1, 2, 3]")?; + /// let value = parser.parse()?; + /// assert_eq!(value.as_array().map(|a| a.len()), Some(3)); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` + /// + /// # Errors + /// + /// Returns [`JsonError::UnexpectedToken`](crate::JsonError::UnexpectedToken) if the + /// token stream contains structurally invalid JSON (e.g. missing commas, colons, or + /// mismatched brackets), or + /// [`JsonError::UnexpectedEndOfInput`](crate::JsonError::UnexpectedEndOfInput) if the + /// input ends before a complete value is formed. pub fn parse(&mut self) -> JsonResult { match self.peek() { Some(Token::LeftBrace) => self.parse_object(), @@ -439,10 +476,54 @@ impl JsonParser { } } +/// Parses a JSON string and returns the corresponding [`JsonValue`]. +/// +/// This is the main entry point for parsing JSON. It tokenizes and parses in one step. +/// +/// # Examples +/// +/// ``` +/// use rust_json_parser::{parse_json, JsonValue}; +/// +/// let value = parse_json(r#"{"name": "Alice"}"#)?; +/// assert_eq!(value.get("name"), Some(&JsonValue::String("Alice".to_string()))); +/// +/// let value = parse_json("[1, 2, 3]")?; +/// assert_eq!(value.as_array().map(|a| a.len()), Some(3)); +/// # Ok::<(), rust_json_parser::JsonError>(()) +/// ``` +/// +/// # Errors +/// +/// Returns a [`JsonError`](crate::JsonError) if the input is not valid JSON. This includes +/// tokenization errors (invalid characters, malformed strings or numbers) and structural +/// errors (missing commas, unclosed brackets, etc.). pub fn parse_json(input: &str) -> JsonResult { JsonParser::new(input)?.parse() } +/// Reads a file at the given path and parses its contents as JSON. 
+/// +/// # Examples +/// +/// ```no_run +/// use rust_json_parser::parse_json_file; +/// +/// let value = parse_json_file("data.json")?; +/// println!("{}", value); +/// # Ok::<(), rust_json_parser::JsonError>(()) +/// ``` +/// +/// # Errors +/// +/// Returns [`JsonError::Io`](crate::JsonError::Io) if the file cannot be read (e.g. not +/// found or permission denied), or any other [`JsonError`](crate::JsonError) variant if the +/// file contents are not valid JSON. +pub fn parse_json_file(path: &str) -> JsonResult { + let contents = fs::read_to_string(path)?; + parse_json(&contents) +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust-json-parser/src/python_bindings.rs b/rust-json-parser/src/python_bindings.rs index 15faf54..f716cf0 100644 --- a/rust-json-parser/src/python_bindings.rs +++ b/rust-json-parser/src/python_bindings.rs @@ -1,10 +1,10 @@ use crate::parse_json as parse; +use crate::parse_json_file as parse_file; use crate::{JsonError, JsonValue}; -use pyo3::exceptions::{PyTypeError, PyValueError}; +use pyo3::exceptions::{PyIOError, PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList}; use std::collections::HashMap; -use std::fs; /// Utility function to convert a JsonValue instance (value) into a PyAny instance fn json_value_to_py<'py>(value: JsonValue, py: Python<'py>) -> Result, PyErr> { @@ -103,22 +103,89 @@ impl From for PyErr { "Invalid unicode sequence at position {}: {}", position, sequence )), + JsonError::Io { message } => PyIOError::new_err(message), } } } +/// Parse a JSON string and return the corresponding Python object. +/// +/// Args: +/// input: A string containing valid JSON. +/// +/// Returns: +/// The parsed JSON as a Python object (dict, list, str, float, bool, or None). +/// +/// Raises: +/// ValueError: If the input is not valid JSON. 
+/// +/// Examples: +/// >>> parse_json('{"name": "Alice", "age": 30}') +/// {'name': 'Alice', 'age': 30.0} +/// +/// >>> parse_json('[1, 2, 3]') +/// [1.0, 2.0, 3.0] +/// +/// >>> parse_json('"hello"') +/// 'hello' +/// +/// >>> parse_json('null') #[pyfunction] fn parse_json<'py>(py: Python<'py>, input: &str) -> PyResult> { let result = parse(input)?; result.into_pyobject(py) } +/// Parse a JSON file and return the corresponding Python object. +/// +/// Args: +/// path: Path to a file containing valid JSON. +/// +/// Returns: +/// The parsed JSON as a Python object (dict, list, str, float, bool, or None). +/// +/// Raises: +/// ValueError: If the file contents are not valid JSON. +/// OSError: If the file cannot be read. +/// +/// Examples: +/// >>> parse_json_file("config.json") +/// {'key': 'value'} +/// +/// >>> parse_json_file("data/users.json") +/// [{'name': 'Alice'}, {'name': 'Bob'}] #[pyfunction] fn parse_json_file<'py>(py: Python<'py>, path: &str) -> PyResult> { - let contents = fs::read_to_string(&path)?; - Ok(parse_json(py, &contents)?) + let result = parse_file(path)?; + result.into_pyobject(py) } +/// Serialize a Python object to a JSON string. +/// +/// Args: +/// obj: A Python object to serialize (dict, list, str, float, int, bool, or None). +/// indent: Optional number of spaces for pretty-printing. If None, output is compact. +/// +/// Returns: +/// A JSON string representation of the object. +/// +/// Raises: +/// TypeError: If the object contains types that cannot be serialized to JSON. 
+/// +/// Examples: +/// >>> dumps({"name": "Alice", "age": 30}) +/// '{"name": "Alice", "age": 30}' +/// +/// >>> dumps([1, 2, 3]) +/// '[1, 2, 3]' +/// +/// >>> print(dumps({"key": "value"}, indent=2)) +/// { +/// "key": "value" +/// } +/// +/// >>> dumps(None) +/// 'null' #[pyfunction] #[pyo3(signature = (obj, indent=None))] fn dumps(obj: &Bound, indent: Option) -> PyResult { diff --git a/rust-json-parser/src/tokenizer.rs b/rust-json-parser/src/tokenizer.rs index bfab537..999e4f8 100644 --- a/rust-json-parser/src/tokenizer.rs +++ b/rust-json-parser/src/tokenizer.rs @@ -1,7 +1,7 @@ use crate::error::unexpected_token_error; use crate::{JsonError, JsonResult}; -pub fn resolve_escape_sequence(char: char) -> Option { +fn resolve_escape_sequence(char: char) -> Option { match char { 'n' => Some('\n'), 't' => Some('\t'), @@ -15,29 +15,47 @@ pub fn resolve_escape_sequence(char: char) -> Option { } } -/* - * Enum for Token kind. Valid variants: - * LeftBrace, RightBrace, LeftBracket, RightBracket, Comma, Colon - * String(String), Number(f64), Boolean(bool), Null - */ +/// Represents a Token result of tokenization #[derive(Debug, Clone, PartialEq)] pub enum Token { - // Data tokens - carry values + /// A quoted string value. String(String), + /// A numeric literal. Number(f64), + /// A `true` or `false` literal. Boolean(bool), + /// The `null` literal. Null, - // Structural tokens - organize values into containers - LeftBracket, // [ - RightBracket, // ] - LeftBrace, // { - RightBrace, // } - Colon, // : - Comma, // , + /// Opening bracket `[`. + LeftBracket, + /// Closing bracket `]`. + RightBracket, + /// Opening brace `{`. + LeftBrace, + /// Closing brace `}`. + RightBrace, + /// Colon `:` separating keys from values. + Colon, + /// Comma `,` separating elements. + Comma, } impl Token { + /// Returns `true` if `self` and `other` are the same variant, ignoring inner values. 
+ /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::Token; + /// + /// let a = Token::String("hello".to_string()); + /// let b = Token::String("world".to_string()); + /// assert!(a.is_variant(&b)); + /// + /// let c = Token::Number(42.0); + /// assert!(!a.is_variant(&c)); + /// ``` pub fn is_variant(&self, other: &Self) -> bool { std::mem::discriminant(self) == std::mem::discriminant(other) } @@ -50,12 +68,22 @@ fn parse_unicode_hex(s: &str) -> Option { u32::from_str_radix(s, 16).ok().and_then(char::from_u32) } +/// A lexer that converts a JSON input string into a sequence of [`Token`]s. pub struct Tokenizer { input: Vec, current: usize, } impl Tokenizer { + /// Creates a new `Tokenizer` for the given JSON input string. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::Tokenizer; + /// + /// let tokenizer = Tokenizer::new(r#"{"key": 42}"#); + /// ``` pub fn new(input: &str) -> Self { Self { current: 0, @@ -185,6 +213,32 @@ impl Tokenizer { } } + /// Consumes the input and returns the complete list of tokens. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::{Tokenizer, Token}; + /// + /// let mut tokenizer = Tokenizer::new("[1, true]"); + /// let tokens = tokenizer.tokenize()?; + /// assert_eq!(tokens, vec![ + /// Token::LeftBracket, + /// Token::Number(1.0), + /// Token::Comma, + /// Token::Boolean(true), + /// Token::RightBracket, + /// ]); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` + /// + /// # Errors + /// + /// Returns [`JsonError::UnexpectedToken`] if an invalid character is encountered, + /// [`JsonError::InvalidNumber`] if a numeric literal cannot be parsed, + /// [`JsonError::InvalidEscape`] if a string contains an unrecognized escape sequence, + /// [`JsonError::InvalidUnicode`] if a `\uXXXX` sequence is malformed, or + /// [`JsonError::UnexpectedEndOfInput`] if a string is unterminated. 
pub fn tokenize(&mut self) -> JsonResult> { let mut tokens: Vec = Vec::new(); diff --git a/rust-json-parser/src/value.rs b/rust-json-parser/src/value.rs index 2c32a53..997fa5b 100644 --- a/rust-json-parser/src/value.rs +++ b/rust-json-parser/src/value.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, fmt}; -pub fn escape_json_string(s: &str) -> String { +fn escape_json_string(s: &str) -> String { let mut result = String::new(); for c in s.chars() { match c { @@ -17,18 +17,21 @@ pub fn escape_json_string(s: &str) -> String { result } -/* - * Enum for JsonValue kind. Valid variants: - * String(String), Number(f64), Boolean(bool), Null - */ +/// Represents a parsed JSON value. #[derive(Debug, Clone, PartialEq)] pub enum JsonValue { + /// A JSON string (e.g. `"hello"`). String(String), + /// A JSON number, stored as `f64` (e.g. `42`, `3.14`). Number(f64), + /// A JSON boolean (`true` or `false`). Boolean(bool), + /// The JSON `null` literal. Null, - Array(Vec), // A JSON array is a Vec of values - Object(HashMap), // A JSON object is a HashMap + /// An ordered JSON array of values (e.g. `[1, "two", true]`). + Array(Vec), + /// A JSON object mapping string keys to values (e.g. `{"key": "value"}`). + Object(HashMap), } trait JsonFormat { @@ -101,10 +104,38 @@ impl JsonFormat for [JsonValue] { } impl JsonValue { + /// Returns `true` if this value is `JsonValue::Null`. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::parse_json; + /// + /// let value = parse_json("null")?; + /// assert!(value.is_null()); + /// + /// let value = parse_json("42")?; + /// assert!(!value.is_null()); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn is_null(&self) -> bool { matches!(self, JsonValue::Null) } + /// Returns the inner string slice if this is a `JsonValue::String`, or `None` otherwise. 
+ /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::parse_json; + /// + /// let value = parse_json(r#""hello""#)?; + /// assert_eq!(value.as_str(), Some("hello")); + /// + /// let value = parse_json("42")?; + /// assert_eq!(value.as_str(), None); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn as_str(&self) -> Option<&str> { match self { JsonValue::String(s) => Some(s.as_str()), @@ -112,6 +143,20 @@ impl JsonValue { } } + /// Returns the inner `f64` if this is a `JsonValue::Number`, or `None` otherwise. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::parse_json; + /// + /// let value = parse_json("3.14")?; + /// assert_eq!(value.as_f64(), Some(3.14)); + /// + /// let value = parse_json("true")?; + /// assert_eq!(value.as_f64(), None); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn as_f64(&self) -> Option { let JsonValue::Number(n) = self else { return None; @@ -119,6 +164,20 @@ impl JsonValue { Some(*n) } + /// Returns the inner `bool` if this is a `JsonValue::Boolean`, or `None` otherwise. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::parse_json; + /// + /// let value = parse_json("true")?; + /// assert_eq!(value.as_bool(), Some(true)); + /// + /// let value = parse_json("42")?; + /// assert_eq!(value.as_bool(), None); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn as_bool(&self) -> Option { let JsonValue::Boolean(b) = self else { return None; @@ -126,6 +185,20 @@ impl JsonValue { Some(*b) } + /// Returns a reference to the inner `Vec` if this is a `JsonValue::Array`, or `None` otherwise. 
+ /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::parse_json; + /// + /// let value = parse_json("[1, 2, 3]")?; + /// assert_eq!(value.as_array().map(|a| a.len()), Some(3)); + /// + /// let value = parse_json("42")?; + /// assert_eq!(value.as_array(), None); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn as_array(&self) -> Option<&Vec> { match self { JsonValue::Array(a) => Some(a), @@ -133,6 +206,20 @@ impl JsonValue { } } + /// Returns a reference to the inner `HashMap` if this is a `JsonValue::Object`, or `None` otherwise. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::parse_json; + /// + /// let value = parse_json(r#"{"key": "value"}"#)?; + /// assert_eq!(value.as_object().map(|o| o.len()), Some(1)); + /// + /// let value = parse_json("[1, 2]")?; + /// assert_eq!(value.as_object(), None); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn as_object(&self) -> Option<&HashMap> { match self { JsonValue::Object(o) => Some(o), @@ -140,6 +227,19 @@ impl JsonValue { } } + /// Looks up a value by key if this is a `JsonValue::Object`. Returns `None` if the + /// key is missing or if this value is not an object. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::{parse_json, JsonValue}; + /// + /// let value = parse_json(r#"{"name": "Alice", "age": 30}"#)?; + /// assert_eq!(value.get("name"), Some(&JsonValue::String("Alice".to_string()))); + /// assert_eq!(value.get("missing"), None); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn get(&self, key: &str) -> Option<&JsonValue> { let object = self.as_object(); match object { @@ -148,6 +248,19 @@ impl JsonValue { } } + /// Looks up a value by index if this is a `JsonValue::Array`. Returns `None` if the + /// index is out of bounds or if this value is not an array. 
+ /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::{parse_json, JsonValue}; + /// + /// let value = parse_json("[10, 20, 30]")?; + /// assert_eq!(value.get_index(1), Some(&JsonValue::Number(20.0))); + /// assert_eq!(value.get_index(5), None); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn get_index(&self, index: usize) -> Option<&JsonValue> { let array = self.as_array(); match array { @@ -156,10 +269,26 @@ impl JsonValue { } } + /// Serializes this value to a pretty-printed JSON string with the given number + /// of spaces per indentation level. + /// + /// # Examples + /// + /// ``` + /// use rust_json_parser::parse_json; + /// + /// let value = parse_json(r#"{"key": [1, 2]}"#)?; + /// let pretty = value.pretty_print(2); + /// assert!(pretty.contains("\"key\"")); + /// assert!(pretty.contains('\n')); + /// # Ok::<(), rust_json_parser::JsonError>(()) + /// ``` pub fn pretty_print(&self, indent: usize) -> String { self.pretty_print_recursive(0, indent) } + /// Recursive helper for [`pretty_print`](Self::pretty_print) that tracks the current + /// nesting depth. 
fn pretty_print_recursive(&self, depth: usize, indent: usize) -> String { let pad = " ".repeat(depth * indent); let inner_pad = " ".repeat((depth + 1) * indent); From 9a9e64f6ca86f3dd84d8fda08b6b7305d233e69e Mon Sep 17 00:00:00 2001 From: Victor Sandoval Date: Sat, 28 Feb 2026 00:11:30 +0000 Subject: [PATCH 2/7] feat: first version of benchmark rust function and Python CLI benchmark flag --- README.md | 15 +++- .../python/rust_json_parser/__init__.py | 3 +- .../python/rust_json_parser/__main__.py | 80 +++++++++++++++++++ rust-json-parser/requirements.txt | 1 + rust-json-parser/src/python_bindings.rs | 80 +++++++++++++++++++ .../tests/test_python_integration.py | 14 ++++ 6 files changed, 188 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2821cf4..bc39a48 100644 --- a/README.md +++ b/README.md @@ -26,24 +26,31 @@ And configure the `rust-analyzer` extension in your IDE. To use the library as a CLI tool to parse a file within the project directory, run: -``` +```bash cargo run --bin parse_file -- path-to/file.json ``` +## Python bits + To build the Python package, run any of the following: -``` +```bash # Development (builds and installs into your venv) maturin develop ``` -``` +```bash # Release build (optimized) maturin develop --release ``` -``` +```bash # Build a wheel without installing maturin build --release ``` +Once built, the tool can be run as a module like: + +```bash +python3 -m rust_json_parser path-to-json/file.json +``` diff --git a/rust-json-parser/python/rust_json_parser/__init__.py b/rust-json-parser/python/rust_json_parser/__init__.py index 88d4d50..9dbe4e5 100644 --- a/rust-json-parser/python/rust_json_parser/__init__.py +++ b/rust-json-parser/python/rust_json_parser/__init__.py @@ -2,6 +2,7 @@ parse_json, parse_json_file, dumps, + benchmark_performance, ) -__all__ = ["parse_json", "parse_json_file", "dumps"] +__all__ = ["parse_json", "parse_json_file", "dumps", "benchmark_performance"] diff --git 
a/rust-json-parser/python/rust_json_parser/__main__.py b/rust-json-parser/python/rust_json_parser/__main__.py index 763dbd7..3fca432 100644 --- a/rust-json-parser/python/rust_json_parser/__main__.py +++ b/rust-json-parser/python/rust_json_parser/__main__.py @@ -1,13 +1,70 @@ import argparse +import os import sys from pathlib import Path from rust_json_parser import ( + benchmark_performance, dumps, parse_json, parse_json_file, ) +BENCHMARK_ROUNDS = 1000 +WARMUP_ROUNDS = 10 + + +def _human_size(nbytes: int) -> str: + for unit in ("bytes", "KB", "MB"): + if nbytes < 1024 or unit == "MB": + return f"{nbytes:.0f} {unit}" if unit == "bytes" else f"{nbytes:.1f} {unit}" + nbytes /= 1024 + return f"{nbytes:.1f} MB" + + +def _auto_rounds(size: int, requested: int) -> int: + """Scale rounds down for large files to keep runtime reasonable.""" + if size > 1_000_000: + return max(10, requested // 100) + if size > 100_000: + return max(50, requested // 10) + return requested + + +def _comparison(label: str, other_time: float, rust_time: float) -> str: + if other_time >= rust_time: + return f" {label:<20} {other_time:.6f}s (Rust is {other_time / rust_time:.2f}x faster)" + return f" {label:<20} {other_time:.6f}s ({label.rstrip(':')} is {rust_time / other_time:.2f}x faster)" + + +def _benchmark_file(path: str, rounds: int, warmup: int) -> None: + raw = open(path).read() + size = os.path.getsize(path) + rounds = _auto_rounds(size, rounds) + name = os.path.basename(path) + + times = benchmark_performance(raw, rounds=rounds, warmup=warmup) + + print(f"\n{name} ({_human_size(size)}, {rounds} rounds):") + print(f" {'Rust:':<20} {times['rust']:.6f}s") + print(_comparison("Python json (C):", times["json"], times["rust"])) + if times["simplejson"] is not None: + print(_comparison("simplejson:", times["simplejson"], times["rust"])) + + +def run_benchmark(test_data_dir: str, rounds: int, warmup: int) -> None: + files = sorted(Path(test_data_dir).glob("*.json")) + if not files: + print(f"No 
JSON files found in {test_data_dir}", file=sys.stderr) + sys.exit(1) + + print(f"Benchmarking {len(files)} files...") + + for f in files: + _benchmark_file(str(f), rounds, warmup) + + print() + def main(): parser = argparse.ArgumentParser( @@ -24,8 +81,31 @@ def main(): default=2, help="indentation level for output (default: 2)", ) + parser.add_argument( + "--benchmark", + nargs="?", + const="test-data", + metavar="DIR", + help="run performance comparisons against json and simplejson (default dir: test-data)", + ) + parser.add_argument( + "--rounds", + type=int, + default=BENCHMARK_ROUNDS, + help=f"number of benchmark iterations per file (default: {BENCHMARK_ROUNDS})", + ) + parser.add_argument( + "--warmup", + type=int, + default=WARMUP_ROUNDS, + help=f"number of warmup iterations per parser (default: {WARMUP_ROUNDS})", + ) args = parser.parse_args() + if args.benchmark is not None: + run_benchmark(args.benchmark, args.rounds, args.warmup) + return + if args.input is None: if sys.stdin.isatty(): parser.error("no input provided (pass a file, a JSON string, or pipe to stdin)") diff --git a/rust-json-parser/requirements.txt b/rust-json-parser/requirements.txt index b2dc0ab..acb2ce0 100644 --- a/rust-json-parser/requirements.txt +++ b/rust-json-parser/requirements.txt @@ -4,3 +4,4 @@ packaging==26.0 pluggy==1.6.0 pygments==2.19.2 pytest==9.0.2 +simplejson==3.20.2 diff --git a/rust-json-parser/src/python_bindings.rs b/rust-json-parser/src/python_bindings.rs index f716cf0..9182ca4 100644 --- a/rust-json-parser/src/python_bindings.rs +++ b/rust-json-parser/src/python_bindings.rs @@ -1,5 +1,6 @@ use crate::parse_json as parse; use crate::parse_json_file as parse_file; +use std::time::Instant; use crate::{JsonError, JsonValue}; use pyo3::exceptions::{PyIOError, PyTypeError, PyValueError}; use pyo3::prelude::*; @@ -195,10 +196,89 @@ fn dumps(obj: &Bound, indent: Option) -> PyResult { } } +fn median(times: &mut [f64]) -> f64 { + let mid = times.len() / 2; + 
times.select_nth_unstable_by(mid, |a, b| a.partial_cmp(b).unwrap()); + if times.len() % 2 == 1 { + times[mid] + } else { + let left = *times[..mid].iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + (left + times[mid]) / 2.0 + } +} + +/// Benchmark parse_json against json.loads and simplejson.loads. +/// +/// All three parsers are measured doing identical work: taking a JSON string +/// and returning Python objects. Each gets the same number of warmup and +/// timed rounds to ensure a fair comparison. Reports the **median** +/// per-iteration time to reduce the impact of GC pauses and other outliers. +/// +/// Args: +/// input: A JSON string to parse. +/// rounds: Number of timed iterations per parser (default: 1000). +/// warmup: Number of untimed warmup iterations per parser (default: 10). +/// +/// Returns: +/// A dict with median per-iteration times in seconds: +/// ``{"rust": float, "json": float, "simplejson": float}``. +#[pyfunction] +#[pyo3(signature = (input, rounds=1000, warmup=10))] +fn benchmark_performance<'py>( + py: Python<'py>, + input: &str, + rounds: u32, + warmup: u32, +) -> PyResult> { + let n = rounds as usize; + + // --- Rust (parse + PyO3 conversion) --- + for _ in 0..warmup { + let _ = parse_json(py, input)?; + } + let mut rust_times = Vec::with_capacity(n); + for _ in 0..rounds { + let start = Instant::now(); + let _ = parse_json(py, input)?; + rust_times.push(start.elapsed().as_secs_f64()); + } + + // --- json (stdlib C implementation) --- + let json_loads = py.import("json")?.getattr("loads")?; + for _ in 0..warmup { + let _ = json_loads.call1((input,))?; + } + let mut json_times = Vec::with_capacity(n); + for _ in 0..rounds { + let start = Instant::now(); + let _ = json_loads.call1((input,))?; + json_times.push(start.elapsed().as_secs_f64()); + } + + // --- simplejson --- + let simplejson_loads = py.import("simplejson")?.getattr("loads")?; + for _ in 0..warmup { + let _ = simplejson_loads.call1((input,))?; + } + let mut 
simplejson_times = Vec::with_capacity(n); + for _ in 0..rounds { + let start = Instant::now(); + let _ = simplejson_loads.call1((input,))?; + simplejson_times.push(start.elapsed().as_secs_f64()); + } + + let result = PyDict::new(py); + result.set_item("rust", median(&mut rust_times))?; + result.set_item("json", median(&mut json_times))?; + result.set_item("simplejson", median(&mut simplejson_times))?; + Ok(result) +} + #[pymodule] fn _rust_json_parser(m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(parse_json, m)?)?; m.add_function(wrap_pyfunction!(parse_json_file, m)?)?; m.add_function(wrap_pyfunction!(dumps, m)?)?; + m.add_function(wrap_pyfunction!(benchmark_performance, m)?)?; Ok(()) } diff --git a/rust-json-parser/tests/test_python_integration.py b/rust-json-parser/tests/test_python_integration.py index 0239010..6ed09ff 100644 --- a/rust-json-parser/tests/test_python_integration.py +++ b/rust-json-parser/tests/test_python_integration.py @@ -71,3 +71,17 @@ def test_dumps_basic(self): def test_dumps_with_indent(self): result = dumps({"key": "value"}, indent=2) assert "{\n \"key\": \"value\"\n}" == result + + +class TestBenchmark: + def test_benchmark_returns_tuple(self): + """Verify benchmark_performance returns timing tuple with all three values.""" + from rust_json_parser import benchmark_performance + + rust_time, python_json_time, simplejson_time = benchmark_performance('{"test": 1}') + assert isinstance(rust_time, float) + assert isinstance(python_json_time, float) + assert isinstance(simplejson_time, float) + assert rust_time > 0 + assert python_json_time > 0 + assert simplejson_time > 0 From 6dd38b3bebd5f4448e6f8dab06284102b9eb4c0e Mon Sep 17 00:00:00 2001 From: Victor Sandoval Date: Sat, 28 Feb 2026 00:14:34 +0000 Subject: [PATCH 3/7] chore: start optimisations (hold reference to input &str in Tokenizer instead of converting to [char] --- rust-json-parser/src/python_bindings.rs | 7 +++++-- rust-json-parser/src/tokenizer.rs | 19 
++++++++++--------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/rust-json-parser/src/python_bindings.rs b/rust-json-parser/src/python_bindings.rs index 9182ca4..454d860 100644 --- a/rust-json-parser/src/python_bindings.rs +++ b/rust-json-parser/src/python_bindings.rs @@ -1,11 +1,11 @@ use crate::parse_json as parse; use crate::parse_json_file as parse_file; -use std::time::Instant; use crate::{JsonError, JsonValue}; use pyo3::exceptions::{PyIOError, PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList}; use std::collections::HashMap; +use std::time::Instant; /// Utility function to convert a JsonValue instance (value) into a PyAny instance fn json_value_to_py<'py>(value: JsonValue, py: Python<'py>) -> Result, PyErr> { @@ -202,7 +202,10 @@ fn median(times: &mut [f64]) -> f64 { if times.len() % 2 == 1 { times[mid] } else { - let left = *times[..mid].iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + let left = *times[..mid] + .iter() + .max_by(|a, b| a.partial_cmp(b).unwrap()) + .unwrap(); (left + times[mid]) / 2.0 } } diff --git a/rust-json-parser/src/tokenizer.rs b/rust-json-parser/src/tokenizer.rs index 999e4f8..90d8a01 100644 --- a/rust-json-parser/src/tokenizer.rs +++ b/rust-json-parser/src/tokenizer.rs @@ -15,7 +15,7 @@ fn resolve_escape_sequence(char: char) -> Option { } } -/// Represents a Token result of tokenization +/// Represents a Token result of tokenization #[derive(Debug, Clone, PartialEq)] pub enum Token { /// A quoted string value. @@ -69,12 +69,12 @@ fn parse_unicode_hex(s: &str) -> Option { } /// A lexer that converts a JSON input string into a sequence of [`Token`]s. -pub struct Tokenizer { - input: Vec, +pub struct Tokenizer<'input> { + input: &'input str, current: usize, } -impl Tokenizer { +impl<'input> Tokenizer<'input> { /// Creates a new `Tokenizer` for the given JSON input string. 
/// /// # Examples @@ -84,26 +84,27 @@ impl Tokenizer { /// /// let tokenizer = Tokenizer::new(r#"{"key": 42}"#); /// ``` - pub fn new(input: &str) -> Self { + pub fn new(input: &'input str) -> Self { Self { current: 0, - input: input.chars().collect(), + input: input, } } /* - * Look at current char without advancing + * Look at current byte */ fn peek(&self) -> Option { - self.input.get(self.current).copied() + self.input.as_bytes().get(self.current).map(|&b| b as char) } /* * Move forward, return previous char */ fn advance(&mut self) -> Option { + let b = self.input.as_bytes().get(self.current).copied()?; self.current += 1; - self.input.get(self.current - 1).copied() + Some(b as char) } /* From 15ef2168d06c6d01934a547f737b11efbf9019cb Mon Sep 17 00:00:00 2001 From: Victor Sandoval Date: Sun, 1 Mar 2026 14:23:17 +0000 Subject: [PATCH 4/7] chore: change peek() and advance() functions to return a byte instead of a char; chore: use slices of input in string and number consuming functions --- rust-json-parser/src/error.rs | 24 +--- rust-json-parser/src/python_bindings.rs | 22 ++-- rust-json-parser/src/tokenizer.rs | 156 +++++++++++++++--------- 3 files changed, 112 insertions(+), 90 deletions(-) diff --git a/rust-json-parser/src/error.rs b/rust-json-parser/src/error.rs index 4295167..a61e191 100644 --- a/rust-json-parser/src/error.rs +++ b/rust-json-parser/src/error.rs @@ -11,29 +11,15 @@ pub enum JsonError { position: usize, }, /// The input ended before the parser found a required token. - UnexpectedEndOfInput { - expected: String, - position: usize, - }, + UnexpectedEndOfInput { expected: String, position: usize }, /// A numeric literal could not be parsed as a valid number. - InvalidNumber { - value: String, - position: usize, - }, + InvalidNumber { value: String, position: usize }, /// An unrecognized escape sequence was encountered inside a string. 
- InvalidEscape { - char: char, - position: usize, - }, + InvalidEscape { char: char, position: usize }, /// A `\uXXXX` escape sequence contains an invalid or incomplete hex value. - InvalidUnicode { - sequence: String, - position: usize, - }, + InvalidUnicode { sequence: String, position: usize }, /// A file system operation failed (e.g. file not found, permission denied). - Io { - message: String, - }, + Io { message: String }, } impl fmt::Display for JsonError { diff --git a/rust-json-parser/src/python_bindings.rs b/rust-json-parser/src/python_bindings.rs index 454d860..7d35887 100644 --- a/rust-json-parser/src/python_bindings.rs +++ b/rust-json-parser/src/python_bindings.rs @@ -235,15 +235,16 @@ fn benchmark_performance<'py>( ) -> PyResult> { let n = rounds as usize; - // --- Rust (parse + PyO3 conversion) --- + // --- simplejson --- + let simplejson_loads = py.import("simplejson")?.getattr("loads")?; for _ in 0..warmup { - let _ = parse_json(py, input)?; + let _ = simplejson_loads.call1((input,))?; } - let mut rust_times = Vec::with_capacity(n); + let mut simplejson_times = Vec::with_capacity(n); for _ in 0..rounds { let start = Instant::now(); - let _ = parse_json(py, input)?; - rust_times.push(start.elapsed().as_secs_f64()); + let _ = simplejson_loads.call1((input,))?; + simplejson_times.push(start.elapsed().as_secs_f64()); } // --- json (stdlib C implementation) --- @@ -258,16 +259,15 @@ fn benchmark_performance<'py>( json_times.push(start.elapsed().as_secs_f64()); } - // --- simplejson --- - let simplejson_loads = py.import("simplejson")?.getattr("loads")?; + // --- Rust --- for _ in 0..warmup { - let _ = simplejson_loads.call1((input,))?; + let _ = parse_json(py, input)?; } - let mut simplejson_times = Vec::with_capacity(n); + let mut rust_times = Vec::with_capacity(n); for _ in 0..rounds { let start = Instant::now(); - let _ = simplejson_loads.call1((input,))?; - simplejson_times.push(start.elapsed().as_secs_f64()); + let _ = parse_json(py, input)?; + 
rust_times.push(start.elapsed().as_secs_f64()); } let result = PyDict::new(py); diff --git a/rust-json-parser/src/tokenizer.rs b/rust-json-parser/src/tokenizer.rs index 90d8a01..6faac30 100644 --- a/rust-json-parser/src/tokenizer.rs +++ b/rust-json-parser/src/tokenizer.rs @@ -94,17 +94,21 @@ impl<'input> Tokenizer<'input> { /* * Look at current byte */ - fn peek(&self) -> Option { - self.input.as_bytes().get(self.current).map(|&b| b as char) + fn peek(&self) -> Option<&u8> { + self.input.as_bytes().get(self.current) } /* - * Move forward, return previous char + * Move forward, return previous byte */ - fn advance(&mut self) -> Option { - let b = self.input.as_bytes().get(self.current).copied()?; + fn advance(&mut self) -> Option<&u8> { + let b = self.input.as_bytes().get(self.current)?; self.current += 1; - Some(b as char) + Some(b) + } + + fn _input_slice_to_string(&self, start: usize, end: usize) -> String { + self.input[start..end].to_string() } /* @@ -115,69 +119,101 @@ impl<'input> Tokenizer<'input> { } fn consume_number(&mut self) -> JsonResult { - let mut number_as_string: String = String::new(); + let start = self.current; while let Some(c) = self.peek() { - if !(c.is_numeric() || c == '.' || c == '-' || c == 'e' || c == 'E' || c == '+') { + if !(c.is_ascii_digit() + || *c == b'.' 
+ || *c == b'-' + || *c == b'e' + || *c == b'E' + || *c == b'+') + { break; } - number_as_string.push(c); self.advance(); } - let number = number_as_string - .parse::() - .map_err(|_| JsonError::InvalidNumber { - value: number_as_string, - position: self.current, - })?; + let slice = &self.input[start..self.current]; + let number = slice.parse::().map_err(|_| JsonError::InvalidNumber { + value: slice.to_string(), + position: self.current, + })?; Ok(number) } fn consume_string(&mut self) -> JsonResult { - let mut consumed_string: String = String::new(); + let start = self.current; + + // Fast path: scan for closing quote with no escape sequences + loop { + match self.peek() { + Some(b'"') => { + let s = self._input_slice_to_string(start, self.current); + self.advance(); // Consume closing quote + return Ok(s); + } + Some(b'\\') => { + // Copy what we've scanned so far and switch to slow path + let mut s: String = self._input_slice_to_string(start, self.current); + return self.consume_string_slow(&mut s); + } + Some(_) => { + self.advance(); + } + None => { + return Err(JsonError::UnexpectedEndOfInput { + expected: "Closing quote".to_string(), + position: self.current, + }); + } + } + } + } - while let Some(c) = self.peek() { - match c { - '"' => { - self.advance(); // consume closing quote - return Ok(consumed_string); + fn consume_string_slow(&mut self, s: &mut String) -> JsonResult { + while let Some(&b) = self.peek() { + match b { + b'"' => { + self.advance(); + return Ok(std::mem::take(s)); } - '\\' => { + b'\\' => { self.advance(); // consume escape character let special_meaning = - self.advance().ok_or(JsonError::UnexpectedEndOfInput { - expected: "Special meaning char for escape sequence".to_string(), - position: self.current, - })?; - // Process unicode chars - if special_meaning == 'u' { - let mut consumed_unicode: String = String::new(); - for _ in 0..4 { - let uni_char = self.advance().ok_or(JsonError::InvalidUnicode { - sequence: format!("\\u{}", 
consumed_unicode), + self.advance() + .copied() + .ok_or(JsonError::UnexpectedEndOfInput { + expected: "Special meaning char for escape sequence".to_string(), position: self.current, })?; - consumed_unicode.push(uni_char); - } - let unicode_sequence = parse_unicode_hex(&consumed_unicode).ok_or( - JsonError::InvalidUnicode { - sequence: format!("\\u{}", consumed_unicode), + + if special_meaning == b'u' { + let hex_start = self.current; + if self.current + 4 > self.input.len() { + return Err(JsonError::InvalidUnicode { + sequence: format!("\\u{}", &self.input[hex_start..]), position: self.current, - }, - )?; - consumed_string.push(unicode_sequence); + }); + } + let hex_str = &self.input[hex_start..hex_start + 4]; + let ch = parse_unicode_hex(hex_str).ok_or(JsonError::InvalidUnicode { + sequence: format!("\\u{}", hex_str), + position: self.current, + })?; + s.push(ch); + self.current += 4; } else { - let escape_sequence = resolve_escape_sequence(special_meaning).ok_or( + let ch = resolve_escape_sequence(special_meaning as char).ok_or( JsonError::InvalidEscape { - char: special_meaning, + char: special_meaning as char, position: self.current, }, )?; - consumed_string.push(escape_sequence); + s.push(ch); } } _ => { - consumed_string.push(c); + s.push(b as char); self.advance(); } } @@ -190,22 +226,22 @@ impl<'input> Tokenizer<'input> { } fn consume_keyword(&mut self) -> JsonResult { - let mut consumed_keyword: String = String::new(); + let start = self.current; while let Some(c) = self.peek() { - if !c.is_alphabetic() { + if !c.is_ascii_alphabetic() { break; } - consumed_keyword.push(c); self.advance(); } - match consumed_keyword.as_str() { + let slice = &self.input[start..self.current]; + match slice { "true" => Ok(Token::Boolean(true)), "false" => Ok(Token::Boolean(false)), "null" => Ok(Token::Null), _ => { - let found = match consumed_keyword.chars().next() { + let found = match slice.chars().next() { Some(first) => first.to_string(), None => "unknown".to_string(), 
}; @@ -245,43 +281,43 @@ impl<'input> Tokenizer<'input> { while let Some(c) = self.peek() { match c { - ' ' | '\n' | '\t' | '\r' => { + b' ' | b'\n' | b'\t' | b'\r' => { self.advance(); // explicitly skip whitespace } - '"' => { + b'"' => { self.advance(); // consume opening quote let consumed_string = self.consume_string()?; tokens.push(Token::String(consumed_string)); } - '0'..='9' | '-' => { + b'0'..=b'9' | b'-' => { let consumed_number = self.consume_number()?; tokens.push(Token::Number(consumed_number)); } - '{' => { + b'{' => { self.advance(); tokens.push(Token::LeftBrace); } - '}' => { + b'}' => { self.advance(); tokens.push(Token::RightBrace); } - '[' => { + b'[' => { self.advance(); tokens.push(Token::LeftBracket); } - ']' => { + b']' => { self.advance(); tokens.push(Token::RightBracket); } - ',' => { + b',' => { self.advance(); tokens.push(Token::Comma); } - ':' => { + b':' => { self.advance(); tokens.push(Token::Colon); } - _ if c.is_alphabetic() => { + _ if c.is_ascii_alphabetic() => { let keyword_token = self.consume_keyword()?; tokens.push(keyword_token); } @@ -289,7 +325,7 @@ impl<'input> Tokenizer<'input> { if c.is_ascii_punctuation() { return Err(unexpected_token_error( "Valid JSON value", - &c.to_string(), + &(*c as char).to_string(), 0, )); } From 6de935e0504c73bb180eeff1680bced0e35f6c7a Mon Sep 17 00:00:00 2001 From: Victor Sandoval Date: Sun, 1 Mar 2026 17:36:14 +0000 Subject: [PATCH 5/7] chore(parser): move format() macro call into err_on_missing_expected_comma() function; chore(parser): remove clone() from advance() --- rust-json-parser/src/parser.rs | 84 ++++++++-------------------------- 1 file changed, 20 insertions(+), 64 deletions(-) diff --git a/rust-json-parser/src/parser.rs b/rust-json-parser/src/parser.rs index 2ef76c6..7e51c53 100644 --- a/rust-json-parser/src/parser.rs +++ b/rust-json-parser/src/parser.rs @@ -11,11 +11,15 @@ use std::fs; */ fn err_on_missing_expected_comma( expected_comma: bool, - found: &str, + found: &Token, 
position: usize, ) -> JsonResult<()> { if expected_comma { - return Err(unexpected_token_error(",", found, position)); + return Err(unexpected_token_error( + ",", + &format!("{:?}", found), + position, + )); } Ok(()) } @@ -177,11 +181,7 @@ impl JsonParser { match token { // Start of array Token::LeftBracket => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; let nested_array = self.parse_array()?; array.push(nested_array); @@ -194,55 +194,35 @@ impl JsonParser { } // Start of object (opening { is consumed by parse_object()) Token::LeftBrace => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; let nested_object = self.parse_object()?; array.push(nested_object); expect_comma = true; } Token::String(s) => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; array.push(JsonValue::String(s.clone())); self.advance(); expect_comma = true; } Token::Number(n) => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; array.push(JsonValue::Number(*n)); self.advance(); expect_comma = true; } Token::Boolean(b) => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; array.push(JsonValue::Boolean(*b)); self.advance(); expect_comma = true; } Token::Null => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; array.push(JsonValue::Null); self.advance(); @@ -295,11 +275,7 @@ impl JsonParser { 
match token { // Start of object Token::LeftBrace => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; if colon_found { let nested_object = self.parse_object()?; @@ -315,11 +291,7 @@ impl JsonParser { } // Start of array (end of array is handled in parse_array()) Token::LeftBracket => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; if colon_found { let array = self.parse_array()?; @@ -330,11 +302,7 @@ impl JsonParser { } // Key or string value Token::String(s) => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; // Unexpected end of input let next_token = @@ -359,11 +327,7 @@ impl JsonParser { self.advance(); } Token::Number(n) => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; err_on_unexpected_value_before_colon( colon_found, &n.to_string(), @@ -377,11 +341,7 @@ impl JsonParser { self.advance(); } Token::Boolean(b) => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; err_on_unexpected_value_before_colon( colon_found, &b.to_string(), @@ -395,11 +355,7 @@ impl JsonParser { self.advance(); } Token::Null => { - err_on_missing_expected_comma( - expect_comma, - &format!("{:?}", token), - self.current, - )?; + err_on_missing_expected_comma(expect_comma, token, self.current)?; err_on_unexpected_value_before_colon(colon_found, "null", self.current)?; object.insert(key.clone(), JsonValue::Null); @@ -462,8 +418,8 @@ impl JsonParser { /* * Move forward, return previous token */ - fn 
advance(&mut self) -> Option { - let token = self.tokens.get(self.current).cloned(); + fn advance(&mut self) -> Option<&Token> { + let token = self.tokens.get(self.current); self.current += 1; token } From e189935b90a6a30d9b25bdd305e0f0da9febdff3 Mon Sep 17 00:00:00 2001 From: Victor Sandoval Date: Sun, 1 Mar 2026 21:54:25 +0000 Subject: [PATCH 6/7] chore: tweak results output in benchmark; add pure rust to the mix for more context --- .../python/rust_json_parser/__init__.py | 11 ++++++++--- .../python/rust_json_parser/__main__.py | 18 +++++++++++------- rust-json-parser/src/python_bindings.rs | 12 ++++++++++++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/rust-json-parser/python/rust_json_parser/__init__.py b/rust-json-parser/python/rust_json_parser/__init__.py index 9dbe4e5..3c30cc5 100644 --- a/rust-json-parser/python/rust_json_parser/__init__.py +++ b/rust-json-parser/python/rust_json_parser/__init__.py @@ -1,8 +1,13 @@ from rust_json_parser._rust_json_parser import ( + benchmark_performance, + dumps, parse_json, parse_json_file, - dumps, - benchmark_performance, ) -__all__ = ["parse_json", "parse_json_file", "dumps", "benchmark_performance"] +__all__ = [ + "parse_json", + "parse_json_file", + "dumps", + "benchmark_performance", +] diff --git a/rust-json-parser/python/rust_json_parser/__main__.py b/rust-json-parser/python/rust_json_parser/__main__.py index 3fca432..15ec0b5 100644 --- a/rust-json-parser/python/rust_json_parser/__main__.py +++ b/rust-json-parser/python/rust_json_parser/__main__.py @@ -33,8 +33,10 @@ def _auto_rounds(size: int, requested: int) -> int: def _comparison(label: str, other_time: float, rust_time: float) -> str: if other_time >= rust_time: - return f" {label:<20} {other_time:.6f}s (Rust is {other_time / rust_time:.2f}x faster)" - return f" {label:<20} {other_time:.6f}s ({label.rstrip(':')} is {rust_time / other_time:.2f}x faster)" + pct = (other_time / rust_time - 1) * 100 + return f" {label:<22} {other_time:.9f}s 
(Rust with bindings is {pct:.0f}% faster)" + pct = (rust_time / other_time - 1) * 100 + return f" {label:<22} {other_time:.9f}s ({label.rstrip(':')} is {pct:.0f}% faster than Rust with Python bindings)" def _benchmark_file(path: str, rounds: int, warmup: int) -> None: @@ -46,10 +48,10 @@ def _benchmark_file(path: str, rounds: int, warmup: int) -> None: times = benchmark_performance(raw, rounds=rounds, warmup=warmup) print(f"\n{name} ({_human_size(size)}, {rounds} rounds):") - print(f" {'Rust:':<20} {times['rust']:.6f}s") + print(f" {'Rust with bindings:':<22} {times['rust']:.9f}s") + print(_comparison("Rust:", times["pure-rust"], times["rust"])) print(_comparison("Python json (C):", times["json"], times["rust"])) - if times["simplejson"] is not None: - print(_comparison("simplejson:", times["simplejson"], times["rust"])) + print(_comparison("simplejson:", times["simplejson"], times["rust"])) def run_benchmark(test_data_dir: str, rounds: int, warmup: int) -> None: @@ -58,7 +60,7 @@ def run_benchmark(test_data_dir: str, rounds: int, warmup: int) -> None: print(f"No JSON files found in {test_data_dir}", file=sys.stderr) sys.exit(1) - print(f"Benchmarking {len(files)} files...") + print(f"Benchmarking {len(files)} files (including pure Rust implementation)...") for f in files: _benchmark_file(str(f), rounds, warmup) @@ -108,7 +110,9 @@ def main(): if args.input is None: if sys.stdin.isatty(): - parser.error("no input provided (pass a file, a JSON string, or pipe to stdin)") + parser.error( + "no input provided (pass a file, a JSON string, or pipe to stdin)" + ) raw = sys.stdin.read() result = parse_json(raw) elif Path(args.input).is_file(): diff --git a/rust-json-parser/src/python_bindings.rs b/rust-json-parser/src/python_bindings.rs index 7d35887..c7dfc18 100644 --- a/rust-json-parser/src/python_bindings.rs +++ b/rust-json-parser/src/python_bindings.rs @@ -235,6 +235,17 @@ fn benchmark_performance<'py>( ) -> PyResult> { let n = rounds as usize; + // --- Rust (with no 
bindings) --- + for _ in 0..warmup { + let _ = parse(input)?; + } + let mut pure_rust_times = Vec::with_capacity(n); + for _ in 0..rounds { + let start = Instant::now(); + let _ = parse(input)?; + pure_rust_times.push(start.elapsed().as_secs_f64()); + } + // --- simplejson --- let simplejson_loads = py.import("simplejson")?.getattr("loads")?; for _ in 0..warmup { @@ -271,6 +282,7 @@ fn benchmark_performance<'py>( } let result = PyDict::new(py); + result.set_item("pure-rust", median(&mut pure_rust_times))?; result.set_item("rust", median(&mut rust_times))?; result.set_item("json", median(&mut json_times))?; result.set_item("simplejson", median(&mut simplejson_times))?; From 7de40f9ec567bc2db0d25ef8d2381d0d6739fc90 Mon Sep 17 00:00:00 2001 From: Victor Sandoval Date: Sun, 1 Mar 2026 22:01:32 +0000 Subject: [PATCH 7/7] chore: add --benchmark flag to readme --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bc39a48..971eccb 100644 --- a/README.md +++ b/README.md @@ -49,8 +49,14 @@ maturin develop --release maturin build --release ``` -Once built, the tool can be run as a module like: +Once built, the parser tool can be run as a module like: ```bash python3 -m rust_json_parser path-to-json/file.json ``` + +The Python CLI also exposes a benchmark mode through the `--benchmark` flag: + +```bash +python -m rust_json_parser --benchmark +```