Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,37 @@ And configure the `rust-analyzer` extension in your IDE.

To use the library as a CLI tool to parse a file within the project directory, run:

```bash
cargo run --bin parse_file -- path-to/file.json
```

## Python bits

To build the Python package, run any of the following:

```bash
# Development (builds and installs into your venv)
maturin develop
```

```bash
# Release build (optimized)
maturin develop --release
```

```bash
# Build a wheel without installing
maturin build --release
```

Once built, the parser tool can be run as a module like:

```bash
python3 -m rust_json_parser path-to-json/file.json
```

The Python CLI also exposes a benchmark mode through the `--benchmark` flag:

```bash
python -m rust_json_parser --benchmark <path-to-dir-with-json-files>
```
10 changes: 8 additions & 2 deletions rust-json-parser/python/rust_json_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from rust_json_parser._rust_json_parser import (
benchmark_performance,
dumps,
parse_json,
parse_json_file,
dumps,
)

# Public API of the package. Defined once: an earlier assignment that was
# immediately overwritten by this one has been removed as dead code.
__all__ = [
    "parse_json",
    "parse_json_file",
    "dumps",
    "benchmark_performance",
]
86 changes: 85 additions & 1 deletion rust-json-parser/python/rust_json_parser/__main__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,72 @@
import argparse
import os
import sys
from pathlib import Path

from rust_json_parser import (
benchmark_performance,
dumps,
parse_json,
parse_json_file,
)

# Default number of benchmark iterations per file (default of --rounds).
BENCHMARK_ROUNDS = 1000
# Default number of warmup iterations per parser (default of --warmup).
WARMUP_ROUNDS = 10


def _human_size(nbytes: int) -> str:
for unit in ("bytes", "KB", "MB"):
if nbytes < 1024 or unit == "MB":
return f"{nbytes:.0f} {unit}" if unit == "bytes" else f"{nbytes:.1f} {unit}"
nbytes /= 1024
return f"{nbytes:.1f} MB"


def _auto_rounds(size: int, requested: int) -> int:
"""Scale rounds down for large files to keep runtime reasonable."""
if size > 1_000_000:
return max(10, requested // 100)
if size > 100_000:
return max(50, requested // 10)
return requested


def _comparison(label: str, other_time: float, rust_time: float) -> str:
if other_time >= rust_time:
pct = (other_time / rust_time - 1) * 100
return f" {label:<22} {other_time:.9f}s (Rust with bindings is {pct:.0f}% faster)"
pct = (rust_time / other_time - 1) * 100
return f" {label:<22} {other_time:.9f}s ({label.rstrip(':')} is {pct:.0f}% faster than Rust with Python bindings)"


def _benchmark_file(path: str, rounds: int, warmup: int) -> None:
    """Benchmark a single JSON file and print a comparison report to stdout.

    Reads the file, scales the round count by file size via ``_auto_rounds``,
    runs ``benchmark_performance`` on the text, and prints one line per
    parser.

    Args:
        path: Path to the JSON file.
        rounds: Requested number of benchmark iterations (may be scaled down).
        warmup: Number of warmup iterations passed to ``benchmark_performance``.
    """
    # Close the handle deterministically and decode as UTF-8 (the JSON
    # interchange encoding) rather than the platform's locale default.
    with open(path, encoding="utf-8") as fh:
        raw = fh.read()
    size = os.path.getsize(path)
    rounds = _auto_rounds(size, rounds)
    name = os.path.basename(path)

    # The result is indexed by the keys "rust", "pure-rust", "json" and
    # "simplejson"; each value is printed as a time in seconds.
    times = benchmark_performance(raw, rounds=rounds, warmup=warmup)

    print(f"\n{name} ({_human_size(size)}, {rounds} rounds):")
    print(f" {'Rust with bindings:':<22} {times['rust']:.9f}s")
    print(_comparison("Rust:", times["pure-rust"], times["rust"]))
    print(_comparison("Python json (C):", times["json"], times["rust"]))
    print(_comparison("simplejson:", times["simplejson"], times["rust"]))


def run_benchmark(test_data_dir: str, rounds: int, warmup: int) -> None:
    """Benchmark every ``*.json`` file directly inside ``test_data_dir``.

    Files are processed in sorted path order. If the directory contains no
    matching files, a message is written to stderr and the process exits
    with status 1.

    Args:
        test_data_dir: Directory to search for JSON files (non-recursive).
        rounds: Requested benchmark iterations per file.
        warmup: Warmup iterations per parser.
    """
    json_paths = sorted(Path(test_data_dir).glob("*.json"))
    total = len(json_paths)

    if total == 0:
        print(f"No JSON files found in {test_data_dir}", file=sys.stderr)
        sys.exit(1)

    print(f"Benchmarking {total} files (including pure Rust implementation)...")

    for json_path in json_paths:
        _benchmark_file(str(json_path), rounds, warmup)

    print()


def main():
parser = argparse.ArgumentParser(
Expand All @@ -24,11 +83,36 @@ def main():
default=2,
help="indentation level for output (default: 2)",
)
parser.add_argument(
"--benchmark",
nargs="?",
const="test-data",
metavar="DIR",
help="run performance comparisons against json and simplejson (default dir: test-data)",
)
parser.add_argument(
"--rounds",
type=int,
default=BENCHMARK_ROUNDS,
help=f"number of benchmark iterations per file (default: {BENCHMARK_ROUNDS})",
)
parser.add_argument(
"--warmup",
type=int,
default=WARMUP_ROUNDS,
help=f"number of warmup iterations per parser (default: {WARMUP_ROUNDS})",
)
args = parser.parse_args()

if args.benchmark is not None:
run_benchmark(args.benchmark, args.rounds, args.warmup)
return

if args.input is None:
if sys.stdin.isatty():
parser.error("no input provided (pass a file, a JSON string, or pipe to stdin)")
parser.error(
"no input provided (pass a file, a JSON string, or pipe to stdin)"
)
raw = sys.stdin.read()
result = parse_json(raw)
elif Path(args.input).is_file():
Expand Down
1 change: 1 addition & 0 deletions rust-json-parser/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ packaging==26.0
pluggy==1.6.0
pygments==2.19.2
pytest==9.0.2
simplejson==3.20.2
66 changes: 41 additions & 25 deletions rust-json-parser/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,38 +1,25 @@
use std::error::Error;
use std::fmt;

/*
* Enum for JsonError kind, for unsuccessful JSON processing.
* Valid variants:
* UnexpectedToken
* UnexpectedEndOfInput
* InvalidNumber
* InvalidEscape
* InvalidUnicode
*/
/// Error type representing all possible failures during JSON parsing and serialization.
///
/// Each variant is declared exactly once; every parsing variant carries the
/// `position` at which the problem was detected.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonError {
    /// A token was found that does not match what the parser expected at this position.
    UnexpectedToken {
        expected: String,
        found: String,
        position: usize,
    },
    /// The input ended before the parser found a required token.
    UnexpectedEndOfInput { expected: String, position: usize },
    /// A numeric literal could not be parsed as a valid number.
    InvalidNumber { value: String, position: usize },
    /// An unrecognized escape sequence was encountered inside a string.
    InvalidEscape { char: char, position: usize },
    /// A `\uXXXX` escape sequence contains an invalid or incomplete hex value.
    InvalidUnicode { sequence: String, position: usize },
    /// A file system operation failed (e.g. file not found, permission denied).
    ///
    /// Only the rendered message is kept, so the enum can stay `Clone` and `PartialEq`.
    Io { message: String },
}

impl fmt::Display for JsonError {
Expand Down Expand Up @@ -73,12 +60,31 @@ impl fmt::Display for JsonError {
position, sequence,
)
}
JsonError::Io { message } => write!(f, "IO error: {}", message),
}
}
}

// Empty impl: `JsonError` uses the `Error` trait's default methods, so no
// underlying `source()` error is reported.
impl Error for JsonError {}

impl From<std::io::Error> for JsonError {
fn from(err: std::io::Error) -> Self {
JsonError::Io {
message: err.to_string(),
}
}
}

/// Creates a [`JsonError::UnexpectedToken`] error with the given context.
///
/// # Examples
///
/// ```
/// use rust_json_parser::error::unexpected_token_error;
///
/// let err = unexpected_token_error("number", "@", 5);
/// assert_eq!(err.to_string(), "Unexpected token at position 5: expected number, found @");
/// ```
pub fn unexpected_token_error(expected: &str, found: &str, position: usize) -> JsonError {
JsonError::UnexpectedToken {
expected: expected.to_string(),
Expand All @@ -87,6 +93,16 @@ pub fn unexpected_token_error(expected: &str, found: &str, position: usize) -> J
}
}

/// Creates a [`JsonError::UnexpectedEndOfInput`] error with the given context.
///
/// # Examples
///
/// ```
/// use rust_json_parser::error::unexpected_end_of_input;
///
/// let err = unexpected_end_of_input("closing quote", 10);
/// assert_eq!(err.to_string(), "Unexpected end of input at position 10: expected closing quote");
/// ```
pub fn unexpected_end_of_input(expected: &str, position: usize) -> JsonError {
JsonError::UnexpectedEndOfInput {
expected: expected.to_string(),
Expand Down
8 changes: 7 additions & 1 deletion rust-json-parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
//! A JSON parser and serializer library implemented in Rust.
//!
//! Provides a tokenizer, recursive descent parser, and value types for
//! parsing JSON strings or files into structured [`JsonValue`] representations,
//! and serializing them back to JSON strings.

pub mod error;
pub mod parser;
pub mod tokenizer;
Expand All @@ -7,7 +13,7 @@ pub mod value;
// Without this: users write `use my_lib::parser::parse_json`
// With this: users write `use my_lib::parse_json` (cleaner!)
pub use error::JsonError;
pub use parser::{JsonParser, parse_json};
pub use parser::{JsonParser, parse_json, parse_json_file};
pub use tokenizer::{Token, Tokenizer};
pub use value::JsonValue;

Expand Down
Loading