Bump the ruff pre-commit hook to v0.9.3, add a citation section to the README,
collect benchmark images recursively while keeping only JPEG suffixes, raise
the default number of benchmark runs from 5 to 20, and skip jpeg4py and
pillow-simd when run_benchmarks.sh runs on macOS.

NOTE(review): the usage text in run_benchmarks.sh still reads
"(2000 images, 5 runs)" in an unchanged context line below; it should follow
the new default of 20. Fix it in the script itself, where the lines after it
are visible.
NOTE(review): indentation and column alignment were rebuilt from a
whitespace-flattened copy of this patch; confirm the context lines against
the target files before applying.

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6a3921d..2ca051e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,7 +39,7 @@ repos:
       - id: requirements-txt-fixer
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.9.2
+    rev: v0.9.3
     hooks:
       # Run the linter.
       - id: ruff
diff --git a/README.md b/README.md
index 876084d..23702cb 100644
--- a/README.md
+++ b/README.md
@@ -161,3 +161,19 @@ Several factors influence real-world performance beyond raw decoding speed:
 - When needing extensive image processing features, OpenCV remains a strong choice
 - Consider dependency size and installation complexity
 - Evaluate the full image processing pipeline, not just JPEG decoding
+
+
+## Citation
+
+If you found this work useful, please cite:
+```bibtex
+@misc{iglovikov2025speed,
+  title={Need for Speed: A Comprehensive Benchmark of JPEG Decoders in Python},
+  author={Vladimir Iglovikov},
+  year={2025},
+  eprint={2501.13131},
+  archivePrefix={arXiv},
+  primaryClass={eess.IV},
+  doi={10.48550/arXiv.2501.13131}
+}
+```
diff --git a/imread_benchmark/benchmark_single.py b/imread_benchmark/benchmark_single.py
index e905a97..826cc26 100644
--- a/imread_benchmark/benchmark_single.py
+++ b/imread_benchmark/benchmark_single.py
@@ -192,9 +192,13 @@ def main():
     output_dir = args.output_dir / system_id
     output_dir.mkdir(parents=True, exist_ok=True)
 
-    # Get image paths
-    image_paths = sorted(Path(args.data_dir).glob("*.*"))[: args.num_images]
-    image_paths = [str(x) for x in image_paths]
+    # Supported image extensions, lowercase only: suffixes are lowercased before the lookup
+    image_extensions = {".jpg", ".jpeg"}
+
+    # Get image paths recursively, filtering for supported extensions
+    image_paths = [str(p) for p in sorted(Path(args.data_dir).rglob("*")) if p.suffix.lower() in image_extensions][
+        : args.num_images
+    ]
 
     # Run benchmark
     results = {
diff --git a/run_benchmarks.sh b/run_benchmarks.sh
index f648da8..f38593f 100755
--- a/run_benchmarks.sh
+++ b/run_benchmarks.sh
@@ -12,7 +12,7 @@ to output//_results.json
 Arguments:
     path_to_image_directory  (Required) Directory containing images to benchmark
     num_images               (Optional) Number of images to process (default: 2000)
-    num_runs                 (Optional) Number of benchmark runs (default: 5)
+    num_runs                 (Optional) Number of benchmark runs (default: 20)
 
 Example usage:
     # Basic usage with defaults (2000 images, 5 runs):
@@ -62,6 +62,19 @@ mkdir -p output
 # List of libraries to benchmark
 LIBRARIES=("opencv" "pillow" "jpeg4py" "skimage" "imageio" "torchvision" "tensorflow" "kornia" "pillow-simd")
 
+# Function to get libraries based on OS.
+# Prints the space-separated names from LIBRARIES to benchmark on this host;
+# when uname reports "Darwin" (macOS), jpeg4py and pillow-simd are left out.
+get_libraries() {
+    if [[ "$(uname)" == "Darwin" ]]; then
+        # Skip jpeg4py and pillow-simd on macOS; -x matches whole names only,
+        # so an entry that merely contains one of these strings is kept.
+        printf '%s\n' "${LIBRARIES[@]}" | grep -vx -e "jpeg4py" -e "pillow-simd" | tr '\n' ' '
+    else
+        echo "${LIBRARIES[@]}"
+    fi
+}
+
 # Function to create and activate virtual environment
 setup_venv() {
     local lib=$1
@@ -118,7 +131,7 @@ fi
 
 DATA_DIR=$1
 NUM_IMAGES=${2:-2000}
-NUM_RUNS=${3:-5}
+NUM_RUNS=${3:-20}
 
 echo "Starting benchmarks with:"
 echo " Image directory: $DATA_DIR"
@@ -127,7 +140,7 @@ echo " Number of runs: $NUM_RUNS"
 echo
 
 # Run benchmarks for each library
-for lib in "${LIBRARIES[@]}"; do
+for lib in $(get_libraries); do
     echo "Processing $lib..."
     setup_venv "$lib"
     run_benchmark "$lib"