From c7e1914313dc007afa415082643068eba3de66a7 Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Thu, 23 Jan 2025 19:07:50 -0800 Subject: [PATCH 1/5] Updated README and filter jpeg4py and pillow-simd on Mac OS --- .gitignore | 2 +- .pre-commit-config.yaml | 2 +- README.md | 15 +++++++++++++++ run_benchmarks.sh | 16 +++++++++++++--- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index d2eb9b3..9e814ab 100644 --- a/.gitignore +++ b/.gitignore @@ -109,4 +109,4 @@ venv.bak/ .idea/ .ruff_cache/ -venvs/ +output/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a3921d..2ca051e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: - id: requirements-txt-fixer - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.9.2 + rev: v0.9.3 hooks: # Run the linter. - id: ruff diff --git a/README.md b/README.md index 876084d..b42557f 100644 --- a/README.md +++ b/README.md @@ -161,3 +161,18 @@ Several factors influence real-world performance beyond raw decoding speed: - When needing extensive image processing features, OpenCV remains a strong choice - Consider dependency size and installation complexity - Evaluate the full image processing pipeline, not just JPEG decoding + +## Citation + +If you found this work useful, please cite: +```bibtex +@misc{iglovikov2025need, + title={Need for Speed: A Comprehensive Benchmark of JPEG Decoders in Python}, + author={Vladimir Iglovikov}, + year={2025}, + eprint={2501.13131}, + archivePrefix={arXiv}, + primaryClass={eess.IV}, + doi={10.48550/arXiv.2501.13131} +} +``` diff --git a/run_benchmarks.sh b/run_benchmarks.sh index f648da8..f38593f 100755 --- a/run_benchmarks.sh +++ b/run_benchmarks.sh @@ -12,7 +12,7 @@ to output//_results.json Arguments: path_to_image_directory (Required) Directory containing images to benchmark num_images (Optional) Number of images to process (default: 2000) - num_runs (Optional) Number of 
benchmark runs (default: 5) + num_runs (Optional) Number of benchmark runs (default: 20) Example usage: # Basic usage with defaults (2000 images, 5 runs): @@ -62,6 +62,16 @@ mkdir -p output # List of libraries to benchmark LIBRARIES=("opencv" "pillow" "jpeg4py" "skimage" "imageio" "torchvision" "tensorflow" "kornia" "pillow-simd") +# Function to get libraries based on OS +get_libraries() { + if [[ "$(uname)" == "Darwin" ]]; then + # Skip jpeg4py and pillow-simd on macOS + echo "${LIBRARIES[@]}" | tr ' ' '\n' | grep -v "jpeg4py" | grep -v "pillow-simd" | tr '\n' ' ' + else + echo "${LIBRARIES[@]}" + fi +} + # Function to create and activate virtual environment setup_venv() { local lib=$1 @@ -118,7 +128,7 @@ fi DATA_DIR=$1 NUM_IMAGES=${2:-2000} -NUM_RUNS=${3:-5} +NUM_RUNS=${3:-20} echo "Starting benchmarks with:" echo " Image directory: $DATA_DIR" @@ -127,7 +137,7 @@ echo " Number of runs: $NUM_RUNS" echo # Run benchmarks for each library -for lib in "${LIBRARIES[@]}"; do +for lib in $(get_libraries); do echo "Processing $lib..." 
setup_venv "$lib" run_benchmark "$lib" From d02b35109771138edcd33f0cc89346500ac6c9ce Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Thu, 23 Jan 2025 19:13:07 -0800 Subject: [PATCH 2/5] Updated README and filter jpeg4py and pillow-simd on Mac OS --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b42557f..23702cb 100644 --- a/README.md +++ b/README.md @@ -162,11 +162,12 @@ Several factors influence real-world performance beyond raw decoding speed: - Consider dependency size and installation complexity - Evaluate the full image processing pipeline, not just JPEG decoding + ## Citation If you found this work useful, please cite: ```bibtex -@misc{iglovikov2025need, +@misc{iglovikov2025speed, title={Need for Speed: A Comprehensive Benchmark of JPEG Decoders in Python}, author={Vladimir Iglovikov}, year={2025}, From 8de885e38cb07b8f31a8c0e4dc21852d1ae37b47 Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Thu, 23 Jan 2025 19:14:08 -0800 Subject: [PATCH 3/5] Fix in GitIgnore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9e814ab..92be3b9 100644 --- a/.gitignore +++ b/.gitignore @@ -110,3 +110,4 @@ venv.bak/ .ruff_cache/ output/ +venvs/ From 94e3478e3548b2b3d4a42cb81251c489c72ba8f9 Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Thu, 23 Jan 2025 19:14:40 -0800 Subject: [PATCH 4/5] Fix --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 92be3b9..d2eb9b3 100644 --- a/.gitignore +++ b/.gitignore @@ -109,5 +109,4 @@ venv.bak/ .idea/ .ruff_cache/ -output/ venvs/ From de281e0d3f9f79ad1a13770cef914e217dde16cc Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Thu, 23 Jan 2025 19:18:12 -0800 Subject: [PATCH 5/5] Can use rglob --- imread_benchmark/benchmark_single.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/imread_benchmark/benchmark_single.py 
b/imread_benchmark/benchmark_single.py index e905a97..826cc26 100644 --- a/imread_benchmark/benchmark_single.py +++ b/imread_benchmark/benchmark_single.py @@ -192,9 +192,13 @@ def main(): output_dir = args.output_dir / system_id output_dir.mkdir(parents=True, exist_ok=True) - # Get image paths - image_paths = sorted(Path(args.data_dir).glob("*.*"))[: args.num_images] - image_paths = [str(x) for x in image_paths] + # Define supported image extensions + image_extensions = {".jpg", ".jpeg", ".JPEG", ".JPG"} + + # Get image paths recursively, filtering for supported extensions + image_paths = [str(p) for p in sorted(Path(args.data_dir).rglob("*")) if p.suffix.lower() in image_extensions][ + : args.num_images + ] # Run benchmark results = {