Merged
2 changes: 1 addition & 1 deletion `.pre-commit-config.yaml`

```diff
@@ -39,7 +39,7 @@ repos:
       - id: requirements-txt-fixer
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.9.2
+    rev: v0.9.3
     hooks:
       # Run the linter.
       - id: ruff
```
16 changes: 16 additions & 0 deletions `README.md`

````diff
@@ -161,3 +161,19 @@ Several factors influence real-world performance beyond raw decoding speed:
 - When needing extensive image processing features, OpenCV remains a strong choice
 - Consider dependency size and installation complexity
 - Evaluate the full image processing pipeline, not just JPEG decoding
+
+
+## Citation
+
+If you found this work useful, please cite:
+```bibtex
+@misc{iglovikov2025speed,
+  title={Need for Speed: A Comprehensive Benchmark of JPEG Decoders in Python},
+  author={Vladimir Iglovikov},
+  year={2025},
+  eprint={2501.13131},
+  archivePrefix={arXiv},
+  primaryClass={eess.IV},
+  doi={10.48550/arXiv.2501.13131}
+}
+```
````
10 changes: 7 additions & 3 deletions `imread_benchmark/benchmark_single.py`

```diff
@@ -192,9 +192,13 @@ def main():
     output_dir = args.output_dir / system_id
     output_dir.mkdir(parents=True, exist_ok=True)
 
-    # Get image paths
-    image_paths = sorted(Path(args.data_dir).glob("*.*"))[: args.num_images]
-    image_paths = [str(x) for x in image_paths]
+    # Define supported image extensions
+    image_extensions = {".jpg", ".jpeg", ".JPEG", ".JPG"}
+
+    # Get image paths recursively, filtering for supported extensions
+    image_paths = [str(p) for p in sorted(Path(args.data_dir).rglob("*")) if p.suffix.lower() in image_extensions][
+        : args.num_images
+    ]
 
     # Run benchmark
     results = {
```
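For illustration, the new path-selection logic can be sketched as a standalone helper (a minimal sketch; the `find_images` name and the sample directory layout are assumptions, not part of the PR). Because the suffix is lowered before the membership test, the match is already case-insensitive, which makes the uppercase entries in the PR's extension set redundant:

```python
from pathlib import Path

# Extensions accepted by the benchmark; lowercase only, since suffixes
# are lowered before the membership test below.
IMAGE_EXTENSIONS = {".jpg", ".jpeg"}


def find_images(data_dir: str, num_images: int) -> list[str]:
    """Recursively collect up to num_images JPEG paths, sorted for determinism."""
    # rglob("*") also yields directories; they have an empty suffix
    # and are filtered out by the extension check.
    paths = sorted(Path(data_dir).rglob("*"))
    return [str(p) for p in paths if p.suffix.lower() in IMAGE_EXTENSIONS][:num_images]
```

Compared with the old `glob("*.*")`, this descends into subdirectories and skips non-JPEG files instead of passing them to the decoder.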
16 changes: 13 additions & 3 deletions `run_benchmarks.sh`

```diff
@@ -12,7 +12,7 @@ to output/<operating_system>/<library>_results.json
 Arguments:
   path_to_image_directory  (Required) Directory containing images to benchmark
   num_images               (Optional) Number of images to process (default: 2000)
-  num_runs                 (Optional) Number of benchmark runs (default: 5)
+  num_runs                 (Optional) Number of benchmark runs (default: 20)
 
 Example usage:
   # Basic usage with defaults (2000 images, 5 runs):
@@ -62,6 +62,16 @@ mkdir -p output
 # List of libraries to benchmark
 LIBRARIES=("opencv" "pillow" "jpeg4py" "skimage" "imageio" "torchvision" "tensorflow" "kornia" "pillow-simd")
 
+# Function to get libraries based on OS
+get_libraries() {
+    if [[ "$(uname)" == "Darwin" ]]; then
+        # Skip jpeg4py and pillow-simd on macOS
+        echo "${LIBRARIES[@]}" | tr ' ' '\n' | grep -v "jpeg4py" | grep -v "pillow-simd" | tr '\n' ' '
+    else
+        echo "${LIBRARIES[@]}"
+    fi
+}
+
 # Function to create and activate virtual environment
 setup_venv() {
     local lib=$1
@@ -118,7 +128,7 @@ fi
 
 DATA_DIR=$1
 NUM_IMAGES=${2:-2000}
-NUM_RUNS=${3:-5}
+NUM_RUNS=${3:-20}
 
 echo "Starting benchmarks with:"
 echo "  Image directory: $DATA_DIR"
@@ -127,7 +137,7 @@ echo "  Number of runs: $NUM_RUNS"
 echo
 
 # Run benchmarks for each library
-for lib in "${LIBRARIES[@]}"; do
+for lib in $(get_libraries); do
     echo "Processing $lib..."
     setup_venv "$lib"
     run_benchmark "$lib"
```
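The `get_libraries` shell function above filters the benchmark matrix by operating system. The same selection logic, expressed in Python for clarity (a hypothetical sketch, not code from the repository; the function and constant names merely mirror the script):

```python
import platform

# Full benchmark matrix, mirroring the LIBRARIES array in run_benchmarks.sh.
LIBRARIES = [
    "opencv", "pillow", "jpeg4py", "skimage", "imageio",
    "torchvision", "tensorflow", "kornia", "pillow-simd",
]

# jpeg4py and pillow-simd are skipped on macOS, per the shell script above.
MACOS_UNSUPPORTED = {"jpeg4py", "pillow-simd"}


def get_libraries(system=None):
    """Return the libraries to benchmark for the given OS name.

    `system` defaults to platform.system(); "Darwin" means macOS.
    """
    system = system or platform.system()
    if system == "Darwin":
        return [lib for lib in LIBRARIES if lib not in MACOS_UNSUPPORTED]
    return LIBRARIES
```

Keeping the exclusions in a named set avoids the chained `grep -v` calls and makes the skipped libraries easy to audit.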