Skip to content

Commit d3039a8

Browse files
Merge pull request #78 from StabRise/fix_yolo_onnx
fix: Fixed convert color schema in YoloOnnxDetector.py
2 parents 32a44b1 + e06be07 commit d3039a8

6 files changed

Lines changed: 38 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
## 0.2.4 - 02.11.2025
1+
## [unreleased]
2+
3+
### 🚀 Features
4+
5+
- Added param 'returnEmpty' to [ImageCropBoxes](https://scaledp.stabrise.com/en/latest/image/image_crop_boxes.html) to avoid exceptions if no boxes are found
6+
7+
### 🐛 Bug Fixes
8+
9+
- Fixed convert color schema in [YoloOnnxDetector](https://scaledp.stabrise.com/en/latest/models/detectors/yolo_onnx_detector.html)
10+
11+
12+
## [0.2.4] - 02.11.2025
213

314
### 🚀 Features
415

docs/source/image/image_crop_boxes.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from scaledp import FaceDetector, ImageCropBoxes, PipelineModel
1212

1313
# Step 1: Detect faces in images
14-
detector = FaceDetector(
14+
face_detector = FaceDetector(
1515
inputCol="image",
1616
outputCol="boxes",
1717
keepInputData=True,
@@ -28,10 +28,11 @@ cropper = ImageCropBoxes(
2828
limit=5,
2929
noCrop=True,
3030
autoRotate=False, # Automatically rotate crops if box height > width
31+
returnEmpty=True, # Return empty list if no boxes found
3132
)
3233

3334
# Build and run the pipeline
34-
pipeline = PipelineModel(stages=[detector, cropper])
35+
pipeline = PipelineModel(stages=[face_detector, cropper])
3536
result = pipeline.transform(image_df)
3637
result.show_image("cropped_image")
3738
```
@@ -53,11 +54,13 @@ result.show_image("cropped_image")
5354
| noCrop | bool | Raise error if no boxes to crop | True |
5455
| limit | int | Limit number of crops per image | 0 (no limit) |
5556
| autoRotate | bool | Auto rotate crop if box height > width | True |
57+
| returnEmpty | bool | Return empty list if no boxes found | False |
5658

5759
## Notes
58-
- Crops are performed using bounding boxes from the `boxes` column.
59-
- If `noCrop` is True and no boxes are present, an error is raised.
60+
- Crops are performed using bounding boxes from the `boxes` column (e.g., output of [FaceDetector](https://scaledp.stabrise.com/en/latest/models/detectors/face_detector.html)).
61+
- If `noCrop` is True and no boxes are present, an error is raised unless `returnEmpty` is True.
6062
- If `limit` is set, only the first N boxes are used for cropping.
6163
- If `autoRotate` is True, crops are rotated if the bounding box height is greater than its width.
64+
- If `returnEmpty` is True, returns an empty list of images if no boxes are found (prevents exceptions).
6265
- Supports distributed processing with Spark.
6366
- Errors can be propagated or handled gracefully based on `propagateError`.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "scaledp"
3-
version = "0.2.4"
3+
version = "0.2.5rc1"
44
description = "ScaleDP is a library for processing documents using Apache Spark and LLMs"
55
authors = ["Mykola Melnyk <mykola@stabrise.com>"]
66
repository = "https://github.com/StabRise/scaledp"

scaledp/image/ImageCropBoxes.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ class ImageCropBoxes(
8181
typeConverter=TypeConverters.toBoolean,
8282
)
8383

84+
returnEmpty = Param(
85+
Params._dummy(),
86+
"returnEmpty",
87+
"Return Empty list of images in case no boxes.",
88+
typeConverter=TypeConverters.toBoolean,
89+
)
90+
8491
defaultParams = MappingProxyType(
8592
{
8693
"inputCols": ["image", "boxes"],
@@ -94,6 +101,7 @@ class ImageCropBoxes(
94101
"noCrop": True,
95102
"limit": 0,
96103
"autoRotate": True,
104+
"returnEmpty": False,
97105
},
98106
)
99107

@@ -142,7 +150,7 @@ def transform_udf(self, image, data):
142150

143151
if self.getNoCrop() and len(results) == 0:
144152
raise ImageCropError("No boxes to crop")
145-
if len(results) == 0:
153+
if not self.getReturnEmpty() and len(results) == 0:
146154
results.append(
147155
Image.from_pil(img, image.path, image.imageType, image.resolution),
148156
)
@@ -153,7 +161,9 @@ def transform_udf(self, image, data):
153161
logging.warning(exception)
154162
if self.getPropagateError():
155163
raise ImageCropError from e
156-
return Image(image.path, image.imageType, data=bytes(), exception=exception)
164+
return [
165+
Image(image.path, image.imageType, data=bytes(), exception=exception),
166+
]
157167
return results
158168

159169
def _transform(self, dataset):

scaledp/models/detectors/YoloOnnxDetector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def call_detector(cls, images, params):
9797
for image, image_path in images:
9898
boxes = []
9999
# Convert PIL to NumPy (RGB)
100-
image_np = np.array(image)
100+
image_np = np.array(image.convert("RGB"))
101101
raw_boxes, scores, class_ids = detector.detect_objects(image_np)
102102
# Expand boxes by padding percent if provided
103103
pad_percent = int(params.get("padding", 0)) if params is not None else 0

scaledp/models/detectors/yolo/yolo.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ def rescale_image_with_padding(
7171
new_height = int(self.original_height * self.scale_factor)
7272

7373
# Resize image
74-
resized_image = cv2.resize(image, (new_width, new_height))
74+
resized_image = cv2.resize(
75+
image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
76+
)
7577

7678
# Calculate padding to center the image
7779
self.pad_x = (target_width - new_width) // 2
@@ -129,19 +131,16 @@ def restore_coordinates(self, boxes: np.ndarray) -> np.ndarray:
129131

130132
def prepare_input(self, image):
131133
# Store original dimensions for coordinate restoration
132-
self.img_height, self.img_width = image.shape[:2]
133-
134-
input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
135134

136135
# Rescale image with padding instead of simple resize
137136
input_img = self.rescale_image_with_padding(
138-
input_img, (self.input_width, self.input_height)
137+
image, (self.input_width, self.input_height)
139138
)
140139

141140
# Scale input pixel values to 0 to 1
142141
input_img = input_img / 255.0
143142
input_img = input_img.transpose(2, 0, 1)
144-
return input_img[np.newaxis, :, :, :].astype(np.float32)
143+
return np.expand_dims(input_img, 0).astype(np.float32)
145144

146145
def inference(self, input_tensor):
147146
return self.session.run(self.output_names, {self.input_names[0]: input_tensor})

0 commit comments

Comments
 (0)