Skip to content

Commit d3039a8

Browse files
Merge pull request #78 from StabRise/fix_yolo_onnx
fix: Fixed convert color schema in YoloOnnxDetector.py
2 parents 32a44b1 + e06be07 commit d3039a8

6 files changed

Lines changed: 38 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
## 0.2.4 - 02.11.2025
1+
## [unreleased]
2+
3+
### 🚀 Features
4+
5+
- Added param 'returnEmpty' to [ImageCropBoxes](https://scaledp.stabrise.com/en/latest/image/image_crop_boxes.html) to avoid exceptions if no boxes are found
6+
7+
### 🐛 Bug Fixes
8+
9+
- Fixed convert color schema in [YoloOnnxDetector](https://scaledp.stabrise.com/en/latest/models/detectors/yolo_onnx_detector.html)
10+
11+
12+
## [0.2.4] - 02.11.2025
213

314
### 🚀 Features
415

docs/source/image/image_crop_boxes.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from scaledp import FaceDetector, ImageCropBoxes, PipelineModel
1212

1313
# Step 1: Detect faces in images
14-
detector = FaceDetector(
14+
face_detector = FaceDetector(
1515
inputCol="image",
1616
outputCol="boxes",
1717
keepInputData=True,
@@ -28,10 +28,11 @@ cropper = ImageCropBoxes(
2828
limit=5,
2929
noCrop=True,
3030
autoRotate=False, # Automatically rotate crops if box height > width
31+
returnEmpty=True, # Return empty list if no boxes found
3132
)
3233

3334
# Build and run the pipeline
34-
pipeline = PipelineModel(stages=[detector, cropper])
35+
pipeline = PipelineModel(stages=[face_detector, cropper])
3536
result = pipeline.transform(image_df)
3637
result.show_image("cropped_image")
3738
```
@@ -53,11 +54,13 @@ result.show_image("cropped_image")
5354
| noCrop | bool | Raise error if no boxes to crop | True |
5455
| limit | int | Limit number of crops per image | 0 (no limit) |
5556
| autoRotate | bool | Auto rotate crop if box height > width | True |
57+
| returnEmpty | bool | Return empty list if no boxes found | False |
5658

5759
## Notes
58-
- Crops are performed using bounding boxes from the `boxes` column.
59-
- If `noCrop` is True and no boxes are present, an error is raised.
60+
- Crops are performed using bounding boxes from the `boxes` column (e.g., output of [FaceDetector](https://scaledp.stabrise.com/en/latest/models/detectors/face_detector.html)).
61+
- If `noCrop` is True and no boxes are present, an error is raised unless `returnEmpty` is True.
6062
- If `limit` is set, only the first N boxes are used for cropping.
6163
- If `autoRotate` is True, crops are rotated if the bounding box height is greater than its width.
64+
- If `returnEmpty` is True, returns an empty list of images if no boxes are found (prevents exceptions).
6265
- Supports distributed processing with Spark.
6366
- Errors can be propagated or handled gracefully based on `propagateError`.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "scaledp"
3-
version = "0.2.4"
3+
version = "0.2.5rc1"
44
description = "ScaleDP is a library for processing documents using Apache Spark and LLMs"
55
authors = ["Mykola Melnyk <mykola@stabrise.com>"]
66
repository = "https://github.com/StabRise/scaledp"

scaledp/image/ImageCropBoxes.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ class ImageCropBoxes(
8181
typeConverter=TypeConverters.toBoolean,
8282
)
8383

84+
returnEmpty = Param(
85+
Params._dummy(),
86+
"returnEmpty",
87+
"Return Empty list of images in case no boxes.",
88+
typeConverter=TypeConverters.toBoolean,
89+
)
90+
8491
defaultParams = MappingProxyType(
8592
{
8693
"inputCols": ["image", "boxes"],
@@ -94,6 +101,7 @@ class ImageCropBoxes(
94101
"noCrop": True,
95102
"limit": 0,
96103
"autoRotate": True,
104+
"returnEmpty": False,
97105
},
98106
)
99107

@@ -142,7 +150,7 @@ def transform_udf(self, image, data):
142150

143151
if self.getNoCrop() and len(results) == 0:
144152
raise ImageCropError("No boxes to crop")
145-
if len(results) == 0:
153+
if not self.getReturnEmpty() and len(results) == 0:
146154
results.append(
147155
Image.from_pil(img, image.path, image.imageType, image.resolution),
148156
)
@@ -153,7 +161,9 @@ def transform_udf(self, image, data):
153161
logging.warning(exception)
154162
if self.getPropagateError():
155163
raise ImageCropError from e
156-
return Image(image.path, image.imageType, data=bytes(), exception=exception)
164+
return [
165+
Image(image.path, image.imageType, data=bytes(), exception=exception),
166+
]
157167
return results
158168

159169
def _transform(self, dataset):

scaledp/models/detectors/YoloOnnxDetector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def call_detector(cls, images, params):
9797
for image, image_path in images:
9898
boxes = []
9999
# Convert PIL to NumPy (RGB)
100-
image_np = np.array(image)
100+
image_np = np.array(image.convert("RGB"))
101101
raw_boxes, scores, class_ids = detector.detect_objects(image_np)
102102
# Expand boxes by padding percent if provided
103103
pad_percent = int(params.get("padding", 0)) if params is not None else 0

scaledp/models/detectors/yolo/yolo.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ def rescale_image_with_padding(
7171
new_height = int(self.original_height * self.scale_factor)
7272

7373
# Resize image
74-
resized_image = cv2.resize(image, (new_width, new_height))
74+
resized_image = cv2.resize(
75+
image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
76+
)
7577

7678
# Calculate padding to center the image
7779
self.pad_x = (target_width - new_width) // 2
@@ -129,19 +131,16 @@ def restore_coordinates(self, boxes: np.ndarray) -> np.ndarray:
129131

130132
def prepare_input(self, image):
131133
# Store original dimensions for coordinate restoration
132-
self.img_height, self.img_width = image.shape[:2]
133-
134-
input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
135134

136135
# Rescale image with padding instead of simple resize
137136
input_img = self.rescale_image_with_padding(
138-
input_img, (self.input_width, self.input_height)
137+
image, (self.input_width, self.input_height)
139138
)
140139

141140
# Scale input pixel values to 0 to 1
142141
input_img = input_img / 255.0
143142
input_img = input_img.transpose(2, 0, 1)
144-
return input_img[np.newaxis, :, :, :].astype(np.float32)
143+
return np.expand_dims(input_img, 0).astype(np.float32)
145144

146145
def inference(self, input_tensor):
147146
return self.session.run(self.output_names, {self.input_names[0]: input_tensor})

0 commit comments

Comments
 (0)