From 8b3e0bd73e52ddda5f7ec925b7aafdedf0a9f3f9 Mon Sep 17 00:00:00 2001 From: vivaansinghvi07 Date: Sun, 9 Nov 2025 20:16:10 +0000 Subject: [PATCH 1/2] added code to perform inference, for now testing on the botcam --- .../launch/ml_executor_launch.xml | 2 +- .../dummy_botcam_publisher.py | 6 +++-- .../mrobosub_perception/ml_executor.py | 27 +++++-------------- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/mrobosub_perception/launch/ml_executor_launch.xml b/mrobosub_perception/launch/ml_executor_launch.xml index 78109a4e..251ac0ce 100644 --- a/mrobosub_perception/launch/ml_executor_launch.xml +++ b/mrobosub_perception/launch/ml_executor_launch.xml @@ -1,3 +1,3 @@ - + diff --git a/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py b/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py index ef6b8f8f..d6148f6d 100644 --- a/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py +++ b/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py @@ -1,4 +1,5 @@ import rclpy +from pathlib import Path from mrobosub_lib import Node from sensor_msgs.msg import Image from cv_bridge import CvBridge @@ -16,17 +17,18 @@ def __init__(self): def loop(self): - img_path = f"../dummy_botcam_images/{self.image_number}.png" + img_path = f"{Path(__file__).parent}/bbox.png" try: cv_img = cv2.imread(img_path) self.w = 480 self.h = 640 + cv_img = cv2.resize(cv_img, (self.h, self.w), interpolation=cv2.INTER_CUBIC) cv_img = cv2.rectangle(cv_img, (self.w // 2 - 200 - 7, self.h // 2 - 200 + 4), (self.w // 2 + 200 - 7, self.h // 2 + 200 + 4), (255, 255, 255, 3)) img_msg = self.br.cv2_to_imgmsg(cv_img, encoding='bgr8') self.pub.publish(img_msg) except Exception as e: - self.get_logger().error(f"Failed to publish dummy botcam image {img_path}: {e}") + self.get_logger().error(f"Failed to publish dummy botcam image {img_path}: {e}, {cv_img.shape}") def main(): diff --git a/mrobosub_perception/mrobosub_perception/ml_executor.py b/mrobosub_perception/mrobosub_perception/ml_executor.py index 7c60bbf0..56adc11a 100755 --- a/mrobosub_perception/mrobosub_perception/ml_executor.py +++ b/mrobosub_perception/mrobosub_perception/ml_executor.py @@ -5,6 +5,7 @@ import time import torch +from ultralytics import YOLO import rclpy import numpy as np from cv_bridge import CvBridge @@ -15,28 +16,13 @@ from mrobosub_msgs.msg import Detection, Detections -def load_yolo(): - # load model - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - yolo_path = os.path.join(path, "yolov5") - - model_path = os.path.join(path, "models/2025_best.pt") - print(yolo_path) - print(model_path) - model = torch.hub.load( - yolo_path, "custom", path=model_path, source="local" - ) # local repo - model.conf = 0.1 # NMS confidence threshold - return model - - class MlExecutor(Node): def __init__(self, run_until_time: float): super().__init__("ml_executor") - self.model = load_yolo() + self.model = YOLO("yolo11n.pt") self.run_until_time = run_until_time self.bridge = CvBridge() - self.create_subscription(Image, "/zed2/zed_node/rgb/image_rect_color", self.zed_callback, qos_profile = 1) + self.create_subscription(Image, "/dummy_botcam", self.zed_callback, qos_profile = 1) self.create_subscription(Float64, "/ml/run_until", self.run_until_callback, qos_profile=1) self.detection_pub = self.create_publisher(Detections, "/ml/detections", qos_profile=1) @@ -49,11 +35,12 @@ def zed_callback(self, image: Image): # Find any objects in the image height, width, channels = image_ocv.shape - outputs = self.model(image_ocv, size=width) # get raw detection data + outputs = self.model(image_ocv, imgsz=width) # get raw detection data - detections = outputs.xyxy[0].cpu().numpy() + detections = outputs[0].boxes.xyxy.cpu().numpy() + self.get_logger().info(f"{detections}") - print(f"TIME: {time.time() - start}") + self.get_logger().info(f"TIME: {time.time() - start}") message = Detections( detections=(Detection(*d) for d in detections), width=width, From 46636dca78eab4a26208cf0c8e7a3e8c971a820c Mon Sep 17 00:00:00 2001 From: vivaansinghvi07 Date: Wed, 19 Nov 2025 00:29:46 +0000 Subject: [PATCH 2/2] changed executor to run with yolo11n now --- .../mrobosub_perception/ml_executor.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mrobosub_perception/mrobosub_perception/ml_executor.py b/mrobosub_perception/mrobosub_perception/ml_executor.py index 56adc11a..880c8598 100755 --- a/mrobosub_perception/mrobosub_perception/ml_executor.py +++ b/mrobosub_perception/mrobosub_perception/ml_executor.py @@ -15,6 +15,12 @@ from sensor_msgs.msg import Image from mrobosub_msgs.msg import Detection, Detections +def to_detection(det: np.ndarray) -> Detection: + msg = Detection() + msg.left, msg.top, msg.right, msg.bottom, msg.confidence = map(float, det[:5]) + msg.classification = int(det[5]) + return msg + class MlExecutor(Node): def __init__(self, run_until_time: float): @@ -37,14 +43,13 @@ def zed_callback(self, image: Image): height, width, channels = image_ocv.shape outputs = self.model(image_ocv, imgsz=width) # get raw detection data - detections = outputs[0].boxes.xyxy.cpu().numpy() - self.get_logger().info(f"{detections}") + detections = outputs[0].boxes.data.cpu().numpy() self.get_logger().info(f"TIME: {time.time() - start}") message = Detections( - detections=(Detection(*d) for d in detections), - width=width, - height=height, + detections=([to_detection(d) for d in detections]), + width=float(width), + height=float(height), ) self.detection_pub.publish(message)