From 8b3e0bd73e52ddda5f7ec925b7aafdedf0a9f3f9 Mon Sep 17 00:00:00 2001
From: vivaansinghvi07 <vsinghvi@umich.edu>
Date: Sun, 9 Nov 2025 20:16:10 +0000
Subject: [PATCH 1/2] added code to perform inference, for now testing on the
 botcam

---
 .../launch/ml_executor_launch.xml             |  2 +-
 .../dummy_botcam_publisher.py                 |  6 +++--
 .../mrobosub_perception/ml_executor.py        | 27 +++++--------------
 3 files changed, 12 insertions(+), 23 deletions(-)
diff --git a/mrobosub_perception/launch/ml_executor_launch.xml b/mrobosub_perception/launch/ml_executor_launch.xml
index 78109a4e..251ac0ce 100644
--- a/mrobosub_perception/launch/ml_executor_launch.xml
+++ b/mrobosub_perception/launch/ml_executor_launch.xml
@@ -1,3 +1,3 @@
 <launch>
-    <node name="ml_executor" pkg="mrobosub_perception" exec="ml_executor" args="0"/>
+    <node name="ml_executor" pkg="mrobosub_perception" exec="ml_executor" args="1"/>
 </launch>
diff --git a/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py b/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py
index ef6b8f8f..d6148f6d 100644
--- a/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py
+++ b/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py
@@ -1,4 +1,5 @@
 import rclpy
+from pathlib import Path
 from mrobosub_lib import Node
 from sensor_msgs.msg import Image
 from cv_bridge import CvBridge
@@ -16,17 +17,18 @@ def __init__(self):
         
 
     def loop(self):
-            img_path = f"../dummy_botcam_images/{self.image_number}.png"
+            img_path = f"{Path(__file__).parent}/bbox.png"
 
             try:
                 cv_img = cv2.imread(img_path)
                 self.w = 480
                 self.h = 640
+                cv_img = cv2.resize(cv_img, (self.h, self.w), interpolation=cv2.INTER_CUBIC)
                 cv_img = cv2.rectangle(cv_img, (self.w // 2 - 200 - 7, self.h // 2 - 200 + 4), (self.w // 2 + 200 - 7, self.h // 2 + 200 + 4), (255, 255, 255, 3))
                 img_msg = self.br.cv2_to_imgmsg(cv_img, encoding='bgr8')
                 self.pub.publish(img_msg)
             except Exception as e:
-                self.get_logger().error(f"Failed to publish dummy botcam image {img_path}: {e}")
+                self.get_logger().error(f"Failed to publish dummy botcam image {img_path}: {e}, {cv_img.shape}")
                 
 
 def main():
diff --git a/mrobosub_perception/mrobosub_perception/ml_executor.py b/mrobosub_perception/mrobosub_perception/ml_executor.py
index 7c60bbf0..56adc11a 100755
--- a/mrobosub_perception/mrobosub_perception/ml_executor.py
+++ b/mrobosub_perception/mrobosub_perception/ml_executor.py
@@ -5,6 +5,7 @@
 import time
 
 import torch
+from ultralytics import YOLO
 import rclpy
 import numpy as np
 from cv_bridge import CvBridge
@@ -15,28 +16,13 @@
 from mrobosub_msgs.msg import Detection, Detections
 
 
-def load_yolo():
-    # load model
-    path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-    yolo_path = os.path.join(path, "yolov5")
-
-    model_path = os.path.join(path, "models/2025_best.pt")
-    print(yolo_path)
-    print(model_path)
-    model = torch.hub.load(
-        yolo_path, "custom", path=model_path, source="local"
-    )  # local repo
-    model.conf = 0.1  # NMS confidence threshold
-    return model
-
-
 class MlExecutor(Node):
     def __init__(self, run_until_time: float):
         super().__init__("ml_executor")
-        self.model = load_yolo()
+        self.model = YOLO("yolo11n.pt")
         self.run_until_time = run_until_time
         self.bridge = CvBridge()
-        self.create_subscription(Image, "/zed2/zed_node/rgb/image_rect_color", self.zed_callback, qos_profile = 1)
+        self.create_subscription(Image, "/dummy_botcam", self.zed_callback, qos_profile = 1)
         self.create_subscription(Float64, "/ml/run_until", self.run_until_callback, qos_profile=1)
         self.detection_pub = self.create_publisher(Detections, "/ml/detections", qos_profile=1)
 
@@ -49,11 +35,12 @@ def zed_callback(self, image: Image):
 
         # Find any objects in the image
         height, width, channels = image_ocv.shape
-        outputs = self.model(image_ocv, size=width)  # get raw detection data
+        outputs = self.model(image_ocv, imgsz=width)  # get raw detection data
 
-        detections = outputs.xyxy[0].cpu().numpy()
+        detections = outputs[0].boxes.xyxy.cpu().numpy()
+        self.get_logger().info(f"{detections}")
 
-        print(f"TIME: {time.time() - start}")
+        self.get_logger().info(f"TIME: {time.time() - start}")
         message = Detections(
             detections=(Detection(*d) for d in detections),
             width=width,

From 46636dca78eab4a26208cf0c8e7a3e8c971a820c Mon Sep 17 00:00:00 2001
From: vivaansinghvi07 <vsinghvi@umich.edu>
Date: Wed, 19 Nov 2025 00:29:46 +0000
Subject: [PATCH 2/2] changed executor to run with yolo11n now

---
 .../mrobosub_perception/ml_executor.py            | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/mrobosub_perception/mrobosub_perception/ml_executor.py b/mrobosub_perception/mrobosub_perception/ml_executor.py
index 56adc11a..880c8598 100755
--- a/mrobosub_perception/mrobosub_perception/ml_executor.py
+++ b/mrobosub_perception/mrobosub_perception/ml_executor.py
@@ -15,6 +15,12 @@
 from sensor_msgs.msg import Image
 from mrobosub_msgs.msg import Detection, Detections
 
+def to_detection(det: np.ndarray) -> Detection:
+    msg = Detection()
+    msg.left, msg.top, msg.right, msg.bottom, msg.confidence = map(float, det[:5])
+    msg.classification = int(det[5])
+    return msg
+
 
 class MlExecutor(Node):
     def __init__(self, run_until_time: float):
@@ -37,14 +43,13 @@ def zed_callback(self, image: Image):
         height, width, channels = image_ocv.shape
         outputs = self.model(image_ocv, imgsz=width)  # get raw detection data
 
-        detections = outputs[0].boxes.xyxy.cpu().numpy()
-        self.get_logger().info(f"{detections}")
+        detections = outputs[0].boxes.data.cpu().numpy()
 
         self.get_logger().info(f"TIME: {time.time() - start}")
         message = Detections(
-            detections=(Detection(*d) for d in detections),
-            width=width,
-            height=height,
+            detections=([to_detection(d) for d in detections]),
+            width=float(width),
+            height=float(height),
         )
         self.detection_pub.publish(message)