diff --git a/mrobosub_perception/launch/ml_executor_launch.xml b/mrobosub_perception/launch/ml_executor_launch.xml index 78109a4e..251ac0ce 100644 --- a/mrobosub_perception/launch/ml_executor_launch.xml +++ b/mrobosub_perception/launch/ml_executor_launch.xml @@ -1,3 +1,3 @@ - + diff --git a/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py b/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py index a568e4f3..822f79fa 100644 --- a/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py +++ b/mrobosub_perception/mrobosub_perception/dummy_botcam_publisher.py @@ -1,4 +1,5 @@ import rclpy +from pathlib import Path from mrobosub_lib import Node from sensor_msgs.msg import Image from cv_bridge import CvBridge @@ -25,15 +26,16 @@ def declare_params(self): self.set_params() def loop(self): - img_path = f"../dummy_botcam_images/{self.image_number}.png" + img_path = f"{Path(__file__).parent}/bbox.png" try: cv_img = cv2.imread(img_path) + cv_img = cv2.resize(cv_img, (self.h, self.w), interpolation=cv2.INTER_CUBIC) cv_img = cv2.rectangle(cv_img, (self.w // 2 - 200 - 7, self.h // 2 - 200 + 4), (self.w // 2 + 200 - 7, self.h // 2 + 200 + 4), (255, 255, 255, 3)) img_msg = self.br.cv2_to_imgmsg(cv_img, encoding='bgr8') self.pub.publish(img_msg) except Exception as e: - self.get_logger().error(f"Failed to publish dummy botcam image {img_path}: {e}") + self.get_logger().error(f"Failed to publish dummy botcam image {img_path}: {e}, {cv_img.shape}") def main(): diff --git a/mrobosub_perception/mrobosub_perception/ml_executor.py b/mrobosub_perception/mrobosub_perception/ml_executor.py index 7c60bbf0..880c8598 100755 --- a/mrobosub_perception/mrobosub_perception/ml_executor.py +++ b/mrobosub_perception/mrobosub_perception/ml_executor.py @@ -5,6 +5,7 @@ import time import torch +from ultralytics import YOLO import rclpy import numpy as np from cv_bridge import CvBridge @@ -14,29 +15,20 @@ from sensor_msgs.msg import Image from mrobosub_msgs.msg import Detection, Detections - -def load_yolo(): - # load model - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - yolo_path = os.path.join(path, "yolov5") - - model_path = os.path.join(path, "models/2025_best.pt") - print(yolo_path) - print(model_path) - model = torch.hub.load( - yolo_path, "custom", path=model_path, source="local" - ) # local repo - model.conf = 0.1 # NMS confidence threshold - return model +def to_detection(det: np.ndarray) -> Detection: + msg = Detection() + msg.left, msg.top, msg.right, msg.bottom, msg.confidence = map(float, det[:5]) + msg.classification = int(det[5]) + return msg class MlExecutor(Node): def __init__(self, run_until_time: float): super().__init__("ml_executor") - self.model = load_yolo() + self.model = YOLO("yolo11n.pt") self.run_until_time = run_until_time self.bridge = CvBridge() - self.create_subscription(Image, "/zed2/zed_node/rgb/image_rect_color", self.zed_callback, qos_profile = 1) + self.create_subscription(Image, "/dummy_botcam", self.zed_callback, qos_profile = 1) self.create_subscription(Float64, "/ml/run_until", self.run_until_callback, qos_profile=1) self.detection_pub = self.create_publisher(Detections, "/ml/detections", qos_profile=1) @@ -49,15 +41,15 @@ def zed_callback(self, image: Image): # Find any objects in the image height, width, channels = image_ocv.shape - outputs = self.model(image_ocv, size=width) # get raw detection data + outputs = self.model(image_ocv, imgsz=width) # get raw detection data - detections = outputs.xyxy[0].cpu().numpy() + detections = outputs[0].boxes.data.cpu().numpy() - print(f"TIME: {time.time() - start}") + self.get_logger().info(f"TIME: {time.time() - start}") message = Detections( - detections=(Detection(*d) for d in detections), - width=width, - height=height, + detections=([to_detection(d) for d in detections]), + width=float(width), + height=float(height), ) self.detection_pub.publish(message)