diff --git a/971-Robot-Code b/971-Robot-Code index 117a420eb..4bb8d3d20 160000 --- a/971-Robot-Code +++ b/971-Robot-Code @@ -1 +1 @@ -Subproject commit 117a420ebbb98937e13ab05519d06c401b19140e +Subproject commit 4bb8d3d202e6415582f20a93290df787888e98fc diff --git a/zebROS_ws/src/deeptag_ros/CMakeLists.txt b/zebROS_ws/src/deeptag_ros/CMakeLists.txt index 4e5e389ea..012f6885a 100644 --- a/zebROS_ws/src/deeptag_ros/CMakeLists.txt +++ b/zebROS_ws/src/deeptag_ros/CMakeLists.txt @@ -201,6 +201,10 @@ target_link_libraries(run_inference_benchmark PUBLIC deeptag) set_target_properties(run_inference_benchmark PROPERTIES CUDA_ARCHITECTURES "86") set_property(TARGET run_inference_benchmark PROPERTY CUDA_SEPARABLE_COMPILATION ON) +add_executable(decoder_test src/decoder_test.cpp) +target_link_libraries(decoder_test PUBLIC deeptag) +set_target_properties(decoder_test PROPERTIES CUDA_ARCHITECTURES "86") +set_property(TARGET decoder_test PROPERTY CUDA_SEPARABLE_COMPILATION ON) # add_executable(softmax_test src/softmax_test.cpp) # target_link_libraries(softmax_test PUBLIC deeptag) # set_target_properties(softmax_test PROPERTIES CUDA_ARCHITECTURES "86") diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/confidence_filter.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/confidence_filter.h index 84ec7975b..6eaf355f6 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/confidence_filter.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/confidence_filter.h @@ -1,7 +1,7 @@ #ifndef CONFIDENCE_FILTER_INC__ #define CONFIDENCE_FILTER_INC__ -#include "span.hpp" +#include "deeptag_ros/span.hpp" #ifndef __host__ #define __host__ diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_image_tile.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_image_tile.h index 1005c8f2f..f425a961f 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_image_tile.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_image_tile.h @@ -3,7 +3,7 @@ #include #include "device_types.h" -#include "image_format.h" +#include "deeptag_ros/image_format.h" cudaError_t cudaImageTileRGB(const void *input, const imageFormat format, diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_ssd_preprocess.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_ssd_preprocess.h index 0e0e27f56..d69354eac 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_ssd_preprocess.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/cuda_ssd_preprocess.h @@ -1,7 +1,7 @@ #ifndef CUDA_SSD_PREPROCESSING_H__ #define CUDA_SSD_PREPROCESSING_H__ -#include "image_format.h" +#include "deeptag_ros/image_format.h" /* * Downsample and apply pixel normalization, NCHW format diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoded_tag.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoded_tag.h index 748b10114..f7e85ed1d 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoded_tag.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoded_tag.h @@ -3,7 +3,7 @@ #include #include #include "opencv2/core.hpp" -#include "points_and_ids.h" +#include "deeptag_ros/points_and_ids.h" template class DecodedTag @@ -13,10 +13,7 @@ class DecodedTag cv::Mat m_HCrop; int m_tagId; uint64_t m_binaryId; - std::array m_keypointsWithIds; - // TODO : this is just the points from the previous field copied into a different var - // see about combining them - std::array m_keypointsInImage; + PointsAndIDs m_keypointsWithIds; std::array m_roi; int m_mainIdx{0}; diff --git 
a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_engine.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_engine.h index 019dc8962..867ca939b 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_engine.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_engine.h @@ -1,10 +1,10 @@ #ifndef DECODER_ENGINE_INC__ #define DECODER_ENGINE_INC__ -#include "span.hpp" +#include "deeptag_ros/span.hpp" -#include "decoder_preprocess.h" -#include "engine.h" +#include "deeptag_ros/decoder_preprocess.h" +#include "deeptag_ros/engine.h" class DecoderEngineCalibrator : public Int8EntropyCalibrator2 { diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_preprocess.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_preprocess.h index 758d9f94a..38a32b640 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_preprocess.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_preprocess.h @@ -3,7 +3,7 @@ #include #include "cuda_runtime.h" // for cudaError -#include "image_format.h" +#include "deeptag_ros/image_format.h" class DecoderPreprocess { diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_softmax.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_softmax.h index 19dacc637..2ae4969dc 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_softmax.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/decoder_softmax.h @@ -2,8 +2,8 @@ #define DECODER_SOFTMAX_ #include -#include "decoder_softmax_results.h" -#include "span.hpp" +#include "deeptag_ros/decoder_softmax_results.h" +#include "deeptag_ros/span.hpp" // Class to handle DecoderSoftmax ops. diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/deeptag_impls.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/deeptag_impls.h index 47f4004a2..dbb5474da 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/deeptag_impls.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/deeptag_impls.h @@ -1,13 +1,13 @@ #ifndef DEEPTAG_IMPLS_INC__ #define DEEPTAG_IMPLS_INC__ -#include "cuda_event_timing.h" -#include "deeptag.h" -#include "detection_engine.h" -#include "marker_dict.h" -#include "pose_estimator.h" -#include "stag_decoder.h" -#include "stag_detector.h" +#include "deeptag_ros/cuda_event_timing.h" +#include "deeptag_ros/deeptag.h" +#include "deeptag_ros/detection_engine.h" +#include "deeptag_ros/marker_dict.h" +#include "deeptag_ros/pose_estimator.h" +#include "deeptag_ros/stag_decoder.h" +#include "deeptag_ros/stag_detector.h" class DeepTagImplBase { diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/detection_engine.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/detection_engine.h index 3c47e9608..9f3f2a352 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/detection_engine.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/detection_engine.h @@ -1,7 +1,7 @@ #ifndef DETECTION_ENGINE_INC__ #define DETECTION_ENGINE_INC__ -#include "engine.h" +#include "deeptag_ros/engine.h" class DetectionEngineCalibrator : public Int8EntropyCalibrator2 { public: diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/distorted_h_transform.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/distorted_h_transform.h index a3e6140d1..bdfeb5d71 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/distorted_h_transform.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/distorted_h_transform.h @@ -4,9 +4,9 @@ #include #include "opencv2/core.hpp" #include "opencv2/calib3d.hpp" -#include "warp_perspective_points.h" +#include 
"deeptag_ros/warp_perspective_points.h" #undef DEBUG -#include "debug.h" +#include "deeptag_ros/debug.h" template std::array fromCropToUndistorted(const std::array &pointsInCrop, diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/engine.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/engine.h index a1e9950d1..e59bb0477 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/engine.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/engine.h @@ -16,7 +16,7 @@ #include "NvInferRuntimeCommon.h" // for Dims, ILogger #include "driver_types.h" // for cudaStream_t, CUstream_st, CUgraph... -#include "cuda_event_timing.h" // for Timings +#include "deeptag_ros/cuda_event_timing.h" // for Timings class GpuImageWrapper; // Utility methods diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/gpu_image_wrapper.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/gpu_image_wrapper.h index e55351770..565373f6d 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/gpu_image_wrapper.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/gpu_image_wrapper.h @@ -3,7 +3,7 @@ #include #include "driver_types.h" // for cudaStream_t -#include "image_format.h" +#include "deeptag_ros/image_format.h" namespace cv { class Mat; diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/grid_prior.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/grid_prior.h index ee7eb6dd1..6de7431c4 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/grid_prior.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/grid_prior.h @@ -1,8 +1,8 @@ #ifndef GRID_PRIOR_INC__ #define GRID_PRIOR_INC__ #include -#include "grid_prior_value.h" -#include "span.hpp" +#include "deeptag_ros/grid_prior_value.h" +#include "deeptag_ros/span.hpp" // Class to handle all of the GridPrior ops. 
// Stores the grid priors (a map from model output indexes to image coords) along diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/image_format.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/image_format.h index 985ae2338..4ba4aafbe 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/image_format.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/image_format.h @@ -4,6 +4,7 @@ enum class imageFormat { IMAGE_MONO8, + IMAGE_MONO32F, IMAGE_RGB8, IMAGE_BGR8, IMAGE_RGBA8, diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/points_and_ids.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/points_and_ids.h index abcc4242d..a2df2153d 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/points_and_ids.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/points_and_ids.h @@ -2,25 +2,27 @@ #define POINTS_AND_IDS_INC__ #include "opencv2/core.hpp" +template struct PointsAndIDs { PointsAndIDs() - : m_point{0., 0.} - , m_id{-1} - , m_score{0.} { + std::ranges::fill(m_id, -1); + std::ranges::fill(m_score, -1); } - PointsAndIDs(const double x, const double y, const int id, const double score = 0) - : m_point{cv::Point2d{x, y}}, m_id{id}, m_score{score} - { - } - cv::Point2d m_point; - int m_id; - double m_score; + + std::array m_point{}; + std::array m_id{}; + std::array m_score{}; + + size_t size(void) const { return m_point.size(); } // all arrays are the same size friend std::ostream& operator<<(std::ostream &os, const PointsAndIDs &pid) { - os << pid.m_point.x << ", " << pid.m_point.y << " id = " << pid.m_id << " score = " << pid.m_score; + for (size_t ii = 0; ii < pid.m_point.size(); ii++) + { + os << pid.m_point[ii].x << ", " << pid.m_point[ii].y << " id = " << pid.m_id[ii] << " score = " << pid.m_score[ii] << std::endl; + } return os; } }; diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/pose_estimator.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/pose_estimator.h index 4a87858ef..0f3689647 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/pose_estimator.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/pose_estimator.h @@ -8,7 +8,7 @@ template struct BoolTag; #undef DEBUG -#include "debug.h" +#include "deeptag_ros/debug.h" template class PoseEstimator diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/roi_generator.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/roi_generator.h index 583312233..6f7efb979 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/roi_generator.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/roi_generator.h @@ -3,7 +3,7 @@ #include #include // for Point2d -#include "span.hpp" +#include "deeptag_ros/span.hpp" template class Stage1GridGroup; class Stage1SSDGroup; diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/softmax.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/softmax.h index 24ec19e55..bef98eceb 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/softmax.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/softmax.h @@ -3,7 +3,7 @@ #include // for int32_t #include "driver_types.h" // for cudaStream_t -#include "span.hpp" // for span +#include "deeptag_ros/span.hpp" // for span // Class to handle Softmax ops. 
// Input is result from model, all bg scores (h x w) followed by all fg scores for grid diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_grid_prior.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_grid_prior.h index e37dda26e..7e7f604bc 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_grid_prior.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_grid_prior.h @@ -6,8 +6,8 @@ #include #include -#include "span.hpp" -#include "ssd_box.h" +#include "deeptag_ros/span.hpp" +#include "deeptag_ros/ssd_box.h" // Class to handle all of the SSDGridPrior ops. // Stores the grid priors (a map from model output indexes to image coords) along diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_tag_keypoint.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_tag_keypoint.h index 445e5ed9b..2d0643fb7 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_tag_keypoint.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/ssd_tag_keypoint.h @@ -9,7 +9,7 @@ #endif #include -#include "ssd_box.h" +#include "deeptag_ros/ssd_box.h" class SSDTagKeypoint { public: diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_decoder.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_decoder.h index de9efb010..dde2f7551 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_decoder.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_decoder.h @@ -8,22 +8,22 @@ #include // for Point2d #include // for string #include // for vector -#include "decoded_tag.h" // for DecodedTag -#include "decoder_engine.h" // for DecoderEngine -#include "decoder_softmax.h" // for DecoderSoftmax -#include "grid_prior.h" // for GridPrior -#include "confidence_filter.h" // for ConfidenceFilter -#include "span.hpp" // for span -#include "stage2_corners.h" // for Stage2Corners -#include "stage2_keypoint_trust.h" // for Stage2KeypointTrust -#include "suppress_and_average_keypoints.h" // for SuppressAndAverageKeypoints +#include "deeptag_ros/decoded_tag.h" // for DecodedTag +#include "deeptag_ros/decoder_engine.h" // for DecoderEngine +#include "deeptag_ros/decoder_softmax.h" // for DecoderSoftmax +#include "deeptag_ros/grid_prior.h" // for GridPrior +#include "deeptag_ros/confidence_filter.h" // for ConfidenceFilter +#include "deeptag_ros/span.hpp" // for span +#include "deeptag_ros/stage2_corners.h" // for Stage2Corners +#include "deeptag_ros/stage2_keypoint_trust.h" // for Stage2KeypointTrust +#include "deeptag_ros/suppress_and_average_keypoints.h" // for SuppressAndAverageKeypoints class GpuImageWrapper; struct GridPriorValue; class Stage2Keypoint; class Stage2KeypointGroup; class Timings; -struct PointsAndIDs; +template struct PointsAndIDs; template class STagDecoder @@ -46,13 +46,14 @@ class STagDecoder void setMinGridMatchRatio(const double minGridMatchRatio); double getMinGridMatchRatio(void) const; ushort2 getModelSize(void) const; + cudaStream_t getCudaStream(void); private: void runInference(std::vector> &stage2KeypointGroupss, - std::vector> &stage2Corners, + std::vector> &stage2Corners, const std::vector> &detectInputs, const tcb::span> &rois); - void fillEmptyIds(std::array &orderedFineGridPointsIds, + void fillEmptyIds(PointsAndIDs &orderedFineGridPointsIds, const tcb::span &fineGridPointsWithIdsCandidates) const; const MARKER_DICT &m_markerDict; diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_detector.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_detector.h index 08f3d94b6..aa3bed456 100644 --- 
a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_detector.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stag_detector.h @@ -7,14 +7,14 @@ #include // for unique_ptr:w #include // for string #include // for vector -#include "confidence_filter.h" // for Stage1Predicate, Confide... +#include "deeptag_ros/confidence_filter.h" // for Stage1Predicate, Confide... #include "driver_types.h" // for CUevent_st, cudaEvent_t -#include "grid_prior.h" // for GridPrior -#include "softmax.h" // for Softmax -#include "span.hpp" // for span -#include "ssd_grid_prior.h" // for SSDGridPrior -#include "suppress_and_average_keypoints.h" // for SuppressAndAverageKeypoints -#include "tag_detect_info.h" // for TagDetectInfo +#include "deeptag_ros/grid_prior.h" // for GridPrior +#include "deeptag_ros/softmax.h" // for Softmax +#include "deeptag_ros/span.hpp" // for span +#include "deeptag_ros/ssd_grid_prior.h" // for SSDGridPrior +#include "deeptag_ros/suppress_and_average_keypoints.h" // for SuppressAndAverageKeypoints +#include "deeptag_ros/tag_detect_info.h" // for TagDetectInfo #include "vector_types.h" // for ushort2 template class DetectionEngine; class GpuImageWrapper; diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_grid.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_grid.h index c38640bab..ffe7349b4 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_grid.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_grid.h @@ -3,7 +3,7 @@ #include #include "vector_types.h" -#include "grid_prior_value.h" +#include "deeptag_ros/grid_prior_value.h" #ifndef __host__ #define __host__ diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_ssd_group.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_ssd_group.h index 15912d9ed..8480275c0 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_ssd_group.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage1_ssd_group.h @@ -6,7 +6,7 @@ #ifndef __device__ #define __device__ #endif -#include "ssd_box.h" +#include "deeptag_ros/ssd_box.h" // Used to store merged Stage1Grid similar data as a single // group rather than deal with duplicates of individual results diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_corners.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_corners.h index 438c7750d..89b167675 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_corners.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_corners.h @@ -2,8 +2,8 @@ #define STAGE2_CORNERS_INC__ #include "cuda_runtime.h" -#include "grid_prior_value.h" -#include "span.hpp" +#include "deeptag_ros/grid_prior_value.h" +#include "deeptag_ros/span.hpp" class Stage2Corners { @@ -18,7 +18,7 @@ class Stage2Corners virtual ~Stage2Corners(); void compute(const float *input, const tcb::span &priors, const float centerVariance, cudaStream_t cudaStream); - const tcb::span getDeviceOutput(void); + // const tcb::span getDeviceOutput(void); const tcb::span getHostOutput(void); private: diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint.h index 22553ed57..e719341a7 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint.h @@ -1,7 +1,7 @@ #ifndef STAGE2_KEYPOINT_INC__ #define STAGE2_KEYPOINT_INC__ #include -#include "grid_prior_value.h" +#include "deeptag_ros/grid_prior_value.h" #ifndef __host__ #define __host__ 
diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint_trust.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint_trust.h index 00289e3f8..b110da2f4 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint_trust.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/stage2_keypoint_trust.h @@ -1,8 +1,8 @@ #ifndef STAGE2_KEYPOINT_TRUST_INC__ #define STAGE2_KEYPOINT_TRUST_INC__ #include -#include "stage2_keypoint.h" -#include "span.hpp" +#include "deeptag_ros/stage2_keypoint.h" +#include "deeptag_ros/span.hpp" // Given a list of keypoints, check that enough of them // have a high enough confidence that we trust that the group diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/suppress_and_average_keypoints.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/suppress_and_average_keypoints.h index a699881c8..23c7dd8fb 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/suppress_and_average_keypoints.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/suppress_and_average_keypoints.h @@ -1,7 +1,7 @@ #ifndef SUPPRESS_AND_AVERAGE_KEYPOINTS_H__ #define SUPPRESS_AND_AVERAGE_KEYPOINTS_H__ -#include "span.hpp" +#include "deeptag_ros/span.hpp" template class SuppressAndAverageKeypoints diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_arucotag.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_arucotag.h index a44591add..ef79a5014 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_arucotag.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_arucotag.h @@ -4,7 +4,7 @@ #include #include -#include "unit_chessboard_tag.h" +#include "deeptag_ros/unit_chessboard_tag.h" template std::array constexpr makeBinaryIDArray() diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_chessboard_tag.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_chessboard_tag.h index c0e288cb9..9ae5dad2a 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_chessboard_tag.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_chessboard_tag.h @@ -9,7 +9,7 @@ #include "opencv2/core/mat.hpp" #include "opencv2/core/types.hpp" -#include "bool_tag.h" +#include "deeptag_ros/bool_tag.h" class UnitChessboardTagBase { diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_tag_template.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_tag_template.h index 00e315a39..2019ac781 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_tag_template.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/unit_tag_template.h @@ -8,7 +8,7 @@ #include "opencv2/core/mat.hpp" #include "opencv2/core/types.hpp" -#include "span.hpp" +#include "deeptag_ros/span.hpp" #include "vector_types.h" //#define DEBUG @@ -17,8 +17,7 @@ #endif class Stage2KeypointGroup; -struct PointsAndIDs; - +template struct PointsAndIDs; template class UnitTagTemplate @@ -37,11 +36,11 @@ class UnitTagTemplate // mainIdx is the orientation of the tag, in 90* increments (0-3) // the two outputs are the rotated fine grid points (points including tag borders), // and rotated keypoints (just the "data" bits of the tag) - template - void reorderPointsWithMainIdx(std::array &fineGridPointsRotated, - std::array &keypointsRotated, + template + void reorderPointsWithMainIdx(T &fineGridPointsRotated, + T1 &keypointsRotated, const size_t mainIdx, - const std::array &fineGridPoints) const + const T &fineGridPoints) const { constexpr auto N = GRID_SIZE + 2; auto reorderedXYs = m_unitTags.getFineGridPoints(mainIdx, false, STEP_ELEM_NUM); @@ -67,6 
+66,26 @@ class UnitTagTemplate #endif } } + + void reorderPointsWithMainIdx(PointsAndIDs &fineGridPointsRotated, + const size_t mainIdx, + const PointsAndIDs &fineGridPoints) const + { + constexpr auto N = GRID_SIZE + 2; + const auto reorderedXYs = m_unitTags.getFineGridPoints(mainIdx, false, STEP_ELEM_NUM); + for (size_t i = 0; i < fineGridPointsRotated.size(); i++) + { + const auto &p = reorderedXYs[i]; + const size_t idx = p.y / STEP_ELEM_NUM * N + p.x / STEP_ELEM_NUM; + fineGridPointsRotated.m_point[i] = fineGridPoints.m_point[idx]; + fineGridPointsRotated.m_id[i] = fineGridPoints.m_id[idx]; + fineGridPointsRotated.m_score[i] = fineGridPoints.m_score[idx]; +#ifdef DEBUG + std::cout << "fineGridPointsRotated[" << i << "] = " << fineGridPointsRotated[i] << std::endl; +#endif + } + } + #if 0 template void reorderPointsWithMainIdx(std::vector &fineGridPointsRotated, std::vector &keypointsRotated, @@ -93,16 +112,17 @@ class UnitTagTemplate keypointsRotated.push_back(fineGridPoints[idx]); } } + #endif void matchFineGrid(double &matchRatio, - std::array &fineGridPointsAndIDs, + PointsAndIDs &bestOrderedPoints, const tcb::span &stage2KeypointGroups, const cv::Mat &H, const tcb::span &stage2PredCorners, const cv::Mat &cameraMatrix, const cv::Mat &distCoeffs) const; - std::array updateCornersInImage(const std::array &orderedPoints, + std::array updateCornersInImage(const PointsAndIDs &orderedPoints, const cv::Mat &HCrop, const cv::Mat &cameraMatrix, const cv::Mat &distCoeffs) const; @@ -115,7 +135,7 @@ class UnitTagTemplate const UNIT_TAG_CLASS m_unitTags; }; -#include "unit_arucotag.h" +#include "deeptag_ros/unit_arucotag.h" template class UnitTagTemplateArucotag : public UnitTagTemplate, false, 1, 0> { diff --git a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/warp_perspective_points.h b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/warp_perspective_points.h index dc64f2da5..7d48f065c 100644 --- a/zebROS_ws/src/deeptag_ros/include/deeptag_ros/warp_perspective_points.h +++ b/zebROS_ws/src/deeptag_ros/include/deeptag_ros/warp_perspective_points.h @@ -11,7 +11,7 @@ #include // for vector #undef DEBUG -#include "debug.h" +#include "deeptag_ros/debug.h" std::vector warpPerspectivePts(const cv::Mat &H, const std::vector &points, const double image_scale = 1.); diff --git a/zebROS_ws/src/deeptag_ros/models/aprilltag_decoder_mono.onnx b/zebROS_ws/src/deeptag_ros/models/aprilltag_decoder_mono.onnx new file mode 100644 index 000000000..b1e6dbb32 --- /dev/null +++ b/zebROS_ws/src/deeptag_ros/models/aprilltag_decoder_mono.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29678b7526e597a35b28b35bdd845c2977e1a099f8a7c36a93b4eb68551a29cf +size 901355 diff --git a/zebROS_ws/src/deeptag_ros/models/apriltag_decoder_mono.onnx b/zebROS_ws/src/deeptag_ros/models/apriltag_decoder_mono.onnx new file mode 100644 index 000000000..409d32873 --- /dev/null +++ b/zebROS_ws/src/deeptag_ros/models/apriltag_decoder_mono.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79292d8d2b66cfa1fc3582bb6df07dce0296d49f0a5032676f2da53c349ea0e1 +size 779902 diff --git a/zebROS_ws/src/deeptag_ros/models/create_engine.sh b/zebROS_ws/src/deeptag_ros/models/create_engine.sh index 430411d1d..5ad51bcb1 100755 --- a/zebROS_ws/src/deeptag_ros/models/create_engine.sh +++ b/zebROS_ws/src/deeptag_ros/models/create_engine.sh @@ -2,6 +2,12 @@ # Then run deeptag_ros and see what specific int8 file name it wants (e.g. 
Searching for engine file with name: /home/ubuntu/.900RobotCode.readonly/zebROS_ws/src/deeptag_ros/models/arucotag_roi_detector.engine.NVIDIAGeForceRTX4060LaptopGPU.int8.4.4) # ln -s arucotag_roi_detector_int8_batch4.engine aruco_roi_detector.engine.NVIDIAGeForceRTX4060LaptopGPU.int8.4.4 python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py arucotag_decoder.onnx --output arucotag_decoder.engine --fp16 --batch-size-min=1 --batch-size-max=4 --batch-size=4 --max-workspace-size=1073741824 +python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py apriltag_decoder_mono.onnx --output apriltag_decoder_mono_fp16_batch1.engine --fp16 --batch-size-min=1 --batch-size-max=1 --batch-size=1 --max-workspace-size=1073741824 +python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py apriltag_decoder_mono.onnx --output apriltag_decoder_mono_fp16_batch2.engine --fp16 --batch-size-min=2 --batch-size-max=2 --batch-size=2 --max-workspace-size=1073741824 +python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py apriltag_decoder_mono.onnx --output apriltag_decoder_mono_fp16_batch3.engine --fp16 --batch-size-min=3 --batch-size-max=3 --batch-size=3 --max-workspace-size=1073741824 +python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py apriltag_decoder_mono.onnx --output apriltag_decoder_mono_int8_batch1.engine --int8 --fp16 --batch-size-min=1 --batch-size-max=1 --batch-size=1 --max-workspace-size=1073741824 +python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py apriltag_decoder_mono.onnx --output apriltag_decoder_mono_int8_batch2.engine --int8 --fp16 --batch-size-min=2 --batch-size-max=2 --batch-size=2 --max-workspace-size=1073741824 +python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py apriltag_decoder_mono.onnx --output apriltag_decoder_mono_int8_batch3.engine --int8 --fp16 --batch-size-min=3 --batch-size-max=3 --batch-size=3 --max-workspace-size=1073741824 #python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py arucotag_decoder.onnx --output arucotag_decoder_int8.engine --fp16 --int8 --batch-size-min=1 --batch-size-max=4 --batch-size=4 --max-workspace-size=1073741824 --dataset-path /home/ubuntu/tensorflow_workspace/2023Game/data/combined_88_test --calibration-file arucotag_decoder.calib #python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py arucotag_roi_detector.onnx --output arucotag_roi_detector.engine --fp16 --input-width=1088 --input-width-min=512 --input-width-max=2048 --max-workspace-size=1073741824 @@ -10,3 +16,4 @@ python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py arucotag_roi_detector.onnx --output arucotag_roi_detector_int8_batch5.engine --fp16 --input-width=1088 --input-width-min=960 --input-width-max=1280 --batch-size=5 --int8 --max-workspace-size=1073741824 --dataset-path /home/ubuntu --calibration-file arucotag_roi_detector.calib python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py arucotag_roi_detector.onnx --output arucotag_roi_detector_int8_batch9.engine --fp16 --input-width=1088 --input-width-min=960 --input-width-max=1280 --batch-size=9 --int8 --max-workspace-size=1073741824 --dataset-path /home/ubuntu --calibration-file arucotag_roi_detector.calib python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py
arucotag_roi_detector.onnx --output arucotag_roi_detector_int8_batch10.engine --fp16 --input-width=1088 --input-width-min=960 --input-width-max=1280 --batch-size=10 --int8 --max-workspace-size=1073741824 --dataset-path /home/ubuntu --calibration-file arucotag_roi_detector.calib +python3 ~/900RobotCode/zebROS_ws/src/tf_object_detection/src/onnx_to_tensorrt.py arucotag_roi_detector.onnx --output arucotag_roi_detector_int8_batch10.engine --fp16 --input-width=1088 --input-width-min=960 --input-width-max=1280 --batch-size=10 --int8 --max-workspace-size=1073741824 --dataset-path /home/ubuntu --calibration-file arucotag_roi_detector.calib diff --git a/zebROS_ws/src/deeptag_ros/src/decoder_engine.cpp b/zebROS_ws/src/deeptag_ros/src/decoder_engine.cpp index 6717ca859..64c4f9c62 100644 --- a/zebROS_ws/src/deeptag_ros/src/decoder_engine.cpp +++ b/zebROS_ws/src/deeptag_ros/src/decoder_engine.cpp @@ -151,8 +151,8 @@ void DecoderEngine::blobFromGpuImageWrappers(const std::vector #ifdef DEBUG static int callNum = 0; #endif - constexpr size_t outputHW = 256; - constexpr size_t imgSize = outputHW * outputHW * 3; + const size_t outputHW = 256; // This assumes a square image + const size_t imgSize = outputHW * outputHW * batchInput[0].channels(); const size_t thisBatchSize = std::min(m_rois.size(), static_cast(m_options.maxBatchSize)); //std::cout << "thisBatchSize = " << thisBatchSize << std::endl; // Get crop images ordered corners @@ -173,7 +173,11 @@ void DecoderEngine::blobFromGpuImageWrappers(const std::vector cv::Mat inputRoi(4, 2, CV_64FC1); for (size_t batchIdx = 0; batchIdx < thisBatchSize; batchIdx++) { - // std::cout << "batchIdx = " << batchIdx << std::endl; + // Create a mapping from the input roi (tag corners) in + // the input image to a fixed position in the output image + // Assign to a location in the output image with a border + // to catch the tag corners even if the initial detection + // is off by a bit. for (int i = 0; i < 4; i++) { inputRoi.at(i, 0) = m_rois[batchIdx][i].x; @@ -193,39 +197,46 @@ void DecoderEngine::blobFromGpuImageWrappers(const std::vector std::cout << "H = " << std::endl << "\t" << H << std::endl; std::cout << "H.inv() = " << std::endl << "\t" << H.inv() << std::endl; #endif - m_hH[batchIdx][0] = static_cast(H.at(0, 0)); - m_hH[batchIdx][1] = static_cast(H.at(0, 1)); - m_hH[batchIdx][2] = static_cast(H.at(0, 2)); - m_hH[batchIdx][3] = static_cast(H.at(1, 0)); - m_hH[batchIdx][4] = static_cast(H.at(1, 1)); - m_hH[batchIdx][5] = static_cast(H.at(1, 2)); - m_hH[batchIdx][6] = static_cast(H.at(2, 0)); - m_hH[batchIdx][7] = static_cast(H.at(2, 1)); - m_hH[batchIdx][8] = static_cast(H.at(2, 2)); - // Calculate H mat from roi - // Get inv transform matrix (from dest to src) - // pass this into a kernel which creates output image at requested - // buffer output - // Needs to pick appropriate input pixel for each output pixel, possible with bilinear filtering - // then split channels, convert to float. 
- cudaSafeCall(m_decoderPreprocess[batchIdx].decoderPreprocessRGB(m_hH[batchIdx], - batchInput[0].getDataPtr(), - imageFormat::IMAGE_RGB8, - batchInput[0].cols(), - batchInput[0].rows(), - static_cast(m_buffers[inputIdx]) + batchIdx * imgSize, - outputHW, - outputHW, - float2{0., 1.}, - m_preprocCudaStreams[batchIdx])); - + // Use a separate H matrix for each batch entry since they + // might not be copied to the device before the next + // iteration writes over the same memory + for (size_t i = 0; i < 9; i++) + { + m_hH[batchIdx][i] = static_cast(H.at(i / 3, i % 3)); + } + if (batchInput[0].channels() == 1) + { + cudaSafeCall(m_decoderPreprocess[batchIdx].decoderPreprocessGray(m_hH[batchIdx], + batchInput[0].getDataPtr(), + imageFormat::IMAGE_MONO8, + batchInput[0].cols(), + batchInput[0].rows(), + static_cast(m_buffers[inputIdx]) + batchIdx * imgSize, + outputHW, + outputHW, + float2{0., 1.}, + m_preprocCudaStreams[batchIdx])); + } + else + { + cudaSafeCall(m_decoderPreprocess[batchIdx].decoderPreprocessRGB(m_hH[batchIdx], + batchInput[0].getDataPtr(), + imageFormat::IMAGE_RGB8, + batchInput[0].cols(), + batchInput[0].rows(), + static_cast(m_buffers[inputIdx]) + batchIdx * imgSize, + outputHW, + outputHW, + float2{0., 1.}, + m_preprocCudaStreams[batchIdx])); + } cudaSafeCall(cudaEventRecord(m_preprocCudaEvents[batchIdx], m_preprocCudaStreams[batchIdx])); #ifdef DEBUG cv::Mat m = getDebugImage(batchIdx); std::stringstream s; s << "C" << callNum << "B" << batchIdx; cv::imshow(s.str().c_str(), m); - cv::imwrite(s.str() + ".png", m); + // cv::imwrite(s.str() + ".png", m); #endif } for (size_t batchIdx = 0; batchIdx < thisBatchSize; batchIdx++) @@ -235,7 +246,7 @@ void DecoderEngine::blobFromGpuImageWrappers(const std::vector #ifdef DEBUG callNum += 1; - constexpr size_t channelStride = outputHW * outputHW; + const size_t channelStride = outputHW * outputHW; #if 0 std::cout << " imgSize = " << imgSize << std::endl; cv::Mat hR(outputHW, outputHW, CV_32FC1); @@ -282,7 +293,7 @@ nvinfer1::Dims DecoderEngine::inputDimsFromInputImage(const GpuImageWrapper &gpu // Decoder is fixed at 3, 256, 256 return nvinfer1::Dims{4, {modelInputDims.d[0], - 3, + static_cast(gpuImg.channels()), 256, 256}}; } diff --git a/zebROS_ws/src/deeptag_ros/src/decoder_preprocess.cu b/zebROS_ws/src/deeptag_ros/src/decoder_preprocess.cu index fa189dfa1..73a6742bb 100644 --- a/zebROS_ws/src/deeptag_ros/src/decoder_preprocess.cu +++ b/zebROS_ws/src/deeptag_ros/src/decoder_preprocess.cu @@ -64,7 +64,7 @@ __global__ void gpuDecoderPreprocess(const float *H, }; const float3 rgb = isBGR ? 
make_float3(px.z, px.y, px.x) - : make_float3(px.x, px.y, px.z); + : make_float3(px.x, px.y, px.z); output[n * 0 + m] = rgb.x * multiplier + min_value; output[n * 1 + m] = rgb.y * multiplier + min_value; @@ -127,8 +127,10 @@ cudaError_t DecoderPreprocess::launchDecoderPreprocess(const float *hH, void *in } else { - // Mono8 option - 1 channel grayscale image + // Mono8 option - 1 channel grayscale image, in either uint8_t or float format if (format == imageFormat::IMAGE_MONO8) + gpuDecoderPreprocess<<>>(m_dH, (uint8_t *)input, inputWidth, inputHeight, output, outputWidth, outputHeight, multiplier, range.x); + else if (format == imageFormat::IMAGE_MONO32F) gpuDecoderPreprocess<<>>(m_dH, (float *)input, inputWidth, inputHeight, output, outputWidth, outputHeight, multiplier, range.x); else return cudaErrorInvalidValue; diff --git a/zebROS_ws/src/deeptag_ros/src/decoder_test.cpp b/zebROS_ws/src/deeptag_ros/src/decoder_test.cpp new file mode 100644 index 000000000..7ca656d70 --- /dev/null +++ b/zebROS_ws/src/deeptag_ros/src/decoder_test.cpp @@ -0,0 +1,142 @@ +#include + +#include "deeptag_ros/cuda_event_timing.h" +#include "deeptag_ros/cuda_utils.h" +#include "deeptag_ros/gpu_image_wrapper.h" +#include "deeptag_ros/marker_dict.h" +#include "deeptag_ros/stag_decoder.h" + +constexpr size_t MARKER_GRID_SIZE = 6; + +static cv::Mat getTag(const cv::Mat &image, const size_t outputHW, const cv::Mat H) +{ + cv::Mat tag; + warpPerspective(image, tag, H, cv::Size(outputHW, outputHW)); + return tag; +} + +template +static void writeStage2Debug(cv::Mat &image, + const PointsAndIDs &keypointsAndIds, + const uint16_t tagId) +{ + for (size_t kp = 0; kp < keypointsAndIds.m_point.size(); kp++) + { + const auto id = keypointsAndIds.m_id[kp]; + cv::circle(image, + cv::Point2d(keypointsAndIds.m_point[kp].x, keypointsAndIds.m_point[kp].y), + 3, + (id < 0) ? + cv::Scalar(255, 0, 0) : (id == 0) ? 
cv::Scalar(0, 0, 255) + : cv::Scalar(0, 255, 0)); + } + std::stringstream s; + s << tagId; + cv::putText(image, s.str(), cv::Point(5, 35), 0, 1.5, cv::Scalar(0, 255, 255), 2); +} + +template +void visualizeStage2(cv::Mat &image, + const size_t outputHW, + const std::vector, 2>> &result) +{ + cv::Mat output(outputHW * 2, // rows + outputHW * std::max(result.size(), static_cast(1)), // cols + CV_8UC3, + cv::Scalar(255, 255, 255)); + if (result.empty()) + { + image = output; + return; + } + cv::Mat tag; + // Arrange tags horizontally, with the first pass of the tag decode on top and second on the bottom + for (size_t i = 0; i < result.size(); i++) + { + tag = getTag(image, outputHW, result[i][0].m_HCrop); + writeStage2Debug(tag, result[i][0].m_keypointsWithIds, result[i][0].m_tagId); + tag.copyTo(output(cv::Rect(i * outputHW, 0, outputHW, outputHW))); + + tag = getTag(image, outputHW, result[i][1].m_HCrop); + writeStage2Debug(tag, result[i][1].m_keypointsWithIds, result[i][1].m_tagId); + tag.copyTo(output(cv::Rect(i * outputHW, outputHW, outputHW, outputHW))); + } + image = output; +} +int main(int argc, char **argv) +{ + const auto cameraMatrix = (cv::Mat_(3, 3) << 128., 0.0, 128., 0.0, 128., 128., 0.0, 0.0, 1.0); + const auto distCoeffs = (cv::Mat_(1, 8) << 0, 0, 0, 0, 0, 0, 0, 0); + Timings timings{}; + ArucoMarkerDict arucoMarkerDict{cv::aruco::PREDEFINED_DICTIONARY_NAME::DICT_APRILTAG_36h11}; + ArucoSTagDecoder sTagDecoder{arucoMarkerDict, cameraMatrix, distCoeffs, timings}; + + sTagDecoder.initEngine("/home/ubuntu/900RobotCode/zebROS_ws/src/deeptag_ros/models", "apriltag_decoder_mono.onnx"); + + const std::string inputImage = argv[1]; + auto cpuImg = cv::imread(inputImage, cv::IMREAD_GRAYSCALE); + if (cpuImg.empty()) + { + throw std::runtime_error("Unable to read image at path: " + inputImage); + } + std::vector> detectInputs; + detectInputs.emplace_back(); + detectInputs[0].emplace_back(); + detectInputs[0][0].upload(cpuImg, sTagDecoder.getCudaStream()); + std::vector> rois; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = cv::Point2d{.15 * 256, .85 * 256}; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = cv::Point2d{.15 * 256, .85 * 256}; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = cv::Point2d{.15 * 256, .85 * 256}; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = cv::Point2d{.15 * 256, .85 * 256}; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = cv::Point2d{.15 * 256, .85 * 256}; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = cv::Point2d{.15 * 256, .85 * 256}; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = 
cv::Point2d{.15 * 256, .85 * 256}; + rois.emplace_back(); + rois[0][0] = cv::Point2d{.15 * 256, .15 * 256}; + rois[0][1] = cv::Point2d{.85 * 256, .15 * 256}; + rois[0][2] = cv::Point2d{.85 * 256, .85 * 256}; + rois[0][3] = cv::Point2d{.15 * 256, .85 * 256}; + for (size_t i = 0; i < 1000; i++) + { + const auto decodedTags = sTagDecoder.detectTags(detectInputs, rois); + for (const auto &decodedTag : decodedTags) + { + std::cout << "Tag " << decodedTag[1].m_tagId << std::endl; + } + } + cv::Mat stage2DebugImg = cpuImg.clone(); + cv::cvtColor(stage2DebugImg, stage2DebugImg, cv::COLOR_GRAY2BGR); + visualizeStage2(stage2DebugImg, sTagDecoder.getModelSize().x, decodedTags); + cv::imshow((inputImage + "_stage_2").c_str(), stage2DebugImg); + + cv::waitKey(0); + return 0; +} \ No newline at end of file diff --git a/zebROS_ws/src/deeptag_ros/src/deeptag_impls.cpp b/zebROS_ws/src/deeptag_ros/src/deeptag_impls.cpp index 0cc9aaf77..c720af94f 100644 --- a/zebROS_ws/src/deeptag_ros/src/deeptag_impls.cpp +++ b/zebROS_ws/src/deeptag_ros/src/deeptag_impls.cpp @@ -83,14 +83,12 @@ std::vector DeepTagImpl channels; - channels.push_back(hR); - channels.push_back(hG); - channels.push_back(hB); - cv::Mat fin_img; - cv::merge(channels, fin_img); - return fin_img; + if (inputDims.d[1] == 1) + { + cv::Mat fin_img(outputHeight, outputWidth, CV_32FC1); + cudaSafeCall(cudaMemcpyAsync(fin_img.data, destBuffer + imageIdx * channelStride, channelStride * sizeof(float), cudaMemcpyDeviceToHost, getCudaStream())); + return fin_img; + } + else + { + cv::Mat hR(outputHeight, outputWidth, CV_32FC1); + cv::Mat hG(outputHeight, outputWidth, CV_32FC1); + cv::Mat hB(outputHeight, outputWidth, CV_32FC1); + cudaSafeCall(cudaMemcpyAsync(hR.data, destBuffer + (imageIdx * 3 + 0) * channelStride, channelStride * sizeof(float), cudaMemcpyDeviceToHost, getCudaStream())); + cudaSafeCall(cudaMemcpyAsync(hG.data, destBuffer + (imageIdx * 3 + 1) * channelStride, channelStride * sizeof(float), cudaMemcpyDeviceToHost, getCudaStream())); + cudaSafeCall(cudaMemcpyAsync(hB.data, destBuffer + (imageIdx * 3 + 2) * channelStride, channelStride * sizeof(float), cudaMemcpyDeviceToHost, getCudaStream())); + cudaSafeCall(cudaStreamSynchronize(getCudaStream())); + std::vector channels; + channels.push_back(hR); + channels.push_back(hG); + channels.push_back(hB); + cv::Mat fin_img; + cv::merge(channels, fin_img); + return fin_img; + } } Int8EntropyCalibrator2::Int8EntropyCalibrator2(int32_t batchSize, int32_t inputW, int32_t inputH, diff --git a/zebROS_ws/src/deeptag_ros/src/stag_decoder.cpp b/zebROS_ws/src/deeptag_ros/src/stag_decoder.cpp index d89641c6d..2e1ef0fd6 100644 --- a/zebROS_ws/src/deeptag_ros/src/stag_decoder.cpp +++ b/zebROS_ws/src/deeptag_ros/src/stag_decoder.cpp @@ -67,7 +67,7 @@ void STagDecoder::initEngine(const std::string &modelPat // (although TODO : those might be duplicates of data in the KeyPoints) template void STagDecoder::runInference(std::vector> &stage2KeypointGroups, - std::vector> &stage2Corners, + std::vector> &stage2Corners, const std::vector> &detectInputs, const tcb::span> &rois) { @@ -93,28 +93,38 @@ void STagDecoder::runInference(std::vectorgetCudaStream()); // model input size and image size are the same + // Divide x&y by 128 to get a 2x2 grid giving four corner points + // of the outer black border of the tag m_stage2CornerPrior.generate(getModelSize(), 128, getModelSize(), {}, m_decodeEngine->getCudaStream()); m_timing.end("stage2_corner_priors"); + // Grid priors create a 32x32 grid of anchor points for keypoint 
detection + // Each has an associated offset from the anchor point along with a class confidence + // (background or foreground black or white) m_timing.start("stage2_grid_priors", m_decodeEngine->getCudaStream()); m_stage2GridPrior.generate(getModelSize(), 8, getModelSize(), {}, m_decodeEngine->getCudaStream()); m_timing.end("stage2_grid_priors"); for (size_t roiNum = 0; roiNum < rois.size(); roiNum++) { + // Run softmax on the keypoint grid output, giving a confidence for + // each keypoint being a black or white corner. We drop any keypoints + // which are part of the background class. m_timing.start("stage2_softmax", m_decodeEngine->getCudaStream()); m_stage2DecoderSoftmax.compute(m_decodeEngine->getBufferByName("confidences_pred", roiNum), 32 * 32, m_decodeEngine->getCudaStream()); m_timing.end("stage2_softmax"); + // Grab keypoint coordinates by applying offsets to the grid anchor points + // Filter out keypoints with low confidence m_timing.start("stage2_keypoint_detect", m_decodeEngine->getCudaStream()); m_confidenceFilter.detect({m_stage2DecoderSoftmax.getOutput().data(), m_decodeEngine->getBufferByName("locations_pred", roiNum), nullptr /* not used */}, m_stage2GridPrior.getOutput(), 0.05f, // centerVariance - 0.0f, // sizeVariance - not used for corners + 0.0f, // sizeVariance - not used for keypoints 0.6f, // min confidence // TODO : configurable m_decodeEngine->getCudaStream(), buffersResized); @@ -132,10 +142,14 @@ void STagDecoder::runInference(std::vectorgetCudaStream()); m_keypointGrouper.compute(m_confidenceFilter.getOutput(), 12, 0.0, m_decodeEngine->getCudaStream()); m_timing.end("stage2_keypoint_group"); + // Compute corner locations as offsets from the corner prior anchor points + // Do this here so the memcpy from the keypoint grouper above has time + // to possibly finish m_timing.start("stage2_corner_locations", m_decodeEngine->getCudaStream()); m_corners.compute(m_decodeEngine->getBufferByName("corner_locations_pred", roiNum), m_stage2CornerPrior.getOutput(), @@ -143,6 +157,7 @@ void STagDecoder::runInference(std::vectorgetCudaStream()); m_timing.end("stage2_corner_locations"); + // Grab the host outputs of each of the above operations m_timing.start("stage2_keypoint_group_out", m_decodeEngine->getCudaStream()); const tcb::span hStage2KeypointGroup = m_keypointGrouper.getOutput(); for (const auto &k : hStage2KeypointGroup) @@ -153,10 +168,7 @@ void STagDecoder::runInference(std::vectorgetCudaStream()); const tcb::span hStage2Corners = m_corners.getHostOutput(); - for (const auto &c : hStage2Corners) - { - stage2Corners.back().push_back(c); - } + std::copy(hStage2Corners.begin(), hStage2Corners.end(), stage2Corners.back().begin()); m_timing.end("stage2_corners_out"); } } @@ -164,12 +176,17 @@ void STagDecoder::runInference(std::vector std::vector, 2>> STagDecoder::detectTags(const std::vector> &detectInputs, - const std::vector> &rois) + const std::vector> &rois) { + // Array of tag corners detected in the input image std::vector> thisRois{rois}; + // Output of model inference on the extracted rois std::vector> stage2KeypointGroups; - std::vector> stage2Corners; + std::vector> stage2Corners; + + // Decoded tag info. 
2 iterations per tag to refine corners std::vector, 2>> ret; + for (size_t iter = 0; iter < 2; iter++) { #ifdef DEBUG @@ -190,8 +207,8 @@ std::vector, 2>> STagDecoder, 2>{}); - ret[retIdx][0].m_HCrop = m_decodeEngine->getH(ii); } + ret[retIdx][iter].m_HCrop = m_decodeEngine->getH(ii); ret[retIdx][iter].m_isValid = stage2KeypointGroups[retIdx].size() > 0; #ifdef DEBUG std::cout << "iter = " << iter << " ret[" << retIdx << "].m_isValid = " << ret[retIdx].m_isValid << std::endl; @@ -204,7 +221,10 @@ std::vector, 2>> STagDecodergetCudaStream()); double matchRatio; constexpr auto FINE_GRID_SIZE = MARKER_DICT::getGridSize() + 2; - std::array orderedFineGridPointsIds; + PointsAndIDs orderedFineGridPointsIds; + // Assign the points detected in the crop to actual grid + // points in the proposed tag. This is done by matching the + // detected keypoints to the nearest grid points in the tag m_markerDict.getUnitTagTemplate().matchFineGrid(matchRatio, orderedFineGridPointsIds, stage2KeypointGroups[retIdx], @@ -219,9 +239,9 @@ std::vector, 2>> STagDecoder m_minGridMatchRatio) { - m_timing.start("stage2_fillemptyids", m_decodeEngine->getCudaStream()); + // m_timing.start("stage2_fillemptyids", m_decodeEngine->getCudaStream()); //fillEmptyIds(orderedFineGridPointsIds, stage2KeypointGroups[retIdx]); - m_timing.end("stage2_fillemptyids"); + // m_timing.end("stage2_fillemptyids"); m_timing.start("stage2_updatecornersinimage", m_decodeEngine->getCudaStream()); const auto roiUpdated = m_markerDict.getUnitTagTemplate().updateCornersInImage(orderedFineGridPointsIds, @@ -233,44 +253,32 @@ std::vector, 2>> STagDecodergetCudaStream()); thisRois[retIdx] = roiUpdated; ret[retIdx][iter].m_roi = roiUpdated; - std::array tagBits; - for (size_t i = 0; i < orderedFineGridPointsIds.size(); i++) - { - tagBits[i] = orderedFineGridPointsIds[i].m_id; - } + int hammingDist = 2; // TODO - configurable, dynamic reconfig potential - m_markerDict.getMainIdx(ret[retIdx][iter].m_mainIdx, ret[retIdx][iter].m_tagId, ret[retIdx][iter].m_binaryId, tagBits, hammingDist); + // Decode tag bits into a tagID and binaryID + // Main index is the rotation of the tag (in 90* increments) + m_markerDict.getMainIdx(ret[retIdx][iter].m_mainIdx, + ret[retIdx][iter].m_tagId, + ret[retIdx][iter].m_binaryId, + orderedFineGridPointsIds.m_id, + hammingDist); #ifdef DEBUG std::cout << "mainIdx = " << ret[retIdx].m_mainIdx << " tagId = " << ret[retIdx].m_tagId << std::endl; #endif m_timing.end("stage2_getmainindex"); m_timing.start("stage2_reorderpointswithmainidx", m_decodeEngine->getCudaStream()); - std::array orderedKptsWithIds; m_markerDict.getUnitTagTemplate().reorderPointsWithMainIdx(ret[retIdx][iter].m_keypointsWithIds, // [re] orderedFineGridPointsIds - orderedKptsWithIds, ret[retIdx][iter].m_mainIdx, orderedFineGridPointsIds); - for (size_t i = 0; i < ret[retIdx][iter].m_keypointsWithIds.size(); i++) - { - ret[retIdx][iter].m_keypointsInImage[i] = ret[retIdx][iter].m_keypointsWithIds[i].m_point; - } - warpPerspectivePts(ret[retIdx][0].m_HCrop.inv(), ret[retIdx][iter].m_keypointsInImage); + warpPerspectivePts(ret[retIdx][iter].m_HCrop.inv(), ret[retIdx][iter].m_keypointsWithIds.m_point); m_timing.end("stage2_reorderpointswithmainidx"); #ifdef DEBUG - for (const auto &r : orderedFineGridPointsIds) - { - std::cout << " O : " << r.m_point.x << " " << r.m_point.y << " " << r.m_id << std::endl; - } - for (const auto &r : ret[retIdx].m_keypointsWithIds) - { - std::cout << " R : " << r.m_point.x << " " << r.m_point.y << " " << r.m_id << std::endl; - } - 
for (const auto &r : orderedKptsWithIds) - { - std::cout << " K : " << r.m_point.x << " " << r.m_point.y << " " << r.m_id << std::endl; - } + std::cout << "orderedFineGripPointsIds" << std::endl + << orderedFineGridPointsIds << std::endl; + std::cout << "ret[retIdx][iter].m_keypointsWithIds.m_point" << std::endl + << ret[retIdx][iter].m_keypointsWithIds << std::endl; for (const auto &kg : stage2KeypointGroups[retIdx]) { kg.print(); @@ -279,66 +287,38 @@ std::vector, 2>> STagDecodergetDebugImage(ii); - for (const auto &kg : stage2KeypointGroups[retIdx]) - { - cv::circle(debugImg, - cv::Point2d(kg.m_keypoint.x, kg.m_keypoint.y), - 3, - (kg.m_label < 0) ? cv::Scalar(128, 0, 0) : (kg.m_label == 0) ? cv::Scalar(0, 0, 128) - : cv::Scalar(0, 128, 0)); - } - if (stage2Corners[retIdx].size() >= 4) - { - const std::array orderedCorners = {stage2Corners[retIdx][0], - stage2Corners[retIdx][1], - stage2Corners[retIdx][3], - stage2Corners[retIdx][2]}; - cv::line(debugImg, cv::Point2f(orderedCorners[0].x, orderedCorners[0].y), cv::Point2f(orderedCorners[1].x, orderedCorners[1].y), cv::Scalar(0, 128, 0), 2); - cv::line(debugImg, cv::Point2f(orderedCorners[1].x, orderedCorners[1].y), cv::Point2f(orderedCorners[2].x, orderedCorners[2].y), cv::Scalar(0, 128, 0), 2); - cv::line(debugImg, cv::Point2f(orderedCorners[2].x, orderedCorners[2].y), cv::Point2f(orderedCorners[3].x, orderedCorners[3].y), cv::Scalar(0, 128, 0), 2); - cv::line(debugImg, cv::Point2f(orderedCorners[3].x, orderedCorners[3].y), cv::Point2f(orderedCorners[0].x, orderedCorners[0].y), cv::Scalar(0, 128, 0), 2); - } - std::stringstream s; - s << ret[retIdx].m_tagId; - cv::putText(debugImg, s.str(), cv::Point(5, 35), 0, 1.5, cv::Scalar(0, 255, 255), 2); - s.str(""); - s << "Debug_Iter" << iter << "Idx" << retIdx; - cv::imshow(s.str().c_str(), debugImg); #endif } else { ret[retIdx][iter].m_isValid = false; } - } + } // if tag is valid retIdx += 1; - } - } + } // loop over tags in batch + } // loop over batch in batches } -#ifdef DEBUG - cv::waitKey(20); -#endif return ret; } template -void STagDecoder::fillEmptyIds(std::array &orderedFineGridPointsIds, +void STagDecoder::fillEmptyIds(PointsAndIDs &orderedFineGridPointsIds, const tcb::span &fineGridPointsWithIdsCandidates) const { - for (auto &kpt1 : orderedFineGridPointsIds) + for (size_t i = 0; i < orderedFineGridPointsIds.m_point.size(); i++) { - if (kpt1.m_id == -1) + const auto &kpt1 = orderedFineGridPointsIds.m_point[i]; + auto kid1 = orderedFineGridPointsIds.m_id[i]; + if (kid1 == -1) { - double minDist = 1000.; + auto minDist = std::numeric_limits::max(); for (const auto &kpt2 : fineGridPointsWithIdsCandidates) { - const auto dist = hypot(kpt1.m_point.x - kpt2.m_keypoint.x, kpt1.m_point.y - kpt2.m_keypoint.y); + const auto dist = hypot(kpt1.x - kpt2.m_keypoint.x, kpt1.y - kpt2.m_keypoint.y); if (dist < minDist) { minDist = dist; - kpt1.m_id = kpt2.m_label; + kid1 = kpt2.m_label; } } } @@ -363,6 +343,12 @@ ushort2 STagDecoder::getModelSize(void) const return ushort2{inputDim.d[2], inputDim.d[3]}; } +template +cudaStream_t STagDecoder::getCudaStream(void) +{ + return m_decodeEngine->getCudaStream(); +} + #include "deeptag_ros/marker_dict.h" template class STagDecoder, 4>; template class STagDecoder, 5>; diff --git a/zebROS_ws/src/deeptag_ros/src/stage2_corners.cu b/zebROS_ws/src/deeptag_ros/src/stage2_corners.cu index 0a89d2648..d387f9943 100644 --- a/zebROS_ws/src/deeptag_ros/src/stage2_corners.cu +++ b/zebROS_ws/src/deeptag_ros/src/stage2_corners.cu @@ -17,7 +17,7 @@ __global__ static void 
calculateStage2CornerKernel(float2 *output, } //printf("idx = %d, input[idx] = %f %f, centerVariance = %f, priors[idx] = %f %f\n", idx, input[idx], input[idx + length], centerVariance, priors[idx].x, priors[idx].y); output[idx].x = (input[idx] * centerVariance + priors[idx].m_scale.x) * priors[idx].m_imageSize.x + priors[idx].m_offset.x; - output[idx].y = (input[idx + length] * centerVariance + priors[idx].m_scale.y) * priors[idx].m_imageSize.x + priors[idx].m_offset.y; + output[idx].y = (input[idx + length] * centerVariance + priors[idx].m_scale.y) * priors[idx].m_imageSize.y + priors[idx].m_offset.y; } Stage2Corners::Stage2Corners(void) @@ -45,11 +45,11 @@ void Stage2Corners::compute(const float *input, cudaSafeCall(cudaEventRecord(m_dataReadyEvent, cudaStream)); } -const tcb::span Stage2Corners::getDeviceOutput(void) -{ - cudaEventSynchronize(m_dataReadyEvent); - return tcb::span(m_dResults, 4); -} +// const tcb::span Stage2Corners::getDeviceOutput(void) +// { +// cudaEventSynchronize(m_dataReadyEvent); +// return tcb::span(m_dResults, 4); +// } const tcb::span Stage2Corners::getHostOutput(void) { diff --git a/zebROS_ws/src/deeptag_ros/src/unit_tag_template.cpp b/zebROS_ws/src/deeptag_ros/src/unit_tag_template.cpp index 695015fb9..5e6ab6d3f 100644 --- a/zebROS_ws/src/deeptag_ros/src/unit_tag_template.cpp +++ b/zebROS_ws/src/deeptag_ros/src/unit_tag_template.cpp @@ -1,4 +1,4 @@ -// #include +#include #include #include "opencv2/calib3d.hpp" #include "deeptag_ros/distorted_h_transform.h" @@ -11,21 +11,21 @@ static constexpr size_t maxWarpTry = 3; template -std::array iterativeMatchAndWarp(const tcb::span &unorderedPoints, - const std::array &unitPoints, - const std::array &unitCorners, - const std::array &cptsInCrop, // ordered corners in crop - const std::array &cameraMatrix, - const std::array &distCoeffs, - const cv::Mat &H, - const std::vector &HListForCtpsInCrop, - const size_t maxWarpTry); +PointsAndIDs iterativeMatchAndWarp(const tcb::span &unorderedPoints, + const std::array &unitPoints, + const std::array &unitCorners, + const std::array &cptsInCrop, // ordered corners in crop + const std::array &cameraMatrix, + const std::array &distCoeffs, + const cv::Mat &H, + const std::vector &HListForCtpsInCrop, + const size_t maxWarpTry); template static void checkMatchRatio(double &matchRatio, int &count, int &totalCount, - const std::array &orderedPoints, + const PointsAndIDs &orderedPoints, const int unorderedPointsNum); template @@ -36,7 +36,7 @@ UnitTagTemplate void UnitTagTemplate::matchFineGrid(double &maxMatchRatio, - std::array &bestOrderedPoints, + PointsAndIDs &bestOrderedPoints, const tcb::span &unorderedPoints, const cv::Mat &H, const tcb::span &stage2PredCorners, @@ -44,10 +44,6 @@ void UnitTagTemplate -std::array UnitTagTemplate::updateCornersInImage(const std::array &orderedPointsAndIds, +std::array UnitTagTemplate::updateCornersInImage(const PointsAndIDs &orderedPointsAndIds, const cv::Mat &HCrop, const cv::Mat &cameraMatrix, const cv::Mat &distCoeffs) const { const auto unitPoints = m_unitTags.getFineGridPoints(0, true, STEP_ELEM_NUM); - std::array orderedPoints; - for (size_t i = 0; i < orderedPointsAndIds.size(); i++) - { - orderedPoints[i] = orderedPointsAndIds[i].m_point; - } const auto unitCorners = m_unitTags.getOrderedCorners(); - const auto cornersInCropUpdated = controlpointsToKeypointsInCropWithH(unitPoints, orderedPoints, unitCorners, cameraMatrix, distCoeffs, HCrop); + const auto cornersInCropUpdated = controlpointsToKeypointsInCropWithH(unitPoints, + 
orderedPointsAndIds.m_point, + unitCorners, + cameraMatrix, + distCoeffs, + HCrop); return warpPerspectivePts(HCrop.inv(), cornersInCropUpdated); } @@ -240,15 +237,15 @@ static void matchAndWarp(cv::Mat &HNew, } template -std::array iterativeMatchAndWarp(const tcb::span &stage2KeypointGroups, // keypoints in crop - const std::array &orderedKptsGt, // unit points - const std::array &cptsGt, // unit corners - const std::array &cptsInCrop, // ordered corners in crop - const cv::Mat &cameraMatrix, - const cv::Mat &distCoeffs, - const cv::Mat &H, - const std::vector &HListForCtpsInCrop, - const size_t maxWarpTry) +PointsAndIDs iterativeMatchAndWarp(const tcb::span &stage2KeypointGroups, // keypoints in crop + const std::array &orderedKptsGt, // unit points + const std::array &cptsGt, // unit corners + const std::array &cptsInCrop, // ordered corners in crop + const cv::Mat &cameraMatrix, + const cv::Mat &distCoeffs, + const cv::Mat &H, + const std::vector &HListForCtpsInCrop, + const size_t maxWarpTry) { #ifdef DEBUG std::cout << "iterativeMatchAndWarp" << std::endl; @@ -416,14 +413,11 @@ std::array iterativeMatchAndWarp( #endif } } - std::array orderedKptsWithIds; + PointsAndIDs orderedKptsWithIds; //std::cout << "HCurr = " << HCurr << std::endl; if (HCurr.empty()) { - for (size_t ii = 0; ii < orderedKptsWithIds.size(); ii++) - { - orderedKptsWithIds[ii] = PointsAndIDs{orderedKptsGt[ii].x, orderedKptsGt[ii].y, -1}; - } + orderedKptsWithIds.m_point = orderedKptsGt; } else { @@ -440,28 +434,26 @@ std::array iterativeMatchAndWarp( if (matchFlagsCandBest[ii]) { const auto &kp = stage2KeypointGroups[matchIdsBest[ii]]; - orderedKptsWithIds[ii] = PointsAndIDs{kp.m_keypoint.x, - kp.m_keypoint.y, - kp.m_label, - kp.m_score}; + orderedKptsWithIds.m_point[ii].x = kp.m_keypoint.x; + orderedKptsWithIds.m_point[ii].y = kp.m_keypoint.y; + orderedKptsWithIds.m_id[ii] = kp.m_label; + orderedKptsWithIds.m_score[ii] = kp.m_score; } else { - orderedKptsWithIds[ii] = PointsAndIDs{orderedKptCandidatesWarp[ii].x, - orderedKptCandidatesWarp[ii].y, - -1}; + orderedKptsWithIds.m_point[ii] = orderedKptCandidatesWarp[ii]; + orderedKptsWithIds.m_id[ii] = -1; } } } #ifdef DEBUG std::cout << "orderedKptsWithIds" << std::endl; - for (const auto &o : orderedKptsWithIds) + for (size_t i = 0; i < orderedKptsWithIds.m_point.size(); i++) { - std::cout << o.m_point.x << " " << o.m_point.y << " " << o.m_id << std::endl; + std::cout << orderedKptsWithIds.m_point[i].x << " " << orderedKptsWithIds.m_point[i].y << " " << orderedKptsWithIds.m_id[i] << std::endl; } #endif - return orderedKptsWithIds; } @@ -469,18 +461,18 @@ template static void checkMatchRatio(double &matchRatio, int &count, int &totalCount, - const std::array &orderedPoints, + PointsAndIDs &orderedPoints, const int unorderedPointsNum) { count = 0; - for (const auto &op: orderedPoints) + for (const auto &id: orderedPoints.m_id) { - if (op.m_id >= 0) + if (id >= 0) { count += 1; } } - totalCount = std::max(static_cast(orderedPoints.size()), count); + totalCount = std::max(static_cast(orderedPoints.m_id.size()), count); matchRatio = static_cast(count) / totalCount; #ifdef DEBUG std::cout << "checkMatchRatio : count = " << count << " unorderedPointNum = " << unorderedPointsNum << " totalCount = " << totalCount << " matchRatio = " << matchRatio << std::endl; diff --git a/zebROS_ws/src/gpu_apriltag/CMakeLists.txt b/zebROS_ws/src/gpu_apriltag/CMakeLists.txt index d0b6bdcd2..cd9751741 100644 --- a/zebROS_ws/src/gpu_apriltag/CMakeLists.txt +++ 
b/zebROS_ws/src/gpu_apriltag/CMakeLists.txt @@ -5,6 +5,8 @@ enable_language(CUDA) include("../cmake_modules/CMakeOpt.cmake") set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Ofast -DNDEBUG -Wno-deprecated-declarations") +add_compile_options(-Wno-deprecated-declarations) ## Find catkin macros and libraries ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) ## is used, also find other catkin packages @@ -25,14 +27,16 @@ find_package(catkin REQUIRED COMPONENTS ## System dependencies are found with CMake's conventions # find_package(Boost REQUIRED COMPONENTS system) - set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) set(CMAKE_CXX_STANDARD_REQUIRED TRUE) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) + # Use the correct version of CUDA set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda) +find_package(TensorRT REQUIRED) find_package(CUDA REQUIRED) find_package(OpenCV REQUIRED) find_library(LIBNVTOOLSEXT nvToolsExt PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) @@ -147,6 +151,7 @@ include_directories( ../../../cccl/thrust ../../../cccl/libcudacxx/include ${CUDA_INCLUDE_DIRS} + ${TensorRT_INCLUDE_DIRS} ${catkin_INCLUDE_DIRS} ${FRC971} ${FRC971}/third_party/apriltag @@ -168,12 +173,31 @@ add_library(${PROJECT_NAME} src/stubs.cpp ${FRC971}/frc971/orin/apriltag.cc ${FRC971}/frc971/orin/apriltag_detect.cc + ${FRC971}/frc971/orin/aruco_dict.cpp + ${FRC971}/frc971/orin/confidence_filter.cu ${FRC971}/frc971/orin/cuda.cc ${FRC971}/frc971/orin/cuda_event_timing.cc ${FRC971}/frc971/orin/cuda_utils.cc ${FRC971}/frc971/orin/labeling_allegretti_2019_BKE.cc ${FRC971}/frc971/orin/line_fit_filter.cc ${FRC971}/frc971/orin/threshold.cc + ${FRC971}/frc971/orin/engine.cpp + ${FRC971}/frc971/orin/decoder_engine.cpp + ${FRC971}/frc971/orin/decoder_preprocess.cu + ${FRC971}/frc971/orin/decoder_softmax.cu + ${FRC971}/frc971/orin/grid_prior.cu + ${FRC971}/frc971/orin/marker_dict.cpp + ${FRC971}/frc971/orin/stag_decoder.cpp + ${FRC971}/frc971/orin/stage2_corners.cu + ${FRC971}/frc971/orin/stage2_keypoint_group.cu + ${FRC971}/frc971/orin/stage2_keypoint_group_trust.cpp + ${FRC971}/frc971/orin/stage2_keypoint_trust.cu + ${FRC971}/frc971/orin/stage2_keypoint.cu + ${FRC971}/frc971/orin/suppress_and_average_keypoints.cu + ${FRC971}/frc971/orin/unit_arucotag.cpp + ${FRC971}/frc971/orin/unit_chessboard_tag.cpp + ${FRC971}/frc971/orin/unit_tag_template.cpp + ${FRC971}/frc971/orin/warp_perspective_points.cpp ${FRC971}/third_party/apriltag/apriltag.c ${FRC971}/third_party/apriltag/apriltag_pose.c ${FRC971}/third_party/apriltag/apriltag_quad_thresh.c @@ -200,6 +224,7 @@ target_link_libraries(${PROJECT_NAME} ${catkin_LIBRARIES} ${OpenCV_LIBS} ${CUDA_LIBRARIES} + ${TensorRT_LIBRARIES} ${LIBNVTOOLSEXT} ${glog_LIBRARIES} # absl::flags_internal diff --git a/zebROS_ws/src/gpu_apriltag/cmake/FindTensorRT.cmake b/zebROS_ws/src/gpu_apriltag/cmake/FindTensorRT.cmake new file mode 100644 index 000000000..087cffc17 --- /dev/null +++ b/zebROS_ws/src/gpu_apriltag/cmake/FindTensorRT.cmake @@ -0,0 +1,87 @@ +# source: +# https://github.com/NVIDIA/tensorrt-laboratory/blob/master/cmake/FindTensorRT.cmake + +# This module defines the following variables: +# +# :: +# +# TensorRT_INCLUDE_DIRS +# TensorRT_LIBRARIES +# TensorRT_FOUND +# +# :: +# +# TensorRT_VERSION_STRING - version (x.y.z) +# TensorRT_VERSION_MAJOR - major version (x) +# TensorRT_VERSION_MINOR - minor version (y) +# TensorRT_VERSION_PATCH - 
patch version (z) +# +# Hints +# ^^^^^ +# A user may set ``TensorRT_DIR`` to an installation root to tell this module where to look. +# +set(_TensorRT_SEARCHES) + +if(TensorRT_DIR) + set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_DIR} NO_DEFAULT_PATH) + list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT) +endif() + +# appends some common paths +set(_TensorRT_SEARCH_NORMAL + PATHS "/usr" + ) +list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL) + +# Include dir +foreach(search ${_TensorRT_SEARCHES}) + find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include) +endforeach() + +if(NOT TensorRT_LIBRARY) + foreach(search ${_TensorRT_SEARCHES}) + find_library(TensorRT_LIBRARY NAMES nvinfer ${${search}} PATH_SUFFIXES lib) + endforeach() +endif() + +if(NOT TensorRT_PARSERS_LIBRARY) + foreach(search ${_TensorRT_SEARCHES}) + find_library(TensorRT_NVPARSERS_LIBRARY NAMES nvparsers ${${search}} PATH_SUFFIXES lib) + endforeach() +endif() + +if(NOT TensorRT_NVONNXPARSER_LIBRARY) + foreach(search ${_TensorRT_SEARCHES}) + find_library(TensorRT_NVONNXPARSER_LIBRARY NAMES nvonnxparser ${${search}} PATH_SUFFIXES lib) + endforeach() +endif() + +mark_as_advanced(TensorRT_INCLUDE_DIR) + +if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") + + string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") + set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") +endif() + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING) + +if(TensorRT_FOUND) + set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) + + if(NOT TensorRT_LIBRARIES) + set(TensorRT_LIBRARIES ${TensorRT_LIBRARY} ${TensorRT_NVONNXPARSER_LIBRARY} ${TensorRT_NVPARSERS_LIBRARY}) + endif() + + if(NOT TARGET TensorRT::TensorRT) + add_library(TensorRT::TensorRT UNKNOWN IMPORTED) + set_target_properties(TensorRT::TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}") + set_property(TARGET TensorRT::TensorRT APPEND PROPERTY IMPORTED_LOCATION "${TensorRT_LIBRARY}") + endif() +endif() \ No newline at end of file diff --git a/zebROS_ws/src/gpu_apriltag/models/.gitignore b/zebROS_ws/src/gpu_apriltag/models/.gitignore new file mode 100644 index 000000000..4c785b991 --- /dev/null +++ b/zebROS_ws/src/gpu_apriltag/models/.gitignore @@ -0,0 +1 @@ +*engine* diff --git a/zebROS_ws/src/gpu_apriltag/models/arucotag_decoder.calib b/zebROS_ws/src/gpu_apriltag/models/arucotag_decoder.calib new file mode 100644 index 000000000..a1e8d1384 --- /dev/null +++ b/zebROS_ws/src/gpu_apriltag/models/arucotag_decoder.calib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20d4a006023f0ec0afd03640606d8893a8f9899008183396328364cf6c74f65e +size 5054 diff --git a/zebROS_ws/src/gpu_apriltag/models/arucotag_decoder.onnx 
b/zebROS_ws/src/gpu_apriltag/models/arucotag_decoder.onnx new file mode 100644 index 000000000..b1e6dbb32 --- /dev/null +++ b/zebROS_ws/src/gpu_apriltag/models/arucotag_decoder.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29678b7526e597a35b28b35bdd845c2977e1a099f8a7c36a93b4eb68551a29cf +size 901355 diff --git a/zebROS_ws/src/tf_object_detection/src/apriltag_decoder_calibrator.py b/zebROS_ws/src/tf_object_detection/src/apriltag_decoder_calibrator.py new file mode 100644 index 000000000..6a25d9b45 --- /dev/null +++ b/zebROS_ws/src/tf_object_detection/src/apriltag_decoder_calibrator.py @@ -0,0 +1,156 @@ +"""calibrator.py +From https://raw.githubusercontent.com/jkjung-avt/tensorrt_demos/master/yolo/calibrator.py + +The original code could be found in TensorRT-7.x sample code: +"samples/python/int8_caffe_mnist/calibrator.py". I made the +modification so that the Calibrator could handle MS-COCO dataset +images instead of MNIST. +""" + +# +# Copyright 1993-2019 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO LICENSEE: +# +# This source code and/or documentation ("Licensed Deliverables") are +# subject to NVIDIA intellectual property rights under U.S. and +# international Copyright laws. +# +# These Licensed Deliverables contained herein is PROPRIETARY and +# CONFIDENTIAL to NVIDIA and is being provided under the terms and +# conditions of a form of NVIDIA software license agreement by and +# between NVIDIA and Licensee ("License Agreement") or electronically +# accepted by Licensee. Notwithstanding any terms or conditions to +# the contrary in the License Agreement, reproduction or disclosure +# of the Licensed Deliverables to any third party without the express +# written consent of NVIDIA is prohibited. +# +# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE +# LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE +# SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS +# PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. +# NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED +# DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, +# NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE +# LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY +# SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THESE LICENSED DELIVERABLES. +# +# U.S. Government End Users. These Licensed Deliverables are a +# "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT +# 1995), consisting of "commercial computer software" and "commercial +# computer software documentation" as such terms are used in 48 +# C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government +# only as a commercial end item. Consistent with 48 C.F.R.12.212 and +# 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all +# U.S. Government End Users acquire the Licensed Deliverables with +# only those rights set forth herein. +# +# Any use of the Licensed Deliverables in individual and commercial +# software must include, in the user documentation and internal +# comments to the code, the above Disclaimer and U.S. Government End +# Users Notice. 
+ + +import os +import numpy as np +import cv2 +import pycuda.driver as cuda +import tensorrt as trt +from random import shuffle + + +def _preprocess_yolo(img, input_shape): + """Preprocess an image before TRT YOLO inferencing. + + # Args + img: uint8 numpy array of shape either (img_h, img_w, 3) + or (img_h, img_w) + input_shape: a tuple of (H, W) + + # Returns + preprocessed img: float32 numpy array of shape (H, W) + """ + if img.ndim != 2: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if img.shape != input_shape: + img = cv2.resize(img, (input_shape[1], input_shape[0])) + img = img.astype(np.float32) / 255.0 + return img + + +class ApriltagDecoderEntropyCalibrator(trt.IInt8EntropyCalibrator2): + """ApriltagDecoderEntropyCalibrator + + This class implements TensorRT's IInt8EntropyCalibtrator2 interface. + It reads all images from the specified directory and generates INT8 + calibration data for YOLO models accordingly. + """ + + def __init__(self, img_dir, net_hw, cache_file, batch_size=1): + if not os.path.isdir(img_dir) and not cache_file: # is there is a calibration file, we don't care about the images dir + raise FileNotFoundError('%s does not exist' % img_dir) + if len(net_hw) != 2 or net_hw[0] % 32 or net_hw[1] % 32: + raise ValueError('bad net shape: %s' % str(net_hw)) + + super().__init__() # trt.IInt8EntropyCalibrator2.__init__(self) + + self.img_dir = img_dir + self.net_hw = net_hw + self.cache_file = cache_file + self.batch_size = batch_size + self.blob_size = net_hw[0] * net_hw[1] * np.dtype('float32').itemsize * batch_size + + self.pngs = [f for f in os.listdir(img_dir) if f.endswith('.png')] + shuffle(self.pngs) + # The number "500" is NVIDIA's suggestion. See here: + # https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#optimizing_int8_c + if len(self.pngs) < 500: + print('WARNING: found less than 500 images in %s!' % img_dir) + else: + self.pngs = self.pngs[:9500] + self.current_index = 0 + + # Allocate enough memory for a whole batch. + self.device_input = cuda.mem_alloc(self.blob_size) + + def __del__(self): + del self.device_input # free CUDA memory + + def get_batch_size(self): + return self.batch_size + + def get_batch(self, names): + if self.current_index + self.batch_size > len(self.pngs): + return None + # current_batch = int(self.current_index / self.batch_size) + + batch = [] + for i in range(self.batch_size): + img_path = os.path.join( + self.img_dir, self.pngs[self.current_index + i]) + img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) + assert img is not None, 'failed to read %s' % img_path + batch.append(_preprocess_yolo(img, self.net_hw)) + batch = np.stack(batch) + + assert batch.nbytes == self.blob_size, "Calibrator: batch.nbytes / blob size mismatch" + + cuda.memcpy_htod(self.device_input, np.ascontiguousarray(batch)) + self.current_index += self.batch_size + return [self.device_input] + + def read_calibration_cache(self): + # If there is a cache, use it instead of calibrating again. + # Otherwise, implicitly return None. + if os.path.exists(self.cache_file): + with open(self.cache_file, 'rb') as f: + return f.read() + + def write_calibration_cache(self, cache): + with open(self.cache_file, 'wb') as f: + f.write(cache) \ No newline at end of file