diff --git a/common/geometry/BUILD b/common/geometry/BUILD index 0b95ff7df..2c750e8b0 100644 --- a/common/geometry/BUILD +++ b/common/geometry/BUILD @@ -38,4 +38,31 @@ cc_test( ":translate_types", "@com_google_googletest//:gtest_main" ] -) \ No newline at end of file +) + + +cc_library( + name = "camera", + hdrs = ["camera.hh"], + srcs = ["camera.cc"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + "@opencv", + ":translate_types", + "@gtsam", + "//common:check" + ] +) + +cc_test( + name = "camera_test", + srcs = ["camera_test.cc"], + visibility = ["//visibility:public"], + deps = [ + ":camera", + "@com_google_googletest//:gtest_main", + "//visualization/opencv:opencv_viz", + ":translate_types" + ] +) diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc new file mode 100644 index 000000000..46ddb340a --- /dev/null +++ b/common/geometry/camera.cc @@ -0,0 +1,63 @@ +#include "common/geometry/camera.hh" + +#include +#include + +#include "common/check.hh" +#include "common/geometry/translate_types.hh" + +namespace robot::geometry { +Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic) { + Eigen::Matrix3d K; + K << intrinsic.fx(), intrinsic.skew(), intrinsic.px(), 0, intrinsic.fy(), intrinsic.py(), 0, 0, + 1; + return K; +} + +Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point) { + return K * p_cam_point; +} + +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog) { + return K.inverse() * pixel_homog; +} + +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog, + const double depth) { + return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.); +} + +std::optional estimate_cam0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, + const cv::Mat &K) { + ROBOT_CHECK(matches.size() >= 5 && kpts1.size() >= 5 && kpts0.size() >= 5); + Eigen::Isometry3d result; + std::vector pts1; + std::vector pts2; + for (const cv::DMatch &match : matches) { + pts1.push_back(kpts0[match.queryIdx].pt); + pts2.push_back(kpts1[match.trainIdx].pt); + } + ROBOT_CHECK(pts1.size() == pts2.size() && pts1.size() >= 5); + try { + cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0); + cv::Mat R_c1_c0, t_c1_c0; + ROBOT_CHECK(!E.empty(), "Essential matrix is empty."); + // TOOD: handle multiple returned candidate E matrices better (they are stacked on top of + // each other in E) + if (E.rows > 3) { + E = E.rowRange(0, 3); + } + cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0); + result.linear() = cv_to_eigen_mat(R_c1_c0); + result.translation() = cv_to_eigen_mat(t_c1_c0); + result = result.inverse(); + return result; + } catch (const std::exception &e) { + std::cerr << "Failed to estimate pose up to scale cam0_from_cam1.\n" + << e.what() << std::endl; + return std::nullopt; + } +} +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh new file mode 100644 index 000000000..8048bac87 --- /dev/null +++ b/common/geometry/camera.hh @@ -0,0 +1,30 @@ +#pragma once +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "gtsam/geometry/Cal3_S2.h" +#include "opencv2/opencv.hpp" + +namespace robot::geometry { +Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic); + +/// @return Homogeneous pixel projection coordinates +Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point); +/** + * @param pixel_homog homogeneous pixel cordinates 3x1 + * @return 3d coordinate in camera frame + */ +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog); +/** + * @param pixel_inhomog inhomogeneous pixel cordinates 2x1 + * @return 3d coordinate in camera frame + */ +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog, + const double depth); + +std::optional estimate_cam0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, + const cv::Mat &K); +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc new file mode 100644 index 000000000..04c7411f2 --- /dev/null +++ b/common/geometry/camera_test.cc @@ -0,0 +1,129 @@ +#include "common/geometry/camera.hh" + +#include + +#include "common/geometry/translate_types.hh" +#include "gtest/gtest.h" +#include "visualization/opencv/opencv_viz.hh" + +class CameraTestHelper { + public: + static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { + return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height; + } +}; + +namespace robot::geometry { +TEST(CameraTest, test_get_intrinsics) { + const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5; + const gtsam::Cal3_S2 K(fx, fy, s, px, py); + const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K); + + EXPECT_FLOAT_EQ(fx, intrinsic_mat(0, 0)); + EXPECT_FLOAT_EQ(fy, intrinsic_mat(1, 1)); + EXPECT_FLOAT_EQ(s, intrinsic_mat(0, 1)); + EXPECT_FLOAT_EQ(px, intrinsic_mat(0, 2)); + EXPECT_FLOAT_EQ(py, intrinsic_mat(1, 2)); +} + +TEST(CameraTest, test_projection) { + const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5; + const gtsam::Cal3_S2 K(fx, fy, s, px, py); + const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K); + + const Eigen::Vector3d p_cam_lmk1(0.1, 0.15, 1.2); + const Eigen::Vector3d pxl_homog_cam_lmk1 = project(intrinsic_mat, p_cam_lmk1); + const Eigen::Vector2d pxl_inhomog_cam_lmk1 = + pxl_homog_cam_lmk1.head<2>() / pxl_homog_cam_lmk1(2); + const Eigen::Vector3d p_cam_lmk1_deproject = deproject(intrinsic_mat, pxl_homog_cam_lmk1); + const Eigen::Vector3d p_cam_lmk1_deproject_inhomog = + deproject(intrinsic_mat, pxl_inhomog_cam_lmk1, p_cam_lmk1(2)); + + EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject)); + EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject_inhomog)); +} + +TEST(CameraTest, test_estimate_pose) { + std::vector p_W_cube; + float cube_size = 1.0f; + p_W_cube.push_back(Eigen::Vector3d(0, 0, 0)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, 0)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, 0)); + p_W_cube.push_back(Eigen::Vector3d(0, cube_size, 0)); + p_W_cube.push_back(Eigen::Vector3d(0, 0, cube_size)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, cube_size)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size)); + p_W_cube.push_back(Eigen::Vector3d(0, cube_size, cube_size)); + + Eigen::Matrix3d R_new_points( + Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + + // const int initial_size = p_W_cube.size(); + const Eigen::Vector3d p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2); + for (const Eigen::Vector3d &point_W_cube : p_W_cube) { + p_W_cube.emplace_back(R_new_points * (point_W_cube - p_world_cube_center) + + p_world_cube_center); + } + + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + cv::Mat K = (cv::Mat_(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); + + Eigen::Matrix3d K_eig = cv_to_eigen_mat(K); + + Eigen::Matrix3d R_world_from_cam0( + Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + Eigen::Isometry3d world_from_cam0 = Eigen::Isometry3d::Identity(); + world_from_cam0.linear() = R_world_from_cam0; + world_from_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2); + + Eigen::Matrix3d R_world_45deg( + Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix()); + Eigen::Isometry3d world_from_cam1; + world_from_cam1.linear() = R_world_45deg * world_from_cam0.linear(); + world_from_cam1.translation() = + R_world_45deg * (world_from_cam0.translation() - p_world_cube_center) + p_world_cube_center; + + std::vector kpts0; + std::vector kpts1; + std::vector matches; + + for (const Eigen::Vector3d &point_W_cube : p_W_cube) { + Eigen::Vector3d p_cube_points_in_cam0 = + (world_from_cam0.inverse() * + Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.)) + .head<3>(); + Eigen::Vector3d p_cube_points_in_cam1 = + (world_from_cam1.inverse() * + Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.)) + .head<3>(); + Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cube_points_in_cam0; + Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cube_points_in_cam1; + Eigen::Vector2d pxl_c0_pcube = pxl_c0_pcube_homog.head<2>() / pxl_c0_pcube_homog(2); + Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2); + if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) && + CameraTestHelper::pixel_in_range(pxl_c1_pcube, img_width, img_height)) { + kpts0.emplace_back(pxl_c0_pcube[0], pxl_c0_pcube[1], 3); + kpts1.emplace_back(pxl_c1_pcube[0], pxl_c1_pcube[1], 3); + matches.emplace_back(kpts0.size() - 1, kpts1.size() - 1, 0); + } + } + + std::optional cam0_from_cam1_estimate = + estimate_cam0_from_cam1(kpts0, kpts1, matches, K); + if (cam0_from_cam1_estimate) { + Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1; + cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm(); + EXPECT_TRUE(cam0_from_cam1_estimate->translation().isApprox(cam0_from_cam1.translation(), + 0.000001)); + EXPECT_TRUE(cam0_from_cam1_estimate->linear().isApprox(cam0_from_cam1.linear(), 0.001)); + } +} +} // namespace robot::geometry \ No newline at end of file diff --git a/common/gps/BUILD b/common/gps/BUILD index 43907c146..4c1877509 100644 --- a/common/gps/BUILD +++ b/common/gps/BUILD @@ -19,6 +19,29 @@ cc_test( ] ) +cc_library( + name = "frame_translation", + srcs = ["frame_translation.cc"], + hdrs = ["frame_translation.hh"], + visibility = ["//visibility:public"], + deps = [ + "@com_google_googletest//:gtest_main", + "@geographiclib//:geographiclib", + "@eigen" + ] +) + +cc_test( + name = "frame_translation_test", + srcs = ["frame_translation_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + ":frame_translation", + "@eigen", + "//common:check" + ] +) + py_library( name = "web_mercator", srcs = ["web_mercator.py"], diff --git a/common/gps/frame_translation.cc b/common/gps/frame_translation.cc new file mode 100644 index 000000000..28c0875ea --- /dev/null +++ b/common/gps/frame_translation.cc @@ -0,0 +1,32 @@ +#include "common/gps/frame_translation.hh" + +#include "GeographicLib/Constants.hpp" +#include "GeographicLib/Geocentric.hpp" +#include "GeographicLib/Geodesic.hpp" +#include "GeographicLib/LocalCartesian.hpp" + +namespace robot::gps { +Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF) { + static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); + + double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z(); + + double lat_deg, lon_deg, alt_m; + earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m); + + return Eigen::Vector3d(lat_deg, lon_deg, alt_m); +} + +Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate) { + double lat_deg = gcs_coordinate.x(); + double lon_deg = gcs_coordinate.y(); + double alt_m = gcs_coordinate.z(); + + double x, y, z; + + static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); + earth.Forward(lat_deg, lon_deg, alt_m, x, y, z); + + return Eigen::Vector3d(x, y, z); +} +} // namespace robot::gps \ No newline at end of file diff --git a/common/gps/frame_translation.hh b/common/gps/frame_translation.hh new file mode 100644 index 000000000..60d03c8e9 --- /dev/null +++ b/common/gps/frame_translation.hh @@ -0,0 +1,8 @@ +#pragma once + +#include "Eigen/Core" + +namespace robot::gps { +Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF); +Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate); +} // namespace robot::gps diff --git a/common/gps/frame_translation_test.cc b/common/gps/frame_translation_test.cc new file mode 100644 index 000000000..7e6afa47c --- /dev/null +++ b/common/gps/frame_translation_test.cc @@ -0,0 +1,29 @@ +#include "common/gps/frame_translation.hh" + +#include "Eigen/Core" +#include "common/check.hh" +#include "gtest/gtest.h" + +namespace robot::gps { +TEST(frame_translation_test, lla_from_ecef) { + const Eigen::Vector3d lla_easthampton_ma(42.2608, -72.6634, 51.82); // + const Eigen::Vector3d ecef_easthampton_ma = ecef_from_lla(lla_easthampton_ma); + + const Eigen::Vector3d ecef_expected_easthampton_ma(1408753.87, -4512832.60, 4267122.34); + + constexpr double tol = 1e-9; + ROBOT_CHECK(ecef_easthampton_ma.isApprox(ecef_expected_easthampton_ma, tol), + ecef_easthampton_ma, ecef_expected_easthampton_ma); +} + +TEST(frame_translation_test, ecef_from_lla) { + const Eigen::Vector3d ecef_palace_versailles(4205987.87, 155694.68, 4776399.15); + const Eigen::Vector3d lla_palace_versailles = lla_from_ecef(ecef_palace_versailles); + + const Eigen::Vector3d expected_lla_palace_versailles(48.8049440, 2.1199720, 131.985); + + constexpr double tol = 1e-4; + ROBOT_CHECK(lla_palace_versailles.isApprox(expected_lla_palace_versailles, tol), + lla_palace_versailles, expected_lla_palace_versailles); +} +} // namespace robot::gps \ No newline at end of file diff --git a/common/gps/geographiclib_test.cc b/common/gps/geographiclib_test.cc index 5ef8a1fb3..19bc98a2b 100644 --- a/common/gps/geographiclib_test.cc +++ b/common/gps/geographiclib_test.cc @@ -8,7 +8,7 @@ #include "GeographicLib/LocalCartesian.hpp" #include "gtest/gtest.h" -namespace robot::experimental::learn_descriptors { +namespace robot::gps { TEST(GeographiclibTest, airport_dist) { const GeographicLib::Geodesic& geod = GeographicLib::Geodesic::WGS84(); // Distance from JFK to LHR @@ -58,4 +58,4 @@ TEST(GeographiclibTest, local_cartestian) { EXPECT_NEAR(calais_height_meters, 264.915, 1e-3); } } -} // namespace robot::experimental::learn_descriptors \ No newline at end of file +} // namespace robot::gps \ No newline at end of file diff --git a/common/gps/nmea_test.cc b/common/gps/nmea_test.cc index c1be0aca4..25b38b09f 100644 --- a/common/gps/nmea_test.cc +++ b/common/gps/nmea_test.cc @@ -2,7 +2,7 @@ #include "nmea/message/gga.hpp" #include "nmea/sentence.hpp" -namespace robot::experimental::learn_descriptors { +namespace robot::gps { TEST(NmeaTest, parse) { // Read an NMEA string from your serial port std::string nmea_string = @@ -44,4 +44,4 @@ TEST(NmeaTest, generate) { EXPECT_EQ(nmea_sentence.get_field(1), "abc"); EXPECT_EQ(nmea_sentence.get_field(2), ""); } -} // namespace robot::experimental::learn_descriptors \ No newline at end of file +} // namespace robot::gps \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index e80062e51..3f1229b82 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -1,134 +1,358 @@ -package(features=["warning_compile_flags"]) +package(features = ["warning_compile_flags"]) cc_library( - name = "learn_descriptors", - hdrs = ["learn_descriptors.hh"], - visibility = ["//visibility:public"], - srcs = ["learn_descriptors.cc"], + name = "learn_descriptors", + srcs = ["learn_descriptors.cc"], + hdrs = ["learn_descriptors.hh"], + visibility = ["//visibility:public"], ) cc_test( - name = "learn_descriptors_test", - srcs = ["learn_descriptors_test.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - ":learn_descriptors", - ] + name = "learn_descriptors_test", + srcs = ["learn_descriptors_test.cc"], + deps = [ + ":learn_descriptors", + "@com_google_googletest//:gtest_main", + ], ) cc_library( - name = "visual_odometry", - hdrs = ["visual_odometry.hh"], - visibility = ["//visibility:public"], - srcs = ["visual_odometry.cc"], - deps = [ - "@opencv//:opencv" - ] + name = "structure_from_motion", + srcs = ["structure_from_motion.cc"], + hdrs = ["structure_from_motion.hh"], + copts = [ + "-Wno-error=unused-parameter", + ], + visibility = ["//visibility:public"], + deps = [ + ":backend", + ":frontend", + ":frame", + ":image_point", + "//common:check", + "//visualization/opencv:opencv_viz", + "@eigen", + "@gtsam", + "@opencv", + ], ) cc_library( - name = "symphony_lake_parser", - hdrs = ["symphony_lake_parser.hh"], - copts = ["-Wno-unused-parameter"], - visibility = ["//visibility:public"], - srcs = ["symphony_lake_parser.cc"], - deps = [ - "@symphony_lake_parser", - "//common:check" - ] + name = "symphony_lake_parser", + srcs = ["symphony_lake_parser.cc"], + hdrs = ["symphony_lake_parser.hh"], + copts = [ + "-Wno-unused-parameter", + ], + data = ["@symphony_lake_snippet//:files"], + visibility = ["//visibility:public"], + deps = [ + "//common:check", + "@eigen", + "@symphony_lake_parser", + ], ) cc_test( - name = "visual_odometry_test", - srcs = ["visual_odometry_test.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - ":visual_odometry" - ] + name = "symphony_lake_parser_test", + srcs = ["symphony_lake_parser_test.cc"], + copts = ["-Wno-unused-parameter"], + data = ["@symphony_lake_snippet//:files"], + deps = [ + ":symphony_lake_parser", + "//visualization/opencv:opencv_viz", + "@com_google_googletest//:gtest_main", + ], ) cc_test( - name = "symphony_lake_parser_test", - srcs = ["symphony_lake_parser_test.cc"], - copts = ["-Wno-unused-parameter"], - data = ["@symphony_lake_snippet//:files"], - deps = [ - "@com_google_googletest//:gtest_main", - ":symphony_lake_parser", - ] + name = "gtsam_test", + srcs = ["gtsam_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + "@gtsam", + ], ) -cc_test( - name = "gtsam_test", - srcs = ["gtsam_test.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - "@gtsam//:gtsam", - ] +cc_library( + name = "image_point", + hdrs = ["image_point.hh"], + visibility = ["//visibility:public"], + deps = [ + "//common/liegroups:se3", + "@eigen", + ], ) cc_library( - name = "image_point", - hdrs = ["image_point.hh"], + name = "four_seasons_transforms", + hdrs = ["four_seasons_transforms.hh"], + srcs = ["four_seasons_transforms.cc"], + visibility = ["//visibility:public"], + deps = [ + ":four_seasons_parser_detail", + "//common/liegroups:se3", + "//common:check", + "@eigen", + ], +) + +cc_library( + name = "image_point_four_seasons", + hdrs = ["image_point_four_seasons.hh"], + srcs = ["image_point_four_seasons.cc"], + visibility = ["//visibility:public"], + deps = [ + ":image_point", + ":gps_data", + ":four_seasons_transforms", + "//common/liegroups:se3", + "//common/gps:frame_translation", + "//common:check", + "@eigen", + "@opencv", + "@geographiclib", + ] +) + +cc_library( + name = "gps_data", + hdrs = ["gps_data.hh"], + visibility = ["//visibility:public"], deps = [ - "//common/liegroups:se3", - "@opencv//:opencv", - "@eigen" + "@eigen", ] ) cc_library( - name = "four_seasons_parser", + name = "four_seasons_parser", hdrs = ["four_seasons_parser.hh"], srcs = ["four_seasons_parser.cc"], + visibility = ["//visibility:public"], deps = [ - "@eigen", - "@opencv//:opencv", - "//common/liegroups:se3", - "//common/time:robot_time", - "@com_google_absl//absl/strings:str_format", - ":image_point", - "@nmea", - "@geographiclib", - "//common:check" + + ":image_point_four_seasons", + ":four_seasons_transforms", + ":four_seasons_parser_detail", + "//common:check", + "//common/liegroups:se3", + "@eigen", + "@opencv", + ":camera_calibration" + ], +) + +cc_library( + name = "four_seasons_parser_detail", + hdrs = ["four_seasons_parser_detail.hh"], + srcs = ["four_seasons_parser_detail.cc"], + visibility = ["//visibility:public"], + deps = [ + ":camera_calibration", + ":gps_data", + "@com_google_absl//absl/strings:str_format", + "@nmea", ] ) cc_binary( - name = "four_seasons_parser_example", - srcs = ["four_seasons_parser_example.cc"], - deps = [ - ":four_seasons_parser", - "@cxxopts//:cxxopts", - "//common:check", - "@geographiclib" - ] + name = "four_seasons_parser_example", + srcs = ["four_seasons_parser_example.cc"], + deps = [ + ":four_seasons_parser", + ":gps_data", + "//common:check", + "@cxxopts", + ], ) cc_binary( - name = "four_seasons_parser_example_viz", - srcs = ["four_seasons_parser_example_viz.cc"], - deps = [ - ":four_seasons_parser", - "@cxxopts//:cxxopts", - "//common:check", - "//visualization/opencv:opencv_viz", - "@eigen", - ] + name = "four_seasons_parser_example_viz", + srcs = ["four_seasons_parser_example_viz.cc"], + deps = [ + ":four_seasons_parser", + ":frontend", + "//common:check", + "//common/gps:frame_translation", + "//visualization/opencv:opencv_viz", + "@cxxopts", + "@eigen", + ], ) cc_test( - name = "four_seasons_parser_test", - srcs = ["four_seasons_parser_test.cc"], - data = ["@four_seasons_snippet//:files"], - deps = [ - "@com_google_googletest//:gtest_main", - "//common/liegroups:se3", - "@com_google_absl//absl/strings:str_format", - "@eigen", - ":four_seasons_parser", - "@nmea", - "@geographiclib", - "//common:check" - ] + name = "four_seasons_parser_test", + srcs = ["four_seasons_parser_test.cc"], + data = ["@four_seasons_snippet//:files"], + deps = [ + ":four_seasons_parser", + ":four_seasons_parser_detail", + ":image_point_four_seasons", + "//common:check", + "//common/gps:frame_translation", + "//common/liegroups:se3", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + "@eigen", + "@geographiclib", + "@nmea", + ":camera_calibration" + ], ) + +cc_library( + name = "frontend_definitions", + hdrs = ["frontend_definitions.hh"], + visibility = ["//visibility:public"], + deps = [ + ":structure_from_motion_types", + "@gtsam", + ], +) + +cc_library( + name = "structure_from_motion_types", + hdrs = ["structure_from_motion_types.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam", + "@opencv", + ], +) + +cc_library( + name = "frame", + srcs = ["frame.cc"], + hdrs = ["frame.hh"], + visibility = ["//visibility:public"], + deps = [ + ":frontend_definitions", + "//common:check", + "@gtsam", + "@opencv", + "@eigen" + ], +) + +cc_library( + name = "spatial_test_scene", + srcs = ["spatial_test_scene.cc"], + hdrs = ["spatial_test_scene.hh"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + "@gtsam", + "@opencv", + "//common:check" + ], +) + +cc_library( + name = "spatial_test_scene_cube", + hdrs = ["spatial_test_scene_cube.hh"], + visibility = ["//visibility:public"], + deps = [ + ":spatial_test_scene", + "@eigen", + ], +) + +cc_binary( + name = "spatial_test_scene_cube_example", + srcs = ["spatial_test_scene_cube_example.cc"], + deps = [ + ":spatial_test_scene_cube", + "//visualization/opencv:opencv_viz", + "//common:check", + "@eigen", + "@gtsam" + ], +) + +cc_library( + name = "frontend", + srcs = ["frontend.cc"], + hdrs = ["frontend.hh"], + copts = [ + "-Wno-error=unused-parameter", + ], + visibility = ["//visibility:public"], + deps = [ + ":frame", + ":frontend_definitions", + ":image_point_four_seasons", + ":structure_from_motion_types", + "//common:check", + "//common/geometry:camera", + "@opencv", + "@gtsam", + "@eigen", + ], +) + +cc_test( + name = "frontend_test", + srcs = ["frontend_test.cc"], + deps = [ + ":frame", + ":frontend", + ":image_point", + "@com_google_googletest//:gtest_main", + "@opencv", + "@eigen" + ], +) + +cc_library( + name = "backend", + srcs = ["backend.cc"], + hdrs = ["backend.hh"], + copts = [ + "-Wno-error=unused-parameter", + ], + visibility = ["//visibility:public"], + deps = [ + ":structure_from_motion_types", + ":frame", + "//common:check", + "//common/geometry:camera", + "@boost//:smart_ptr", + "@gtsam", + "@opencv", + ], +) + +cc_library( + name = "camera_calibration", + hdrs = ["camera_calibration.hh"], + visibility = ["//visibility:public"], + deps = [ + "@opencv" + ] +) + +cc_binary( + name = "structure_from_motion_example", + srcs = ["structure_from_motion_example.cc"], + copts = [ + "-Wno-error=unused-parameter", + "-Wno-error=unused-function", + ], + deps = [ + ":structure_from_motion", + ":frontend", + ":backend", + ":four_seasons_parser", + ":frame", + ":image_point", + ":image_point_four_seasons", + ":spatial_test_scene_cube", + ":structure_from_motion_types", + ":camera_calibration", + "//visualization/opencv:opencv_viz", + "@boost//:smart_ptr", + "@cxxopts", + "@eigen", + "@opencv", + ], +) \ No newline at end of file diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc new file mode 100644 index 000000000..37a9c7a4d --- /dev/null +++ b/experimental/learn_descriptors/backend.cc @@ -0,0 +1,342 @@ +#include "experimental/learn_descriptors/backend.hh" + +#include +#include +#include +#include +#include +#include + +#include "boost/make_shared.hpp" +#include "common/check.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Point2.h" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" +#include "gtsam/geometry/triangulation.h" +#include "gtsam/inference/Symbol.h" +#include "gtsam/linear/NoiseModel.h" +#include "gtsam/navigation/GPSFactor.h" +#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/nonlinear/NonlinearFactorGraph.h" +#include "gtsam/nonlinear/Values.h" +#include "gtsam/slam/BetweenFactor.h" +#include "gtsam/slam/PriorFactor.h" +#include "gtsam/slam/ProjectionFactor.h" + +namespace robot::experimental::learn_descriptors { + +std::optional Backend::attempt_triangulate( + const std::vector& cam_poses, const std::vector& cam_obs, + gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error, const bool verbose) { + gtsam::Point3 p_lmk_in_world; + if (cam_poses.size() > 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + p_lmk_in_world = gtsam::triangulatePoint3( + cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); + + } catch (const std::exception& e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate or discard + if (verbose) { + std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: " + << e.what() << ". Discarding involved keypoints." << std::endl; + } + return std::nullopt; + } + } else { + return std::nullopt; + } + // Optional: perform an explicit cheirality check + for (const auto& pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + return std::nullopt; + } + } + + // Cheirality & reprojection checks + for (size_t i = 0; i < cam_poses.size(); ++i) { + const auto& pose = cam_poses[i]; + // Cheirality + gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); + if (p_cam.z() <= 0) { + if (verbose) { + std::cerr << "[attempt_triangulate] point behind camera " << i + << " (z=" << p_cam.z() << ")\n"; + } + return std::nullopt; + } + // Reprojection error + if (max_reproj_error > 0) { + gtsam::PinholeCamera cam(pose, *K); + const auto reproj = cam.project(p_lmk_in_world); + const double err = (reproj - cam_obs[i]).norm(); + if (err > max_reproj_error) { + if (verbose) { + std::cerr << "[attempt_triangulate] reprojection error too large on view " << i + << " (" << err << " px)\n"; + } + return std::nullopt; + } + } + } + return p_lmk_in_world; +} + +gtsam::Rot3 average_rotations(const std::vector& rotations, int max_iter = 10) { + if (rotations.empty()) throw std::runtime_error("No rotations to average"); + if (rotations.size() == 1) return rotations.front(); + + gtsam::Rot3 mean = rotations[0]; + + for (int iter = 0; iter < max_iter; ++iter) { + Eigen::Vector3d total = Eigen::Vector3d::Zero(); + for (const gtsam::Rot3& R : rotations) { + gtsam::Rot3 delta = mean.between(R); // R_mean^T * R + total += gtsam::Rot3::Logmap(delta); // Lie algebra element in R³ + } + total /= static_cast(rotations.size()); + mean = mean.compose(gtsam::Rot3::Expmap(total)); // update estimate + } + + return mean; +} + +void Backend::populate_rotation_estimate(std::vector& shared_frames) { + struct FrameVertex; + using SharedFrameVertex = std::shared_ptr; + struct FrameVertex { + Frame& frame; + std::optional w_from_this; + std::unordered_map + neighbor_id_to_rot; // rot is frame_from_neighbor_frame + std::unordered_set parents; + std::unordered_set children; + }; + std::unordered_map frame_tree; + for (SharedFrame& shared_frame : shared_frames) { + frame_tree.emplace(shared_frame->id_, std::make_shared(*shared_frame)); + } + // populate all frames and add children to neighbors + std::queue q; // indices of Frame in frames + std::unordered_set seen_frame_id; + q.push(shared_frames.front()->id_); + + gtsam::NonlinearFactorGraph graph; + gtsam::Values initial; + // TODO: somehow get more informative/more robust noise values + // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength + gtsam::noiseModel::Diagonal::shared_ptr noise_rotation = + gtsam::noiseModel::Isotropic::Sigma(3, 0.1); // radians + + std::vector symbols_frames; + while (!q.empty()) { + Frame frame = *shared_frames[q.front()]; + q.pop(); + seen_frame_id.insert(frame.id_); + SharedFrameVertex frame_vertex = frame_tree.at(frame.id_); + if (seen_frame_id.size() == 1) { // make the first frame rotation identity unless the + // initial guess is set otherwise + frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_ + ? *frame_vertex->frame.world_from_cam_initial_guess_ + : gtsam::Rot3::Identity(); + graph.add(gtsam::PriorFactor( + gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_), + *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3))); + } + frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this; + std::cout << frame.to_string() << std::endl; + + const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_); + symbols_frames.push_back(symbol_frame); + initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_); + for (const auto& [other_frame_id, frame_from_other_frame] : + frame.frame_from_other_frames_) { + if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) { + q.push(other_frame_id); + seen_frame_id.insert(other_frame_id); + } + frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame); + SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id); + frame_vertex->children.insert(other_frame_vertex); + other_frame_vertex->parents.insert(frame_vertex); + other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_, + frame_from_other_frame.inverse()); + if (!other_frame_vertex->w_from_this) { + other_frame_vertex->w_from_this = + *frame_vertex->w_from_this * frame_from_other_frame; + other_frame_vertex->frame.world_from_cam_initial_guess_ = + other_frame_vertex->w_from_this; + } + + graph.add(gtsam::BetweenFactor( + symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id), + frame_from_other_frame.inverse(), noise_rotation)); + } + frame_tree.emplace(frame.id_, frame_vertex); + } + + gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); + gtsam::Values result = optimizer.optimize(); + + // Output results and assign results to frames + for (const gtsam::Symbol& symbol_rotation : symbols_frames) { + gtsam::Rot3 w_from_frame = result.at(symbol_rotation); + frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame; + } +} + +void Backend::calculate_initial_values(bool interpolate_gps) { + populate_rotation_estimate(shared_frames_); + if (interpolate_gps) { + std::vector frames_with_gps; + for (SharedFrame& shared_frame : shared_frames_) { // this assumes that frames are ordered + // by seq (time), later on may have to + // think about this more if trying to run pipeline on unordered images or sets + // whose capture times are on different days + if (shared_frame->cam_in_world_initial_guess_) { + frames_with_gps.push_back(shared_frame); + } + } + if (frames_with_gps.size() < 2) return; + size_t idx_gps = 0; + for (SharedFrame& shared_frame : shared_frames_) { + // advance to next guess window if needed + while (idx_gps + 1 < frames_with_gps.size() - 1 && + shared_frame->seq_ > frames_with_gps[idx_gps + 1]->seq_) { + idx_gps++; + } + + if (shared_frame->cam_in_world_initial_guess_) { + continue; + } + ROBOT_CHECK( + shared_frame->cam_in_world_interpolated_guess_, + "Currently assuming that the frontend populated interpolated initial guesses"); + Eigen::Matrix3d interpolated_covariance( + *shared_frame->translation_covariance_in_cam_ * + 100.0); // very jank for now, not true interpolation + shared_frame->translation_covariance_in_cam_ = interpolated_covariance; + shared_frame->cam_in_world_initial_guess_ = + shared_frame->cam_in_world_interpolated_guess_; + } + } +} + +void Backend::populate_graph(const FeatureTracks& feature_tracks) { + ROBOT_CHECK(shared_frames_.size() > 3); + ROBOT_CHECK(shared_frames_.front()->world_from_cam_groundtruth_, + "We're assuming the first camera has groundtruth for now"); + + shared_frames_.front()->world_from_cam_initial_guess_ = + shared_frames_.front() + ->world_from_cam_groundtruth_->rotation(); // align rotation with groundtruth + + Backend::populate_rotation_estimate(shared_frames_); + + // add first cam as prior + Eigen::Vector3d cam0_in_w = *shared_frames_.front()->cam_in_world_interpolated_guess_; + const gtsam::Pose3 w_from_cam0_init_estimate( + *shared_frames_.front()->world_from_cam_initial_guess_, gtsam::Point3{0, 0, 0}); + const gtsam::Symbol symbol_cam0(symbol_char_pose, 0); + graph_.add(gtsam::PriorFactor( + symbol_cam0, w_from_cam0_init_estimate, + noise_tight_prior_)); // currently assuming that we have groundtruth on the first pose + initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate); + + // add gps factors and populate initial pose values + for (size_t i = 1; i < shared_frames_.size(); i++) { + const Frame& frame = *shared_frames_[i]; + gtsam::Pose3 world_from_cam_estimate( + frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_ + : gtsam::Rot3::Identity(), + *shared_frames_[i]->cam_in_world_interpolated_guess_ - cam0_in_w); + world_from_cam_initial_estimates_.emplace(frame.id_, world_from_cam_estimate); + + const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_); + initial_estimate_.insert(cam_symbol, world_from_cam_initial_estimates_[frame.id_]); + if (frame.cam_in_world_initial_guess_) { + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( + frame.translation_covariance_in_cam_ + ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)), + std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)), + std::sqrt((*frame.translation_covariance_in_cam_)(2, 2))) + : gps_sigmas_fallback_); + std::cout << "gps noise " << i << gps_noise->sigmas() << std::endl; + graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); + } + } + + // add landmarks + for (size_t i = 0; i < feature_tracks.size(); i++) { + std::vector world_from_lmk_cams; + std::vector lmk_observations; + for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) { + world_from_lmk_cams.push_back(world_from_cam_initial_estimates_[frame_id]); + lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y); + } + std::optional landmark_estimate = + attempt_triangulate(world_from_lmk_cams, lmk_observations, shared_frames_[0]->K_, 2.0, + false); // all K are the same for now... + if (landmark_estimate) { + const gtsam::Symbol lmk_symbol(symbol_char_landmark, i); + for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) { + graph_.add(gtsam::GenericProjectionFactor( + gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise_, + gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol, + shared_frames_[0]->K_)); // all K are the same for now... + } + initial_estimate_.insert(lmk_symbol, *landmark_estimate); + lmk_initial_estimates_.emplace(i, *landmark_estimate); + } + } +} + +void Backend::solve_graph(const int num_epochs, + std::optional iter_debug_func) { + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; // We'll manually step it + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); + + double prev_error = optimizer.error(); + double lowest_error = prev_error; + gtsam::Values best_values = optimizer.values(); + for (int i = 0; i < num_epochs; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + if (curr_error < lowest_error) { + lowest_error = curr_error; + best_values = optimizer.values(); + } + + if (iter_debug_func) { + (*iter_debug_func)(optimizer.values(), i); + } + // if (std::abs(prev_error - curr_error) < 1e-6) { + // std::cout << "Converged at iteration " << i << "\n"; + // break; + // } + } + result_ = best_values; +} + +void Backend::clear() { + shared_frames_.clear(); + shared_frames_.shrink_to_fit(); + + initial_estimate_ = gtsam::Values(); + result_ = gtsam::Values(); + graph_ = gtsam::NonlinearFactorGraph(); + + std::unordered_map().swap(world_from_cam_initial_estimates_); + std::unordered_map().swap(lmk_initial_estimates_); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh new file mode 100644 index 000000000..6f9a8c0c7 --- /dev/null +++ b/experimental/learn_descriptors/backend.hh @@ -0,0 +1,79 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/geometry/camera.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Point2.h" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" +#include "gtsam/inference/Symbol.h" +#include "gtsam/linear/NoiseModel.h" +#include "gtsam/nonlinear/NonlinearFactorGraph.h" +#include "gtsam/nonlinear/Values.h" + +namespace robot::experimental::learn_descriptors { +class Backend { + public: + static constexpr char symbol_char_pose = 'x'; + static constexpr char symbol_char_rotation = 'r'; + static constexpr char symbol_char_translation = 't'; + static constexpr char symbol_char_bearing = 'b'; + static constexpr char symbol_char_landmark = 'l'; + static constexpr char symbol_char_cam_cal = 'k'; + + static std::optional attempt_triangulate( + const std::vector &cam_poses, const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error = 2.0, + const bool verbose = true); + static gtsam::Rot3 average_rotations(const std::vector &rotations, + int max_iter = 10); + // use rotation averaging to set the rotation initial guess for each frame in the world frame. + // the world frame will be the first frame in the vector + static void populate_rotation_estimate(std::vector &shared_frames); + + void add_frames(std::vector &shared_frames) { + shared_frames_.reserve(shared_frames_.size() + shared_frames.size()); + shared_frames_.insert(shared_frames_.end(), shared_frames.begin(), shared_frames.end()); + }; + void calculate_initial_values(bool interpolate_gps = true); + void populate_graph(const FeatureTracks &feature_tracks); + typedef int epoch; + using graph_step_debug_func = std::function; + void solve_graph(const int num_epochs, + std::optional iter_debug_func = std::nullopt); + void clear(); + + const std::vector &shared_frames() const { return shared_frames_; }; + const gtsam::Values ¤t_initial_values() const { return initial_estimate_; }; + const gtsam::Values &result() const { return result_; }; + + private: + std::vector shared_frames_; + + gtsam::Values initial_estimate_; + gtsam::Values result_; + gtsam::NonlinearFactorGraph graph_; + + std::unordered_map world_from_cam_initial_estimates_; + std::unordered_map lmk_initial_estimates_; + + gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior_ = + gtsam::noiseModel::Diagonal::Sigmas((gtsam::Vector(6) << 1e-3, 1e-3, + 1e-3, // rotation stdev in radians + 1e-3, 1e-3, 1e-3 // translation stdev in meters + ) + .finished()); + gtsam::Vector3 gps_sigmas_fallback_{0.5, 0.5, 0.5}; + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = + gtsam::noiseModel::Isotropic::Sigma(2, 10.0); +}; +using Landmark = gtsam::Point3; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh new file mode 100644 index 000000000..610dd2d8d --- /dev/null +++ b/experimental/learn_descriptors/camera_calibration.hh @@ -0,0 +1,22 @@ +#pragma once + +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +struct CameraCalibrationFisheye { + CameraCalibrationFisheye(double fx, double fy, double cx, double cy, double k1, double k2, + double k3, double k4) + : fx(fx), fy(fy), cx(cx), cy(cy), k1(k1), k2(k2), k3(k3), k4(k4) {} + double fx, fy, cx, cy, k1, k2, k3, k4; + + cv::Mat k_mat() { + cv::Mat K_mat = (cv::Mat_(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); + return K_mat; + } + + cv::Mat d_mat() { + cv::Mat D_mat = (cv::Mat_(4, 1) << k1, k2, k3, k4); + return D_mat; + } +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index fdf76afde..61ef35e18 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -1,39 +1,29 @@ #include "experimental/learn_descriptors/four_seasons_parser.hh" -#include #include -#include -#include #include #include -#include +#include #include #include -#include #include #include #include "Eigen/Core" #include "Eigen/Geometry" -#include "GeographicLib/Geocentric.hpp" -#include "GeographicLib/LocalCartesian.hpp" -#include "absl/strings/str_split.h" -#include "absl/strings/strip.h" -#include "common/check.hh" #include "common/liegroups/se3.hh" -#include "common/time/robot_time.hh" -#include "nmea/message/gga.hpp" -#include "nmea/message/rmc.hpp" -#include "nmea/sentence.hpp" +#include "experimental/learn_descriptors/camera_calibration.hh" +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" namespace robot::experimental::learn_descriptors { -using namespace detail; +using namespace detail::four_seasons_parser; FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, const std::filesystem::path& calibration_dir) : root_dir_(root_dir), img_dir_(root_dir / "distorted_images" / "cam0"), cal_(txt_parser_help::load_camera_calibration(calibration_dir)), - transforms_(root_dir / "Transformations.txt") { + shared_transforms_(std::make_shared( + root_dir / "Transformations.txt")) { const std::filesystem::path path_img = root_dir_ / "times.txt"; const std::filesystem::path path_gnss = root_dir_ / "GNSSPoses.txt"; const std::filesystem::path path_vio = root_dir_ / "result.txt"; @@ -51,8 +41,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, size_t id = 0; for (const std::pair>& pair_time_data : img_time_list) { const size_t time_key = pair_time_data.first; - ImagePoint img_pt; + ImagePointFourSeasons img_pt; img_pt.id = id; + img_pt.K = cal_; + img_pt.shared_static_transforms = shared_transforms_; img_pt.seq = std::stoull( pair_time_data.second[static_cast(txt_parser_help::ImgIdx::TIME_NS)]); if (gnss_poses_time_map.find(time_key) != gnss_poses_time_map.end()) { @@ -76,9 +68,6 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, std::stod( parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_Z)])); img_pt.AS_w_from_gnss_cam = liegroups::SE3(R_gps_cam_from_AS_w, t_gps_cam_from_AS_w); - } else { - std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id - << std::endl; } if (vio_poses_time_map.find(time_key) != vio_poses_time_map.end()) { const std::vector& parsed_line_vio = vio_poses_time_map.at(time_key); @@ -94,29 +83,27 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, std::stod( parsed_line_vio[static_cast(txt_parser_help::ResultIdx::QUAT_Z)])); img_pt.AS_w_from_vio_cam = liegroups::SE3(R_AS_w_from_vio_cam, t_AS_w_from_vio_cam); - } else { - std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id - << std::endl; } + img_pt_vector_.push_back(img_pt); id++; } // popoulate gps to nearest img time // TODO: could be linear time... but good enough for (const auto& [time_unix_ns, gps_data] : gps_time_list) { - auto it = std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns, - [](const ImagePoint& img_pt, const size_t query_unix_time) { - return img_pt.seq < query_unix_time; - }); + auto it = + std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns, + [](const ImagePointFourSeasons& img_pt, const size_t query_unix_time) { + return img_pt.seq < query_unix_time; + }); size_t insert_idx = std::distance(img_pt_vector_.begin(), it); if (it != img_pt_vector_.begin() && - detail::abs_diff(it->seq, time_unix_ns) > - detail::abs_diff(std::prev(it)->seq, time_unix_ns)) { + abs_diff(it->seq, time_unix_ns) > abs_diff(std::prev(it)->seq, time_unix_ns)) { insert_idx--; } // NOTE: in future, could perhaps use gps data that isn't associated with an img_pt in some // way. maybe to help with interpolation, estimate velocity - if (detail::abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) < + if (abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) < FourSeasonsParser::CAM_CAP_DELTA_NS) { img_pt_vector_[insert_idx].gps_gcs = gps_data; } @@ -126,284 +113,4 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, cv::Mat FourSeasonsParser::load_image(const size_t m) const { return get_image_point(m).load_image(img_dir_); } - -FourSeasonsParser::FourSeasonsTransforms::FourSeasonsTransforms( - const std::filesystem::path& path_transforms) { - std::ifstream file_transforms(path_transforms); - std::string line; - while (std::getline(file_transforms, line)) { - if (line.find("transform_S_AS") != std::string::npos) { - std::getline(file_transforms, line); - S_from_AS = get_transform_from_line(line); - } else if (line.find("TS_cam_imu") != std::string::npos) { - std::getline(file_transforms, line); - cam_from_imu = get_transform_from_line(line); - } else if (line.find("transform_w_gpsw") != std::string::npos) { - std::getline(file_transforms, line); - w_from_gpsw = get_transform_from_line(line); - } else if (line.find("transform_gps_imu") != std::string::npos) { - std::getline(file_transforms, line); - gps_from_imu = get_transform_from_line(line); - } else if (line.find("transform_e_gpsw") != std::string::npos) { - std::getline(file_transforms, line); - e_from_gpsw = get_transform_from_line(line); - } else if (line.find("GNSS scale") != std::string::npos) { - std::getline(file_transforms, line); - gnss_scale = std::stod(line); - } - } -} - -liegroups::SE3 FourSeasonsParser::FourSeasonsTransforms::get_transform_from_line( - const std::string& line) { - enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W }; - std::vector parsed_transform_line = txt_parser_help::parse_line_adv(line, ","); - if (parsed_transform_line.size() < 7) { - std::stringstream error_stream; - error_stream << "parsed_transform_line doesn't have sufficient entries for " - "transform! parsed_transform_line.size(): " - << parsed_transform_line.size() << std::endl; - throw std::runtime_error(error_stream.str()); - } - std::vector transform_nums; - for (const std::string& num : parsed_transform_line) { - transform_nums.push_back(static_cast(std::stod(num))); - } - Eigen::Vector3d translation(transform_nums[TransformEntry::T_X], - transform_nums[TransformEntry::T_Y], - transform_nums[TransformEntry::T_Z]); - Eigen::Quaterniond rotation( - transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X], - transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]); - return liegroups::SE3(rotation, translation); -} - -Eigen::Vector3d FourSeasonsParser::gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF) { - static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); - - double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z(); - - double lat_deg, lon_deg, alt_m; - earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m); - - return Eigen::Vector3d(lat_deg, lon_deg, alt_m); -} - -Eigen::Vector3d FourSeasonsParser::ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate) { - double lat_deg = gcs_coordinate.x(); - double lon_deg = gcs_coordinate.y(); - double alt_m = gcs_coordinate.z(); - - double x, y, z; - - static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); - earth.Forward(lat_deg, lon_deg, alt_m, x, y, z); - - return Eigen::Vector3d(x, y, z); -} - -namespace detail { -namespace txt_parser_help { - -std::vector parse_line_adv(const std::string& line, const std::string& delim) { - if (delim == " ") { - return std::vector( - absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace())); - } - return std::vector(absl::StrSplit(line, delim, absl::SkipWhitespace())); -} - -FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration( - const std::filesystem::path& calibration_dir) { - std::ifstream file_calibration(calibration_dir / "calib_0.txt"); - std::string line_calibration; - std::getline(file_calibration, line_calibration); - std::vector parsed_calib_line = parse_line_adv(line_calibration, " "); - return FourSeasonsParser::CameraCalibrationFisheye{ - std::stod(parsed_calib_line[static_cast(CalibIdx::FX)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::FY)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::CX)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::CY)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K1)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K2)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K3)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K4)])}; -} - -// minimum sig figs of time in seconds (of type double) for all entries of results.txt -size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) { - std::ifstream file_vio(path_vio); - std::string line; - int min_figs = INT_MAX; - while (std::getline(file_vio, line)) { - const std::vector parsed_line = parse_line_adv(line, " "); - const std::string str_time_seconds = parsed_line[static_cast(ResultIdx::TIME_SEC)]; - const int figs = - str_time_seconds.size() + - (str_time_seconds.find('.') != std::string::npos ? -1 : 0); // '.' is not a fig - min_figs = std::min(min_figs, figs); - } - return min_figs; -} - -const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, - const size_t time_sig_figs) { - TimeDataList time_map_img; - std::ifstream file_img(path_img); - std::string line; - while (std::getline(file_img, line)) { - std::vector parsed_line = parse_line_adv(line, " "); - size_t time_ns = std::stoull(parsed_line[static_cast(ImgIdx::TIME_NS)]); - time_map_img.push_back( - std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line)); - } - return time_map_img; -} - -const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, - const size_t time_sig_figs) { - TimeDataMap time_map_gnss_poses; - std::ifstream file_gnss_poses(path_gnss); - std::string line; - std::getline(file_gnss_poses, - line); // advance a line to get past the top comment in GNSSPoses.txt - while (std::getline(file_gnss_poses, line)) { - std::vector parsed_line = parse_line_adv(line, ","); - size_t time_ns = std::stoull(parsed_line[static_cast(GPSIdx::TIME_NS)]); - time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); - } - return time_map_gnss_poses; -} - -const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, - const size_t time_sig_figs) { - TimeDataMap time_map_vio; - std::ifstream file_vio(path_vio); - std::string line; - while (std::getline(file_vio, line)) { - std::vector parsed_line = parse_line_adv(line, " "); - double time_seconds = std::stod(parsed_line[static_cast(ResultIdx::TIME_SEC)]); - size_t time_ns = time_seconds * 1e9; - time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); - } - return time_map_vio; -} -} // namespace txt_parser_help - -namespace gps_parser_help { -size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) { - std::chrono::sys_days date = std::chrono::year_month_day{ - std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month}, - std::chrono::day{utc_date.day}}; - std::chrono::nanoseconds utc_time_day_ns = - std::chrono::nanoseconds{static_cast(utc_time_day_seconds * 1e9)}; - std::chrono::sys_time timestamp = date + utc_time_day_ns; - return timestamp.time_since_epoch().count(); -} -TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) { - TimeGPSList time_list_gps; - std::ifstream file_gps(path_gps); - std::string line; - std::optional nmea_sentence; - nmea::date date_last; - date_last.day = 255; - date_last.month = 255; - date_last.year = 255; - double time_of_day_last = -1.0; - while (std::getline(file_gps, line) && !line.empty()) { - try { - nmea_sentence = nmea::sentence(line); - } catch (const std::exception& e) { - std::cerr << "failed to parse line as nmea sentence: " << e.what() - << "\ncontinuing to next line\n"; - continue; - } - if (nmea_sentence->type() == "GGA") { - nmea::gga gga(*nmea_sentence); - if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) { - if (std::abs(gga.utc.get() - time_of_day_last) < - 1e-3) { // GGA messages for this dataset come second - ImagePoint::GPSData gps_data; - gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get()); - gps_data.latitude = gga.latitude.get(); - gps_data.longitude = gga.longitude.get(); - if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); - time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data)); - } - } - } else if (nmea_sentence->type() == "RMC") { - nmea::rmc rmc(*nmea_sentence); - if (rmc.utc.exists() && rmc.date.exists()) { - date_last = rmc.date.get(); - time_of_day_last = rmc.utc.get(); - } - } else if (nmea_sentence->type() == "GST") { - std::optional gst_data = parse_gpgst(nmea_sentence->nmea_string()); - if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) < - 1e-3) { // GST message for this dataset come third - size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns); - if (time_list_gps.back().first == unix_time_ns) { - time_list_gps.back().second.uncertainty.emplace( - gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m, - gst_data->error_orientation_deg, gst_data->rms_range_error_m); - } - } - } - } - return time_list_gps; -} - -std::vector split_nmea_sentence(const std::string& sentence) { - std::vector fields; - std::string field; - std::stringstream ss(sentence); - - while (std::getline(ss, field, ',')) { - // remove checksum from the last field if present - auto asterisk = field.find('*'); - if (asterisk != std::string::npos) field = field.substr(0, asterisk); - fields.push_back(field); - } - - return fields; -} -double time_of_day_seconds(const double utc_time_hhmmss) { - int hours = static_cast(utc_time_hhmmss / 10000); - int minutes = static_cast((utc_time_hhmmss - hours * 10000) / 100); - double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100; - return hours * 3600 + minutes * 60 + seconds; -} -std::optional parse_gpgst(const std::string& sentence) { - if (sentence.substr(0, 6) != "$GPGST") { - return std::nullopt; - } - - std::vector fields = split_nmea_sentence(sentence); - if (fields.size() != 9) { - return std::nullopt; - } - for (size_t i = 0; i < fields.size(); i++) { - if (fields[i].empty()) return std::nullopt; - } - - GSTData gst; - gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1])); - gst.rms_range_error_m = std::stod(fields[2]); - gst.error_semi_major_m = std::stod(fields[3]); - gst.error_semi_minor_m = std::stod(fields[4]); - gst.error_orientation_deg = std::stod(fields[5]); - gst.sigma_lat_m = std::stod(fields[6]); - gst.sigma_lon_m = std::stod(fields[7]); - gst.sigma_alt_m = std::stod(fields[8]); - - return gst; -} - -} // namespace gps_parser_help - -template -size_t abs_diff(const T& a, const T& b) { - return a > b ? a - b : b - a; -} -} // namespace detail } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh index 9709bc396..4458215c8 100644 --- a/experimental/learn_descriptors/four_seasons_parser.hh +++ b/experimental/learn_descriptors/four_seasons_parser.hh @@ -1,144 +1,56 @@ #pragma once -#include - #include #include +#include #include #include #include #include "common/liegroups/se3.hh" -#include "experimental/learn_descriptors/image_point.hh" -#include "nmea/message/gga.hpp" -#include "nmea/message/rmc.hpp" -#include "nmea/sentence.hpp" +#include "experimental/learn_descriptors/camera_calibration.hh" +#include "experimental/learn_descriptors/four_seasons_transforms.hh" +#include "experimental/learn_descriptors/image_point_four_seasons.hh" namespace robot::experimental::learn_descriptors { -class FourSeasonsParser { +class FourSeasonsParser : std::enable_shared_from_this { public: static constexpr double CAM_HZ = 30.0; static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ; - struct CameraCalibrationFisheye { - double fx, fy, cx, cy, k1, k2, k3, k4; - }; + FourSeasonsParser(const std::filesystem::path& root_dir, const std::filesystem::path& calibration_dir); + std::shared_ptr get_shared() { return shared_from_this(); } cv::Mat load_image(const size_t idx) const; - const ImagePoint& get_image_point(const size_t idx) const { return img_pt_vector_[idx]; }; + const ImagePointFourSeasons& get_image_point(const size_t idx) const { + return img_pt_vector_[idx]; + }; size_t num_images() const { return img_pt_vector_.size(); }; - const CameraCalibrationFisheye& camera_calibration() const { return cal_; }; + const std::shared_ptr& camera_calibration() const { return cal_; }; const liegroups::SE3& S_from_AS() const { - return transforms_.S_from_AS; + return shared_transforms_->S_from_AS; }; // metric scale from arbitrary (internal slam) scale - const liegroups::SE3& cam_from_imu() const { return transforms_.cam_from_imu; }; + const liegroups::SE3& cam_from_imu() const { return shared_transforms_->cam_from_imu; }; const liegroups::SE3& w_from_gpsw() const { - return transforms_.w_from_gpsw; + return shared_transforms_->w_from_gpsw; }; // visual world from local gps (ENU) const liegroups::SE3& gps_from_imu() const { - return transforms_.gps_from_imu; + return shared_transforms_->gps_from_imu; }; // phsyical onboard gps from physical onboard imu const liegroups::SE3& e_from_gpsw() const { - return transforms_.e_from_gpsw; + return shared_transforms_->e_from_gpsw; }; // ECEF from local gps (ENU) double gnss_scale() const { - return transforms_.gnss_scale; + return shared_transforms_->gnss_scale; }; // scale from vio frame to gnss frame. WARNING: will require retooling if the scales per // keyframe (pose) are not all one value. See more here: // https://github.com/pmwenzel/4seasons-dataset - static Eigen::Vector3d gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF); - static Eigen::Vector3d ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate); - protected: - struct FourSeasonsTransforms { - liegroups::SE3 S_from_AS; - liegroups::SE3 cam_from_imu; - liegroups::SE3 w_from_gpsw; - liegroups::SE3 gps_from_imu; - liegroups::SE3 e_from_gpsw; - double gnss_scale; - - FourSeasonsTransforms(const std::filesystem::path& path_transforms); - - private: - static liegroups::SE3 get_transform_from_line(const std::string& line); - }; const std::filesystem::path root_dir_; const std::filesystem::path img_dir_; - const CameraCalibrationFisheye cal_; - const FourSeasonsTransforms transforms_; - ImagePointVector img_pt_vector_; -}; - -namespace detail { -template -std::size_t abs_diff(const T& a, const T& b); -namespace txt_parser_help { -using TimeDataMap = std::unordered_map>; -using TimeDataList = std::vector>>; -enum class GPSIdx { - TIME_NS = 0, - TRAN_X = 1, - TRAN_Y = 2, - TRAN_Z = 3, - QUAT_X = 4, - QUAT_Y = 5, - QUAT_Z = 6, - QUAT_W = 7, -}; -enum class ImgIdx { - TIME_NS = 0, - TIME_SEC = 1, - EXPOSURE_TIME = 2 // I'm not completely sure if this is correct - Nico -}; -enum class ResultIdx { - TIME_SEC = 0, - TRAN_X = 1, - TRAN_Y = 2, - TRAN_Z = 3, - QUAT_X = 4, - QUAT_Y = 5, - QUAT_Z = 6, - QUAT_W = 7, -}; -enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 }; -std::vector parse_line_adv(const std::string& line, const std::string& delim = " "); -FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration( - const std::filesystem::path& calibration_dir); -template -T round_to_sig_figs(T val, int n) { - if (val == 0) return 0; - double d = static_cast(val); - int exponent = static_cast(std::floor(std::log10(std::abs(d)))); - double multiplier = std::pow(10.0, n - exponent - 1); - return static_cast(std::round(d * multiplier) / multiplier); -} -size_t min_sig_figs_result_time(const std::filesystem::path& path_vio); -const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, - const size_t time_sig_figs); -const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, - const size_t time_sig_figs); -const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, - const size_t time_sig_figs); -} // namespace txt_parser_help -namespace gps_parser_help { -using TimeGPSList = std::vector>; -struct GSTData { - double utc_time_ns; - double rms_range_error_m; - double error_semi_major_m; - double error_semi_minor_m; - double error_orientation_deg; - double sigma_lat_m; - double sigma_lon_m; - double sigma_alt_m; + const std::shared_ptr cal_; + const std::shared_ptr shared_transforms_; + ImagePointFourSeasonsVector img_pt_vector_; }; -std::vector split_nmea_sentence(const std::string& nmea_sentence); -double time_of_day_seconds(const double utc_time_hhmmss); -std::optional parse_gpgst(const std::string& nmea_sentence); -size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds); -TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps); -} // namespace gps_parser_help -} // namespace detail } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc new file mode 100644 index 000000000..b41e71113 --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc @@ -0,0 +1,220 @@ +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "absl/strings/strip.h" +#include "nmea/message/gga.hpp" +#include "nmea/message/rmc.hpp" +#include "nmea/sentence.hpp" + +namespace robot::experimental::learn_descriptors::detail::four_seasons_parser { +namespace txt_parser_help { + +std::vector parse_line_adv(const std::string& line, const std::string& delim) { + if (delim == " ") { + return std::vector( + absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace())); + } + return std::vector(absl::StrSplit(line, delim, absl::SkipWhitespace())); +} + +std::shared_ptr load_camera_calibration( + const std::filesystem::path& calibration_dir) { + std::ifstream file_calibration(calibration_dir / "calib_0.txt"); + std::string line_calibration; + std::getline(file_calibration, line_calibration); + std::vector parsed_calib_line = parse_line_adv(line_calibration, " "); + return std::make_shared( + std::stod(parsed_calib_line[static_cast(CalibIdx::FX)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::FY)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::CX)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::CY)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K1)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K2)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K3)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K4)])); +} + +// minimum sig figs of time in seconds (of type double) for all entries of results.txt +size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) { + std::ifstream file_vio(path_vio); + std::string line; + int min_figs = INT_MAX; + while (std::getline(file_vio, line)) { + const std::vector parsed_line = parse_line_adv(line, " "); + const std::string str_time_seconds = parsed_line[static_cast(ResultIdx::TIME_SEC)]; + const int figs = + str_time_seconds.size() + + (str_time_seconds.find('.') != std::string::npos ? -1 : 0); // '.' is not a fig + min_figs = std::min(min_figs, figs); + } + return min_figs; +} + +const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, + const size_t time_sig_figs) { + TimeDataList time_map_img; + std::ifstream file_img(path_img); + std::string line; + while (std::getline(file_img, line)) { + std::vector parsed_line = parse_line_adv(line, " "); + size_t time_ns = std::stoull(parsed_line[static_cast(ImgIdx::TIME_NS)]); + time_map_img.push_back( + std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line)); + } + return time_map_img; +} + +const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, + const size_t time_sig_figs) { + TimeDataMap time_map_gnss_poses; + std::ifstream file_gnss_poses(path_gnss); + std::string line; + std::getline(file_gnss_poses, + line); // advance a line to get past the top comment in GNSSPoses.txt + while (std::getline(file_gnss_poses, line)) { + std::vector parsed_line = parse_line_adv(line, ","); + size_t time_ns = std::stoull(parsed_line[static_cast(GPSIdx::TIME_NS)]); + time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); + } + return time_map_gnss_poses; +} + +const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, + const size_t time_sig_figs) { + TimeDataMap time_map_vio; + std::ifstream file_vio(path_vio); + std::string line; + while (std::getline(file_vio, line)) { + std::vector parsed_line = parse_line_adv(line, " "); + double time_seconds = std::stod(parsed_line[static_cast(ResultIdx::TIME_SEC)]); + size_t time_ns = time_seconds * 1e9; + time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); + } + return time_map_vio; +} +} // namespace txt_parser_help + +namespace gps_parser_help { + +size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) { + std::chrono::sys_days date = std::chrono::year_month_day{ + std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month}, + std::chrono::day{utc_date.day}}; + std::chrono::nanoseconds utc_time_day_ns = + std::chrono::nanoseconds{static_cast(utc_time_day_seconds * 1e9)}; + std::chrono::sys_time timestamp = date + utc_time_day_ns; + return timestamp.time_since_epoch().count(); +} + +TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) { + TimeGPSList time_list_gps; + std::ifstream file_gps(path_gps); + std::string line; + std::optional nmea_sentence; + nmea::date date_last; + date_last.day = 255; + date_last.month = 255; + date_last.year = 255; + double time_of_day_last = -1.0; + while (std::getline(file_gps, line) && !line.empty()) { + try { + nmea_sentence = nmea::sentence(line); + } catch (const std::exception& e) { + std::cerr << "failed to parse line as nmea sentence: " << e.what() + << "\ncontinuing to next line\n"; + continue; + } + if (nmea_sentence->type() == "GGA") { + nmea::gga gga(*nmea_sentence); + if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) { + if (std::abs(gga.utc.get() - time_of_day_last) < + 1e-3) { // GGA messages for this dataset come second + GPSData gps_data; + gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get()); + gps_data.latitude = gga.latitude.get(); + gps_data.longitude = gga.longitude.get(); + if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); + time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data)); + } + } + } else if (nmea_sentence->type() == "RMC") { + nmea::rmc rmc(*nmea_sentence); + if (rmc.utc.exists() && rmc.date.exists()) { + date_last = rmc.date.get(); + time_of_day_last = rmc.utc.get(); + } + } else if (nmea_sentence->type() == "GST") { + std::optional gst_data = parse_gpgst(nmea_sentence->nmea_string()); + if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) < + 1e-3) { // GST message for this dataset come third + size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns); + if (time_list_gps.back().first == unix_time_ns) { + time_list_gps.back().second.uncertainty.emplace( + gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m, + gst_data->error_orientation_deg, gst_data->rms_range_error_m); + } + } + } + } + return time_list_gps; +} + +std::vector split_nmea_sentence(const std::string& sentence) { + std::vector fields; + std::string field; + std::stringstream ss(sentence); + + while (std::getline(ss, field, ',')) { + // remove checksum from the last field if present + auto asterisk = field.find('*'); + if (asterisk != std::string::npos) field = field.substr(0, asterisk); + fields.push_back(field); + } + + return fields; +} + +double time_of_day_seconds(const double utc_time_hhmmss) { + int hours = static_cast(utc_time_hhmmss / 10000); + int minutes = static_cast((utc_time_hhmmss - hours * 10000) / 100); + double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100; + return hours * 3600 + minutes * 60 + seconds; +} + +std::optional parse_gpgst(const std::string& sentence) { + if (sentence.substr(0, 6) != "$GPGST") { + return std::nullopt; + } + + std::vector fields = split_nmea_sentence(sentence); + if (fields.size() != 9) { + return std::nullopt; + } + for (size_t i = 0; i < fields.size(); i++) { + if (fields[i].empty()) return std::nullopt; + } + + GSTData gst; + gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1])); + gst.rms_range_error_m = std::stod(fields[2]); + gst.error_semi_major_m = std::stod(fields[3]); + gst.error_semi_minor_m = std::stod(fields[4]); + gst.error_orientation_deg = std::stod(fields[5]); + gst.sigma_lat_m = std::stod(fields[6]); + gst.sigma_lon_m = std::stod(fields[7]); + gst.sigma_alt_m = std::stod(fields[8]); + + return gst; +} +} // namespace gps_parser_help +} // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.hh b/experimental/learn_descriptors/four_seasons_parser_detail.hh new file mode 100644 index 000000000..0f0c30ad5 --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_parser_detail.hh @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "experimental/learn_descriptors/camera_calibration.hh" +#include "experimental/learn_descriptors/gps_data.hh" +#include "nmea/object/date.hpp" + +namespace robot::experimental::learn_descriptors::detail::four_seasons_parser { +template +std::size_t abs_diff(const T& a, const T& b) { + return a > b ? a - b : b - a; +}; +namespace txt_parser_help { +using TimeDataMap = std::unordered_map>; +using TimeDataList = std::vector>>; +enum class GPSIdx { + TIME_NS = 0, + TRAN_X = 1, + TRAN_Y = 2, + TRAN_Z = 3, + QUAT_X = 4, + QUAT_Y = 5, + QUAT_Z = 6, + QUAT_W = 7, +}; +enum class ImgIdx { + TIME_NS = 0, + TIME_SEC = 1, + EXPOSURE_TIME = 2 // I'm not completely sure if this is correct - Nico +}; +enum class ResultIdx { + TIME_SEC = 0, + TRAN_X = 1, + TRAN_Y = 2, + TRAN_Z = 3, + QUAT_X = 4, + QUAT_Y = 5, + QUAT_Z = 6, + QUAT_W = 7, +}; +enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 }; +std::vector parse_line_adv(const std::string& line, const std::string& delim = " "); +std::shared_ptr load_camera_calibration( + const std::filesystem::path& calibration_dir); +template +T round_to_sig_figs(T val, int n) { + if (val == 0) return 0; + double d = static_cast(val); + int exponent = static_cast(std::floor(std::log10(std::abs(d)))); + double multiplier = std::pow(10.0, n - exponent - 1); + return static_cast(std::round(d * multiplier) / multiplier); +} +size_t min_sig_figs_result_time(const std::filesystem::path& path_vio); +const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, + const size_t time_sig_figs); +const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, + const size_t time_sig_figs); +const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, + const size_t time_sig_figs); +} // namespace txt_parser_help +namespace gps_parser_help { +using TimeGPSList = std::vector>; +struct GSTData { + double utc_time_ns; + double rms_range_error_m; + double error_semi_major_m; + double error_semi_minor_m; + double error_orientation_deg; + double sigma_lat_m; + double sigma_lon_m; + double sigma_alt_m; +}; +std::vector split_nmea_sentence(const std::string& nmea_sentence); +double time_of_day_seconds(const double utc_time_hhmmss); +std::optional parse_gpgst(const std::string& nmea_sentence); +size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds); +TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps); +} // namespace gps_parser_help +} // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc index 064f404ef..e49467bfd 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example.cc @@ -64,7 +64,7 @@ int main(int argc, const char** argv) { const std::string img_str = "img " + std::to_string(i); cv::putText(img, img_str, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2); - const lrn_desc::ImagePoint img_pt = parser.get_image_point(i); + const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); std::stringstream ss_AS_w_from_gnss_cam; ss_AS_w_from_gnss_cam << "AS_w_from_gnss_cam: "; if (img_pt.AS_w_from_gnss_cam) { @@ -84,7 +84,7 @@ int main(int argc, const char** argv) { std::stringstream ss_gps; ss_gps << "gps_gcs: "; if (img_pt.gps_gcs) { - const lrn_desc::ImagePoint::GPSData& gps_data = *img_pt.gps_gcs; + const lrn_desc::GPSData& gps_data = *img_pt.gps_gcs; ss_gps << "long: " << gps_data.longitude << ", lat: " << gps_data.latitude; if (gps_data.altitude) { ss_gps << ", alt: " << *gps_data.altitude; diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index bca7ec2c4..4e52440d2 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -1,5 +1,7 @@ #include +#include #include +#include #include #include #include @@ -8,8 +10,10 @@ #include #include "common/check.hh" +#include "common/gps/frame_translation.hh" #include "cxxopts.hpp" #include "experimental/learn_descriptors/four_seasons_parser.hh" +#include "experimental/learn_descriptors/frontend.hh" #include "opencv2/opencv.hpp" #include "visualization/opencv/opencv_viz.hh" @@ -47,19 +51,42 @@ int main(int argc, const char** argv) { lrn_desc::FourSeasonsParser parser(path_data, path_calibration); + lrn_desc::FrontendParams params{lrn_desc::FrontendParams::ExtractorType::SIFT, + lrn_desc::FrontendParams::MatcherType::KNN, true, false}; + lrn_desc::Frontend frontend(params); + constexpr size_t NUM_IMAGES = 100; + (void)NUM_IMAGES; + auto start = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < NUM_IMAGES; i += 1) { + const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); + frontend.add_image(lrn_desc::ImageAndPoint{ + parser.load_image(i), std::make_shared(img_pt)}); + } + auto duration = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - start); + std::cout << "done adding! took " << duration.count() << " milliseconds" << std::endl; + frontend.populate_frames(); + start = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - start); + std::cout << "done populating frames! took " << duration.count() << " milliseconds" + << std::endl; + ROBOT_CHECK(parser.num_images() != 0); - std::vector viz_frames; // camera frames from visual world frame - std::vector viz_points; // camera frames from visual world frame + std::vector viz_frames; // camera frames from visual world frame + std::vector + viz_points; // camera frames from visual world frame Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity(); std::cout << "gnss scale: " << parser.gnss_scale() << std::endl; scale_mat.linear() *= parser.gnss_scale(); std::cout << "scale mat: " << scale_mat.matrix() << std::endl; + std::optional first_gps_to_cam; std::optional altitude_gps_from_gnss_cam; std::vector gps_ns_delta_from_shutter; - for (size_t i = 0; i < parser.num_images(); i += 1) { - const lrn_desc::ImagePoint img_pt = parser.get_image_point(i); + for (size_t i = 0; i < NUM_IMAGES; i += 1) { + const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); std::cout << img_pt.to_string() << std::endl; if (!img_pt.AS_w_from_gnss_cam) { continue; @@ -67,9 +94,13 @@ int main(int argc, const char** argv) { Eigen::Isometry3d w_from_gnss_cam(Eigen::Isometry3d(parser.S_from_AS().matrix()) * scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix())); - - viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); + Eigen::Isometry3d frontend_w_from_cam_groundtruth( + frontend.frames()[i]->world_from_cam_groundtruth_->matrix()); + ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()), + frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix()); + viz_frames.emplace_back(frontend_w_from_cam_groundtruth, + "x_frontend_ref_" + std::to_string(i)); if (img_pt.gps_gcs) { if (img_pt.gps_gcs->seq > img_pt.seq) { gps_ns_delta_from_shutter.push_back( @@ -88,28 +119,40 @@ int main(int argc, const char** argv) { (parser.e_from_gpsw() * parser.w_from_gpsw().inverse()).matrix()) * w_from_gnss_cam; const Eigen::Vector3d gnss_cam_in_gcs = - parser.gcs_from_ECEF(ECEF_from_gnss_cam.translation()); + robot::gps::lla_from_ecef(ECEF_from_gnss_cam.translation()); if (!altitude_gps_from_gnss_cam) { altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude); } + if (!first_gps_to_cam) { + first_gps_to_cam = + frontend.frames()[i]->world_from_cam_groundtruth_->translation() - + *frontend.frames()[i]->cam_in_world_initial_guess_; + } gcs_coordinate.z() += *altitude_gps_from_gnss_cam; std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs << "\ngps_gcs: " << gcs_coordinate << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl; - const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate); + const Eigen::Vector3d ECEF_from_gps = robot::gps::ecef_from_lla(gcs_coordinate); const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(), ECEF_from_gps.z(), 1); Eigen::Vector4d gps_in_w = Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) * ECEF_from_gps_hom; - viz_points.emplace_back(gps_in_w.head<3>(), "x_gps_" + std::to_string(i)); + const Eigen::Isometry3d cam_from_gps( + (parser.cam_from_imu() * parser.gps_from_imu().inverse()).matrix()); + Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() - cam_from_gps.translation(); + viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i)); + viz_points.emplace_back( + *frontend.frames()[i]->cam_in_world_initial_guess_ + *first_gps_to_cam, + "x_frontend_cam_gps" + std::to_string(i)); const Eigen::Vector3d gps_from_ref_in_world = gps_in_w.head<3>() - w_from_gnss_cam.translation(); std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl; } } + const double sum = std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0); double avg_ns_gps_delta = sum / gps_ns_delta_from_shutter.size(); @@ -136,6 +179,6 @@ int main(int argc, const char** argv) { std::cout << "max delta: " << max_delta << std::endl; std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl; std::cout << "got " << viz_points.size() << " points" << std::endl; - robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true, - "Viz Trajectory + GPS Points"); + robot::visualization::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true, + "Viz Trajectory + GPS Points"); } diff --git a/experimental/learn_descriptors/four_seasons_parser_test.cc b/experimental/learn_descriptors/four_seasons_parser_test.cc index ff49d935d..1b1a940b9 100644 --- a/experimental/learn_descriptors/four_seasons_parser_test.cc +++ b/experimental/learn_descriptors/four_seasons_parser_test.cc @@ -10,14 +10,12 @@ #include "Eigen/Core" #include "Eigen/Geometry" -#include "GeographicLib/Constants.hpp" -#include "GeographicLib/Geocentric.hpp" -#include "GeographicLib/Geodesic.hpp" -#include "GeographicLib/LocalCartesian.hpp" #include "absl/strings/str_split.h" #include "absl/strings/strip.h" #include "common/check.hh" +#include "common/gps/frame_translation.hh" #include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" #include "gtest/gtest.h" #include "nmea/message/gga.hpp" #include "nmea/message/rmc.hpp" @@ -35,6 +33,7 @@ static bool images_equal(cv::Mat img1, cv::Mat img2) { } namespace robot::experimental::learn_descriptors { +using namespace detail::four_seasons_parser; TEST(FourSeasonsParserTest, parser_test) { const std::filesystem::path dir_snippet = "external/four_seasons_snippet/recording_2020-04-07_11-33-45"; @@ -43,6 +42,7 @@ TEST(FourSeasonsParserTest, parser_test) { EXPECT_TRUE(std::filesystem::exists(dir_snippet)); EXPECT_TRUE(std::filesystem::exists(dir_calibration)); + std::cout << "heartbeat -1" << std::endl; FourSeasonsParser parser(dir_snippet, dir_calibration); // transformations test @@ -65,17 +65,17 @@ TEST(FourSeasonsParserTest, parser_test) { EXPECT_DOUBLE_EQ(gnss_scale, parser.gnss_scale()); // calibration test - const FourSeasonsParser::CameraCalibrationFisheye calibration_target = - detail::txt_parser_help::load_camera_calibration(dir_calibration); - const FourSeasonsParser::CameraCalibrationFisheye calibration = parser.camera_calibration(); - EXPECT_DOUBLE_EQ(calibration_target.cx, calibration.cx); - EXPECT_DOUBLE_EQ(calibration_target.cy, calibration.cy); - EXPECT_DOUBLE_EQ(calibration_target.fx, calibration.fx); - EXPECT_DOUBLE_EQ(calibration_target.fy, calibration.fy); - EXPECT_DOUBLE_EQ(calibration_target.k1, calibration.k1); - EXPECT_DOUBLE_EQ(calibration_target.k2, calibration.k2); - EXPECT_DOUBLE_EQ(calibration_target.k3, calibration.k3); - EXPECT_DOUBLE_EQ(calibration_target.k4, calibration.k4); + const std::shared_ptr calibration_target = + txt_parser_help::load_camera_calibration(dir_calibration); + const std::shared_ptr calibration = parser.camera_calibration(); + EXPECT_DOUBLE_EQ(calibration_target->cx, calibration->cx); + EXPECT_DOUBLE_EQ(calibration_target->cy, calibration->cy); + EXPECT_DOUBLE_EQ(calibration_target->fx, calibration->fx); + EXPECT_DOUBLE_EQ(calibration_target->fy, calibration->fy); + EXPECT_DOUBLE_EQ(calibration_target->k1, calibration->k1); + EXPECT_DOUBLE_EQ(calibration_target->k2, calibration->k2); + EXPECT_DOUBLE_EQ(calibration_target->k3, calibration->k3); + EXPECT_DOUBLE_EQ(calibration_target->k4, calibration->k4); EXPECT_NE(parser.num_images(), 0); @@ -87,17 +87,16 @@ TEST(FourSeasonsParserTest, parser_test) { const std::filesystem::path path_img_time = dir_snippet / "times.txt"; const std::filesystem::path path_gnss_poses = dir_snippet / "GNSSPoses.txt"; const std::filesystem::path path_vio = dir_snippet / "result.txt"; - const size_t min_sig_figs_time = detail::txt_parser_help::min_sig_figs_result_time(path_vio); + const size_t min_sig_figs_time = txt_parser_help::min_sig_figs_result_time(path_vio); // fetch data for all the fields (AS_w_from_gnss_cam, img times, result) - const detail::txt_parser_help::TimeDataList img_time_list = - detail::txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time); + const txt_parser_help::TimeDataList img_time_list = + txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time); EXPECT_EQ(img_time_list.size(), parser.num_images()); - const detail::txt_parser_help::TimeDataMap gnss_poses_time_map = - detail::txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses, - min_sig_figs_time); - const detail::txt_parser_help::TimeDataMap vio_time_map = - detail::txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time); + const txt_parser_help::TimeDataMap gnss_poses_time_map = + txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses, min_sig_figs_time); + const txt_parser_help::TimeDataMap vio_time_map = + txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time); Eigen::Matrix4d scale_mat = Eigen::Matrix4d::Identity(); scale_mat(0, 0) = scale_mat(1, 1) = scale_mat(2, 2) = parser.gnss_scale(); @@ -108,7 +107,7 @@ TEST(FourSeasonsParserTest, parser_test) { std::vector gps_ns_delta_from_shutter; for (size_t i = 0; i < parser.num_images(); i++) { - const ImagePoint& img_pt = parser.get_image_point(i); + const ImagePointFourSeasons& img_pt = parser.get_image_point(i); const cv::Mat img = parser.load_image(i); // seq (time in nanoseconds) test @@ -124,20 +123,22 @@ TEST(FourSeasonsParserTest, parser_test) { const std::vector& parsed_line_gnss_poses = gnss_poses_time_map.at(img_time_list[i].first); - Eigen::Vector3d gps_translation(std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::TRAN_X)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::TRAN_Y)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::TRAN_Z)])); - Eigen::Quaterniond gps_quat(std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_W)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_X)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_Y)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_Z)])); + Eigen::Vector3d gps_translation( + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::TRAN_X)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::TRAN_Y)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::TRAN_Z)])); + Eigen::Quaterniond gps_quat( + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_W)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_X)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_Y)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_Z)])); EXPECT_TRUE(liegroups::SE3(gps_quat, gps_translation) .matrix() .isApprox(img_pt.AS_w_from_gnss_cam->matrix())); @@ -148,20 +149,16 @@ TEST(FourSeasonsParserTest, parser_test) { const std::vector& parsed_line_gps = vio_time_map.at(img_time_list[i].first); Eigen::Vector3d ground_truth_translation( - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::TRAN_X)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::TRAN_Y)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::TRAN_Z)])); - Eigen::Quaterniond ground_truth_quat(std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_W)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_X)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_Y)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_Z)])); + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::TRAN_X)]), + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::TRAN_Y)]), + std::stod( + parsed_line_gps[static_cast(txt_parser_help::ResultIdx::TRAN_Z)])); + Eigen::Quaterniond ground_truth_quat( + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_W)]), + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_X)]), + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_Y)]), + std::stod( + parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_Z)])); EXPECT_TRUE(liegroups::SE3(ground_truth_quat, ground_truth_translation) .matrix() .isApprox(img_pt.AS_w_from_vio_cam->matrix())); @@ -177,8 +174,7 @@ TEST(FourSeasonsParserTest, parser_test) { ECEF_from_gnss_cam * gps_from_cam.inverse(); const Eigen::Vector3d& t_ECEF_from_gnss_gps = ECEF_from_gnss_gps.translation(); - const Eigen::Vector3d gnss_gps_gcs = - FourSeasonsParser::gcs_from_ECEF(t_ECEF_from_gnss_gps); + const Eigen::Vector3d gnss_gps_gcs = gps::lla_from_ecef(t_ECEF_from_gnss_gps); const Eigen::Vector3d raw_gps_gcs( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, diff --git a/experimental/learn_descriptors/four_seasons_transforms.cc b/experimental/learn_descriptors/four_seasons_transforms.cc new file mode 100644 index 000000000..c6dc25ac4 --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_transforms.cc @@ -0,0 +1,67 @@ +#include "experimental/learn_descriptors/four_seasons_transforms.hh" + +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/check.hh" +#include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" + +namespace robot::experimental::learn_descriptors { +FourSeasonsTransforms::StaticTransforms::StaticTransforms( + const std::filesystem::path& path_transforms) { + std::ifstream file_transforms(path_transforms); + ROBOT_CHECK(file_transforms, path_transforms); + std::string line; + while (std::getline(file_transforms, line)) { + if (line.find("transform_S_AS") != std::string::npos) { + std::getline(file_transforms, line); + S_from_AS = get_transform_from_line(line); + } else if (line.find("TS_cam_imu") != std::string::npos) { + std::getline(file_transforms, line); + cam_from_imu = get_transform_from_line(line); + } else if (line.find("transform_w_gpsw") != std::string::npos) { + std::getline(file_transforms, line); + w_from_gpsw = get_transform_from_line(line); + } else if (line.find("transform_gps_imu") != std::string::npos) { + std::getline(file_transforms, line); + gps_from_imu = get_transform_from_line(line); + } else if (line.find("transform_e_gpsw") != std::string::npos) { + std::getline(file_transforms, line); + e_from_gpsw = get_transform_from_line(line); + } else if (line.find("GNSS scale") != std::string::npos) { + std::getline(file_transforms, line); + gnss_scale = std::stod(line); + } + } +} + +liegroups::SE3 FourSeasonsTransforms::StaticTransforms::get_transform_from_line( + const std::string& line) { + enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W }; + std::vector parsed_transform_line = + detail::four_seasons_parser::txt_parser_help::parse_line_adv(line, ","); + if (parsed_transform_line.size() < 7) { + std::stringstream error_stream; + error_stream << "parsed_transform_line doesn't have sufficient entries for " + "transform! parsed_transform_line.size(): " + << parsed_transform_line.size() << std::endl; + throw std::runtime_error(error_stream.str()); + } + std::vector transform_nums; + for (const std::string& num : parsed_transform_line) { + transform_nums.push_back(static_cast(std::stod(num))); + } + Eigen::Vector3d translation(transform_nums[TransformEntry::T_X], + transform_nums[TransformEntry::T_Y], + transform_nums[TransformEntry::T_Z]); + Eigen::Quaterniond rotation( + transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X], + transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]); + return liegroups::SE3(rotation, translation); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_transforms.hh b/experimental/learn_descriptors/four_seasons_transforms.hh new file mode 100644 index 000000000..9e8ab1bde --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_transforms.hh @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include "common/liegroups/se3.hh" + +namespace robot::experimental::learn_descriptors { +struct FourSeasonsTransforms { + struct StaticTransforms { + liegroups::SE3 S_from_AS; + liegroups::SE3 cam_from_imu; + liegroups::SE3 w_from_gpsw; + liegroups::SE3 gps_from_imu; + liegroups::SE3 e_from_gpsw; + double gnss_scale; // scale from vio frame to gnss frame. WARNING: will require retooling + // if the scales per + // keyframe (pose) are not all one value. See more here: + // https://github.com/pmwenzel/4seasons-dataset + + StaticTransforms(const std::filesystem::path& path_transforms); + + private: + static liegroups::SE3 get_transform_from_line(const std::string& line); + }; + // might consider defining a nested "DynamicTransforms" or "PerImageTransforms" at some point - + // Nico +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frame.cc b/experimental/learn_descriptors/frame.cc new file mode 100644 index 000000000..139c27858 --- /dev/null +++ b/experimental/learn_descriptors/frame.cc @@ -0,0 +1,9 @@ +#include "experimental/learn_descriptors/frame.hh" + +namespace robot::experimental::learn_descriptors { +void Frame::add_keypoints(const KeypointsCV& kpts) { + kpts_.insert(kpts_.end(), kpts.begin(), kpts.end()); +}; + +void Frame::assign_descriptors(const cv::Mat& descriptors) { descriptors_ = descriptors; }; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh new file mode 100644 index 000000000..0c5535899 --- /dev/null +++ b/experimental/learn_descriptors/frame.hh @@ -0,0 +1,121 @@ +#pragma once +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "common/check.hh" +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "gtsam/base/Matrix.h" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +class Frame { + public: + Frame(FrameId id, size_t seq, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K, + const KeypointsCV kpts, const cv::Mat descriptors) + : id_(id), + seq_(seq), + undistorted_img_(img), + K_(K), + kpts_(kpts), + descriptors_(descriptors) {} + Frame(const Frame& other) + : id_(other.id_), + seq_(other.seq_), + undistorted_img_(other.undistorted_img_.clone()), + K_(other.K_), + kpts_(other.kpts_), + descriptors_(other.descriptors_.clone()), + frame_from_other_frames_(other.frame_from_other_frames_), + world_from_cam_groundtruth_(other.world_from_cam_groundtruth_), + cam_in_world_initial_guess_(other.cam_in_world_initial_guess_), + cam_in_world_interpolated_guess_(other.cam_in_world_interpolated_guess_), + world_from_cam_initial_guess_(other.world_from_cam_initial_guess_), + translation_covariance_in_cam_(other.translation_covariance_in_cam_) {} + + std::optional velocity_to(const Frame& other_frame) const { + ROBOT_CHECK(other_frame.seq_ >= seq_); + if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_) + return std::nullopt; + double dt_seconds = static_cast(other_frame.seq_ - seq_) * 1e-9; + return (*other_frame.cam_in_world_initial_guess_ - *cam_in_world_initial_guess_) / + dt_seconds; + }; + std::optional velocity_from(const Frame& other_frame) const { + ROBOT_CHECK(seq_ >= other_frame.seq_); + if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_) + return std::nullopt; + double dt_seconds = static_cast(seq_ - other_frame.seq_) * 1e-9; + return (*cam_in_world_initial_guess_ - *other_frame.cam_in_world_initial_guess_) / + dt_seconds; + }; + + void add_keypoints(const KeypointsCV& kpts); + void assign_descriptors(const cv::Mat& descriptors); + + const KeypointsCV& keypoint() { return kpts_; }; + const cv::Mat& descriptors() { return descriptors_; }; + + std::string to_string() const { + std::stringstream ss; + ss << "Frame " << id_ << ":\n"; + ss << "\tseq: " << seq_ << "\n"; + ss << "\tK: "; + if (K_) { + ss << "fx: " << K_->fx() << ", fy: " << K_->fy() << ", s: " << K_->skew() + << ", px: " << K_->px() << ", py: " << K_->py(); + } else { + ss << "N/A"; + } + ss << "\n\tkpts_.size(): " << kpts_.size(); + ss << "\n\tframe_from_other_frames_.size(): " << frame_from_other_frames_.size(); + ss << "\n\tworld_from_cam_groundtruth_: "; + if (world_from_cam_groundtruth_) { + ss << "\n" << world_from_cam_groundtruth_->matrix(); + } else { + ss << "N/A"; + } + ss << "\n\tcam_in_world_initial_guess_: "; + if (cam_in_world_initial_guess_) { + ss << "\n" << cam_in_world_initial_guess_->matrix(); + } else { + ss << "N/A"; + } + ss << "\n\tworld_from_cam_initial_guess_: "; + if (world_from_cam_initial_guess_) { + ss << "\n" << world_from_cam_initial_guess_->matrix(); + } else { + ss << "N/A"; + } + ss << "\n\ttranslation_covariance_in_cam_: "; + if (translation_covariance_in_cam_) { + ss << "\n" << translation_covariance_in_cam_->matrix(); + } else { + ss << "N/A"; + } + return ss.str(); + } + + FrameId id_; + size_t seq_; + cv::Mat undistorted_img_; + gtsam::Cal3_S2::shared_ptr K_; + KeypointsCV kpts_; + cv::Mat descriptors_; + std::unordered_map + frame_from_other_frames_; // map of relative rotation for this_frame_from_frame_[FrameId] + std::optional world_from_cam_groundtruth_; + std::optional + cam_in_world_initial_guess_; // intended for data from sensors (GPS) + std::optional cam_in_world_interpolated_guess_; + std::optional world_from_cam_initial_guess_; + std::optional translation_covariance_in_cam_; +}; +using SharedFrame = std::shared_ptr; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc new file mode 100644 index 000000000..32af22642 --- /dev/null +++ b/experimental/learn_descriptors/frontend.cc @@ -0,0 +1,525 @@ +#include "experimental/learn_descriptors/frontend.hh" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/check.hh" +#include "common/geometry/camera.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/image_point.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/triangulation.h" +#include "opencv2/opencv.hpp" + +namespace detail_sfm { + +std::optional attempt_triangulate(const std::vector &cam_poses, + const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, + const double max_reproj_error = 2.0, + const bool verbose = true) { + gtsam::Point3 p_lmk_in_world; + if (cam_poses.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + p_lmk_in_world = gtsam::triangulatePoint3( + cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); + + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate. + if (verbose) + std::cerr << "attempt_triangulation failed. Likely cheirality exception: " + << e.what() << ". Discarding involved keypoints." << std::endl; + } + } + // Optional: perform an explicit cheirality check + for (const auto &pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + return std::nullopt; + } + } + + // Cheirality & reprojection checks + for (size_t i = 0; i < cam_poses.size(); ++i) { + const auto &pose = cam_poses[i]; + // Cheirality + gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); + if (p_cam.z() <= 0) { + if (verbose) { + std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z() + << ")\n"; + } + return std::nullopt; + } + // Reprojection error + if (max_reproj_error > 0) { + gtsam::PinholeCamera cam(pose, *K); + const auto reproj = cam.project(p_lmk_in_world); + const double err = (reproj - cam_obs[i]).norm(); + if (err > max_reproj_error) { + if (verbose) { + std::cerr << "[triangulate] reprojection error too large on view " << i << " (" + << err << " px)\n"; + } + return std::nullopt; + } + } + } + + return p_lmk_in_world; +} + +gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { + if (poses.empty()) throw std::runtime_error("Empty pose vector"); + + gtsam::Pose3 mean = poses[0]; + + for (int iter = 0; iter < maxIter; ++iter) { + gtsam::Vector6 total = gtsam::Vector6::Zero(); + + for (const auto &pose : poses) { + gtsam::Pose3 delta = mean.between(pose); + total += gtsam::Pose3::Logmap(delta); + } + + total /= poses.size(); + mean = mean.compose(gtsam::Pose3::Expmap(total)); + } + + return mean; +} + +gtsam::Point3 averagePoints(const std::vector &points) { + if (points.empty()) throw std::runtime_error("Empty point vector"); + gtsam::Point3 sum(0, 0, 0); + for (const auto &pt : points) sum += pt; + return sum / points.size(); +} + +gtsam::Point3 get_variance(const std::vector &points) { + const gtsam::Point3 mean = averagePoints(points); + gtsam::Point3 var(0, 0, 0); + for (const gtsam::Point3 &pt : points) { + var += (mean - pt).array().square().matrix(); + } + return var / points.size(); +} + +double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) { + Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation(); + + // 2. Compute trace and clamp to [-1,1] for numerical safety + double tr = R_err.trace(); + double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0)); + + // 3. Recover angle (in radians) + return std::acos(cos_theta); +} +} // namespace detail_sfm + +namespace robot::experimental::learn_descriptors { + +Frontend::Frontend(FrontendParams params_) : params_(params_) { + switch (params_.extractor_type) { + case FrontendParams::ExtractorType::SIFT: + feature_extractor_ = cv::SIFT::create(); + break; + case FrontendParams::ExtractorType::ORB: + feature_extractor_ = cv::ORB::create(); + break; + default: + // Error handling needed? + break; + } + switch (params_.matcher_type) { + case FrontendParams::MatcherType::BRUTE_FORCE: + descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); + break; + case FrontendParams::MatcherType::KNN: + descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); + break; + case FrontendParams::MatcherType::FLANN: + if (params_.extractor_type == FrontendParams::ExtractorType::ORB) { + throw std::invalid_argument("FLANN can not be used with ORB."); + } + descriptor_matcher_ = cv::FlannBasedMatcher::create(); + break; + default: + // Error handling needed? + break; + } +} + +void Frontend::populate_frames(bool verbose) { + shared_frames_.clear(); + shared_frames_.shrink_to_fit(); + shared_frames_.reserve(images_and_points_.size()); + FrameId id = shared_frames_.size(); + for (const ImageAndPoint &img_and_pt : images_and_points_) { + const std::shared_ptr &shared_img_pt = img_and_pt.shared_img_pt; + if (verbose) { + std::cout << shared_img_pt->to_string() << std::endl; + } + cv::Mat img_undistorted; + cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, + shared_img_pt->K->k_mat(), shared_img_pt->K->d_mat(), + shared_img_pt->K->k_mat(), img_and_pt.image_distorted.size()); + auto [kpts, descriptors] = extract_features(img_undistorted); + KeypointsCV kpts_cv; + for (const cv::KeyPoint &kpt : kpts) { + kpts_cv.push_back(kpt.pt); + } + + gtsam::Cal3_S2::shared_ptr K = + boost::make_shared(shared_img_pt->K->fx, shared_img_pt->K->fy, 0, + shared_img_pt->K->cx, shared_img_pt->K->cy); + + Frame frame(id, shared_img_pt->seq, img_undistorted, K, kpts_cv, descriptors); + const std::optional maybe_world_from_cam_grnd_trth = + shared_img_pt->world_from_cam_ground_truth(); + if (maybe_world_from_cam_grnd_trth) { + frame.world_from_cam_groundtruth_ = + gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix()); + } + const std::optional maybe_cam_in_world = shared_img_pt->cam_in_world(); + if (maybe_cam_in_world) { + frame.cam_in_world_initial_guess_ = *maybe_cam_in_world; + } + if (id == 0) frame.world_from_cam_initial_guess_ = gtsam::Rot3::Identity(); + const std::optional maybe_translation_covariance_in_cam = + shared_img_pt->translation_covariance_in_cam(); + if (maybe_translation_covariance_in_cam) { + frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam; + } + shared_frames_.push_back(std::make_shared(frame)); + id++; + } + interpolate_frames(); +} + +void Frontend::match_frames_and_build_tracks() { + if (params_.exhaustive) { + for (size_t i = 0; i < shared_frames_.size() - 1; i++) { + for (size_t j = i + 1; j < shared_frames_.size(); j++) { + std::vector matches = compute_matches(shared_frames_[i]->descriptors(), + shared_frames_[j]->descriptors()); + enforce_bijective_buffer_matches(matches); + if (matches.size() < 5) { + continue; + } + + std::vector cv_kpts_1; + std::vector cv_kpts_2; + for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_1.push_back(cv_kpt); + } + for (const KeypointCV &kpt : shared_frames_[j]->keypoint()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_2.push_back(cv_kpt); + } + std::optional scale_cam0_from_cam1 = + robot::geometry::estimate_cam0_from_cam1( + cv_kpts_1, cv_kpts_2, matches, + images_and_points_[i] + .shared_img_pt->K + ->k_mat()); // fine for now since all cameras have the same K, + // how to reconcile later though...? + + if (!scale_cam0_from_cam1) { + continue; + } + + shared_frames_[i]->frame_from_other_frames_.emplace( + j, gtsam::Rot3(scale_cam0_from_cam1->linear().matrix())); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = shared_frames_[j]->keypoint()[match.trainIdx]; + + auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0); + if (lmk_id_map_.find(key) != lmk_id_map_.end()) { + const size_t id = lmk_id_map_.at(key); + ROBOT_CHECK(id < feature_tracks_.size(), + "lmk_id_map_ id's shuold not exceed feature_tracks_ size!"); + feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0); + feature_tracks_[id].obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1); + feature_tracks_.push_back(feature_track); + lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0), + feature_tracks_.size() - 1); + } + } + } + } + } else { // successive only + for (size_t i = 0; i < shared_frames_.size() - 1; i++) { + std::vector matches = compute_matches(shared_frames_[i]->descriptors(), + shared_frames_[i + 1]->descriptors()); + enforce_bijective_buffer_matches(matches); + if (matches.size() < 5) { + continue; + } + + std::vector cv_kpts_1; + std::vector cv_kpts_2; + for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_1.push_back(cv_kpt); + } + for (const KeypointCV &kpt : shared_frames_[i + 1]->keypoint()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_2.push_back(cv_kpt); + } + std::optional scale_cam0_from_cam1 = + robot::geometry::estimate_cam0_from_cam1( + cv_kpts_1, cv_kpts_2, matches, images_and_points_[i].shared_img_pt->K->k_mat()); + if (!scale_cam0_from_cam1) { + continue; + } + ROBOT_CHECK(shared_frames_[i]->world_from_cam_initial_guess_, + "This rotation should be populated."); + shared_frames_[i + 1]->world_from_cam_initial_guess_.emplace( + shared_frames_[i]->world_from_cam_initial_guess_->matrix() * + scale_cam0_from_cam1->linear().matrix()); + + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = shared_frames_[i + 1]->keypoint()[match.trainIdx]; + + auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0); + if (lmk_id_map_.find(key) != lmk_id_map_.end()) { + const size_t id = lmk_id_map_.at(key); + ROBOT_CHECK(id < feature_tracks_.size(), + "lmk_id_map_ id's shuold not exceed feature_tracks_ size!"); + feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0); + feature_tracks_[id].obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1); + feature_tracks_.push_back(feature_track); + lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0), + feature_tracks_.size() - 1); + } + } + } + } + std::cout << "done processing matches" << std::endl; +} + +void Frontend::interpolate_frames() { + std::vector frames_with_guess; + for (SharedFrame &shared_frame : shared_frames_) { // this assumes that frames are ordered by + // seq (time), later on may have to + // think about this more if trying to run pipeline on unordered images or sets + // whose capture times are on different days + if (shared_frame->cam_in_world_initial_guess_) { + frames_with_guess.push_back(&(*shared_frame)); + } + } + if (frames_with_guess.size() < 2) return; + size_t idx_guess = 0; + for (SharedFrame &shared_frame : shared_frames_) { + Eigen::Vector3d interpolated; + + // advance to next guess window if needed + while (idx_guess + 1 < frames_with_guess.size() - 1 && + shared_frame->seq_ > frames_with_guess[idx_guess + 1]->seq_) { + idx_guess++; + } + + if (shared_frame->cam_in_world_initial_guess_) { + interpolated = *shared_frame->cam_in_world_initial_guess_; + } else { + Eigen::Vector3d v_a_to_b = + *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]); + double dt = (shared_frame->seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) * + 1e-9 * + (frames_with_guess[idx_guess]->seq_ > shared_frame->seq_ + ? frames_with_guess[idx_guess]->seq_ - shared_frame->seq_ + : shared_frame->seq_ - frames_with_guess[idx_guess]->seq_); + interpolated = *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + + v_a_to_b * dt; // add sign back to dt + } + shared_frame->cam_in_world_interpolated_guess_ = interpolated; + } +} + +std::pair, cv::Mat> Frontend::extract_features(const cv::Mat &img) const { + std::vector keypoints; + cv::Mat descriptors; + switch (params_.extractor_type) { + default: // the opencv extractors have the same function signature + feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors); + break; + } + return std::pair, cv::Mat>(keypoints, descriptors); +} + +std::vector Frontend::compute_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const { + std::vector matches; + switch (params_.matcher_type) { + case FrontendParams::MatcherType::BRUTE_FORCE: + compute_brute_matches(descriptors1, descriptors2, matches); + break; + case FrontendParams::MatcherType::KNN: + compute_KNN_matches(descriptors1, descriptors2, matches); + break; + case FrontendParams::MatcherType::FLANN: + compute_FLANN_matches(descriptors1, descriptors2, matches); + break; + default: + break; + } + std::sort(matches.begin(), matches.end()); + return matches; +} + +void Frontend::threshold_matches(std::vector &matches, float dist_threshhold) { + matches.erase(std::remove_if(matches.begin(), matches.end(), + [dist_threshhold](const cv::DMatch &match) { + return match.distance > dist_threshhold; + }), + matches.end()); +} + +void Frontend::enforce_bijective_matches(std::vector &matches) { + std::unordered_map bestQueryMatch; + std::unordered_map bestTrainMatch; + + for (const auto &match : matches) { + int queryIdx = match.queryIdx; + int trainIdx = match.trainIdx; + + if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || + match.distance < bestQueryMatch[queryIdx].distance) { + bestQueryMatch[queryIdx] = match; + } + + if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || + match.distance < bestTrainMatch[trainIdx].distance) { + bestTrainMatch[trainIdx] = match; + } + } + + matches.erase(std::remove_if(matches.begin(), matches.end(), + [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) { + int queryIdx = match.queryIdx; + int trainIdx = match.trainIdx; + + return bestQueryMatch[queryIdx].trainIdx != trainIdx || + bestTrainMatch[trainIdx].queryIdx != queryIdx; + }), + matches.end()); +} + +void Frontend::enforce_bijective_buffer_matches(std::vector &matches) { + // Store best and second-best matches per query_idx + std::unordered_map> best_two_query_matches; + for (const auto &match : matches) { + int query_idx = match.queryIdx; + float dist = match.distance; + + if (!best_two_query_matches.count(query_idx)) { + best_two_query_matches[query_idx] = {match, std::numeric_limits::max()}; + } else { + auto &[best_match, second_best_dist] = best_two_query_matches[query_idx]; + if (dist < best_match.distance) { + second_best_dist = best_match.distance; + best_match = match; + } else if (dist < second_best_dist) { + second_best_dist = dist; + } + } + } + + // Keep matches where best < 0.5 * second_best + std::vector filtered; + for (const auto &[query_idx, pair] : best_two_query_matches) { + const auto &[best_match, second_best_dist] = pair; + if (best_match.distance < 0.5f * second_best_dist) { + filtered.push_back(best_match); + } + } + + // Enforce bijection: each train_idx should also be best for only one query_idx + std::unordered_map best_train_match; + for (const auto &match : filtered) { + int train_idx = match.trainIdx; + if (!best_train_match.count(train_idx) || + match.distance < best_train_match[train_idx].distance) { + best_train_match[train_idx] = match; + } + } + + matches.clear(); + for (const auto &[train_idx, match] : best_train_match) { + matches.push_back(match); + } +} + +bool Frontend::compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (params_.matcher_type != FrontendParams::MatcherType::BRUTE_FORCE) { + return false; + } + matches_out.clear(); + descriptor_matcher_->match(descriptors1, descriptors2, matches_out); + return true; +} + +bool Frontend::compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (params_.matcher_type != FrontendParams::MatcherType::KNN) { + return false; + } + std::vector> knn_matches; + descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); + const float ratio_thresh = 0.7f; + matches_out.clear(); + // Lowe's Ratio Test + for (size_t i = 0; i < knn_matches.size(); i++) { + if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { + matches_out.push_back(knn_matches[i][0]); + } + } + return true; +} + +bool Frontend::compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (params_.matcher_type != FrontendParams::MatcherType::FLANN) { + return false; + } + std::vector> knn_matches; + descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); + const float ratio_thresh = 0.7f; + matches_out.clear(); + for (size_t i = 0; i < knn_matches.size(); i++) { + if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { + matches_out.push_back(knn_matches[i][0]); + } + } + return true; +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh new file mode 100644 index 000000000..e381d8089 --- /dev/null +++ b/experimental/learn_descriptors/frontend.hh @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include + +#include "Eigen/Core" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/image_point.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +struct FrontendParams { + enum class ExtractorType { SIFT, ORB }; + enum class MatcherType { BRUTE_FORCE, KNN, FLANN }; + ExtractorType extractor_type = ExtractorType::SIFT; + MatcherType matcher_type = MatcherType::KNN; + bool exhaustive = true; + bool incremental = false; // a.k.a. images are successive +}; +struct ImageAndPoint { + cv::Mat image_distorted; + std::shared_ptr shared_img_pt; +}; +class Frontend { + public: + static constexpr char symbol_pose_char = 'x'; + static constexpr char symbol_lmk_char = 'l'; + explicit Frontend(FrontendParams params); + ~Frontend(){}; + + void populate_frames(bool verbose = false); + void match_frames_and_build_tracks(); + void interpolate_frames(); + + void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); }; + void add_images(const std::vector &img_and_pts) { + for (const ImageAndPoint &img_and_pt : img_and_pts) { + images_and_points_.push_back(img_and_pt); + } + }; + + const FrontendParams::ExtractorType &extractor_type() const { return params_.extractor_type; }; + const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; }; + const FeatureTracks &feature_tracks() const { return feature_tracks_; }; + const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; }; + std::vector &frames() { return shared_frames_; }; + + std::pair, cv::Mat> extract_features(const cv::Mat &img) const; + std::vector compute_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const; + static void threshold_matches(std::vector &matches, float dist_threshhold); + static void enforce_bijective_matches(std::vector &matches); + static void enforce_bijective_buffer_matches(std::vector &matches); + static void draw_keypoints(const cv::Mat &img, std::vector keypoints, + cv::Mat &img_keypoints_out) { + cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), + cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + } + static void draw_matches(const cv::Mat &img1, std::vector keypoints1, + const cv::Mat &img2, std::vector keypoints2, + std::vector matches, cv::Mat &img_matches_out) { + cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); + } + + private: + bool compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + FrontendParams params_; + + cv::Ptr feature_extractor_; + cv::Ptr descriptor_matcher_; + + std::vector images_and_points_; + FeatureTracks feature_tracks_; + FrameLandmarkIdMap lmk_id_map_; + std::vector shared_frames_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh new file mode 100644 index 000000000..6204af219 --- /dev/null +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include +#include + +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "gtsam/geometry/Point3.h" +#include "opencv2/opencv.hpp" + +namespace std { +template <> +struct hash> { + size_t operator()(const std::pair &p) const { + size_t h1 = std::hash()(p.first); + size_t h2 = std::hash()(p.second.x) ^ (std::hash()(p.second.y) << 1); + return h1 ^ (h2 << 1); // combine hashes + } +}; +} // namespace std + +namespace robot::experimental::learn_descriptors { +class FeatureTrack { + public: + std::vector> obs_; + + bool in_ba_graph_; + + FeatureTrack(FrameId frame_id, const KeypointCV &px) { obs_.emplace_back(frame_id, px); } + + void print() const { + std::cout << "Feature track with cameras: "; + for (size_t i = 0; i < obs_.size(); i++) { + std::cout << " " << obs_[i].first << " "; + } + std::cout << std::endl; + } +}; + +using FeatureTracks = std::vector; // each idx is the FeatureTracks's LandmarkId +using FrameLandmarkIdMap = std::unordered_map, LandmarkId>; +} // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc new file mode 100644 index 000000000..c5046fd1f --- /dev/null +++ b/experimental/learn_descriptors/frontend_test.cc @@ -0,0 +1,198 @@ +#include "experimental/learn_descriptors/frontend.hh" + +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" +#include "experimental/learn_descriptors/image_point.hh" +#include "gtest/gtest.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +TEST(FrontendTest, pipeline_sweep) { + const size_t width = 640; + const size_t height = 480; + + const size_t pixel_shift_x = 20; + const size_t PIXEL_COMP_TOL = 20; + + cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3); + cv::Mat image_2; + + cv::Mat translation_mat = (cv::Mat_(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0); + cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}}; + float com_x = 0.0f, com_y = 0.0f; + for (const cv::Point &rect_point : rect_points) { + com_x += rect_point.x; + com_y += rect_point.y; + } + com_x *= 0.25; + com_y *= 0.25; + cv::Point rotation_center(com_x, com_y); + cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0); + + const size_t line_spacing = 100; + for (size_t i = 0; i <= width / line_spacing; i++) { + size_t x = i * line_spacing + (width % line_spacing) / 2; + size_t b = i * 255.0 / (width / line_spacing); + size_t g = 255.0 - i * 255.0 / (width / line_spacing); + cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2); + } + for (size_t i = 0; i <= height / line_spacing; i++) { + size_t y = i * line_spacing + (height % line_spacing) / 2; + size_t b = i * 255.0 / (width / line_spacing); + size_t g = 255.0 - i * 255.0 / (width / line_spacing); + cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2); + } + + cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size()); + cv::warpAffine(image_1, image_2, translation_mat, image_1.size()); + + // cv::Mat img_test_disp; + // cv::hconcat(image_1, image_2, img_test_disp); + // cv::imshow("Test", img_test_disp); + // cv::waitKey(1000); + + FrontendParams::ExtractorType extractor_types[2] = {FrontendParams::ExtractorType::SIFT, + FrontendParams::ExtractorType::ORB}; + FrontendParams::MatcherType matcher_types[3] = {FrontendParams::MatcherType::BRUTE_FORCE, + FrontendParams::MatcherType::FLANN, + FrontendParams::MatcherType::KNN}; + + FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, + true, false}; + Frontend frontend(params); + std::pair, cv::Mat> keypoints_descriptors_pair_1; + std::pair, cv::Mat> keypoints_descriptors_pair_2; + std::vector matches; + cv::Mat img_keypoints_out_1(height, width, CV_8UC3), + img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3); + // cv::Mat img_display_test; + for (FrontendParams::ExtractorType extractor_type : extractor_types) { + for (FrontendParams::MatcherType matcher_type : matcher_types) { + printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), + static_cast(matcher_type)); + FrontendParams params{extractor_type, matcher_type, true, false}; + try { + frontend = Frontend(params); + } catch (const std::invalid_argument &e) { + ROBOT_CHECK(std::string(e.what()) == + "FLANN can not be used with ORB."); // very jank... + continue; + } + keypoints_descriptors_pair_1 = frontend.extract_features(image_1); + keypoints_descriptors_pair_2 = frontend.extract_features(image_2); + matches = frontend.compute_matches(keypoints_descriptors_pair_1.second, + keypoints_descriptors_pair_2.second); + frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first, + img_keypoints_out_1); + frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first, + img_keypoints_out_2); + frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2, + keypoints_descriptors_pair_2.first, matches, img_matches_out); + // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); + // cv::vconcat(img_display_test, img_matches_out, img_display_test); + // std::stringstream text; + // text << "Extractor " << static_cast(extractor_type) << ", matcher " + // << static_cast(matcher_type); + // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), + // cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); + // cv::imshow("Keypoints and Matches Output.", img_display_test); + // std::cout << "Hold spacebar to pause." << std::endl; + while (cv::waitKey(1000) == 32) { + } + printf("completed frontend combination: (%d, %d)\n", static_cast(extractor_type), + static_cast(matcher_type)); + if (extractor_type != FrontendParams::ExtractorType::ORB) { // don't check ORB for now + for (const cv::DMatch match : matches) { + EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x - + keypoints_descriptors_pair_2.first[match.trainIdx].pt.x, + pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL); + EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y - + keypoints_descriptors_pair_1.first[match.queryIdx].pt.y, + 0, PIXEL_COMP_TOL); + } + } + } + } +} + +TEST(FrontendTest, interpolate_frames) { + constexpr size_t width = 800; + constexpr size_t height = 400; + constexpr double fx = 500.0; + constexpr double fy = fx; + constexpr double cx = static_cast(width) / 2.0; + constexpr double cy = static_cast(height) / 2.0; + // gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + std::shared_ptr shared_K = + std::make_shared(fx, fy, cx, cy, 1, 1, 1, 1); + cv::Mat white_image(height, width, CV_8UC3, cv::Scalar(255, 255, 255)); + + FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, + true, false}; + Frontend frontend(params); + + struct VelocityPoint { + double time; // time in seconds + Eigen::Vector3d velocity; // m/s + }; + std::vector velocity_points{ + // basically a piecewise constant function + VelocityPoint{1.0, Eigen::Vector3d(1, 0, 0)}, // go in x dir from seconds 0 - 1 + VelocityPoint{2.0, Eigen::Vector3d(0, 1, 0)}, // go in y dir from seconds 1 - 2 + VelocityPoint{ + 3.0, Eigen::Vector3d(1, 1, 0)}, // go diagonal in first quadrant from seconds 2 - 3 + }; + size_t idx_vel_pt = 0; + const Eigen::Vector3d pt0_in_world = Eigen::Vector3d::Zero(); + std::vector pts_in_world{pt0_in_world}; + double time = 0.0; + std::vector> img_pts; + ImagePoint img_pt_first; + img_pt_first.id = 0; + img_pt_first.seq = static_cast(time * 1e9); + img_pt_first.K = shared_K; + img_pt_first.set_cam_in_world(pt0_in_world); + img_pts.push_back(std::make_shared(img_pt_first)); + frontend.add_image(ImageAndPoint{white_image, img_pts.back()}); + constexpr double sample_hz = 6.0; + constexpr size_t num_samples_skip = 2; + ROBOT_CHECK(static_cast(sample_hz) % (num_samples_skip + 1) == + 0); // needed so that the interpolation isn't lossy + constexpr double dt = 1 / sample_hz; + time += dt; + while (time < velocity_points.back().time) { + while (idx_vel_pt < velocity_points.size() && time > velocity_points[idx_vel_pt].time) { + idx_vel_pt++; + } + pts_in_world.emplace_back(pts_in_world.back() + dt * velocity_points[idx_vel_pt].velocity); + ImagePoint img_pt; + img_pt.id = img_pts.size(); + img_pt.seq = static_cast(time * 1e9); + img_pt.K = shared_K; + if (img_pts.size() % (num_samples_skip + 1) == 0) { + img_pt.set_cam_in_world(pts_in_world.back()); + } + img_pts.emplace_back(std::make_shared(img_pt)); + frontend.add_image(ImageAndPoint{white_image, img_pts.back()}); + time += dt; + } + frontend.populate_frames(); + frontend.interpolate_frames(); + + const std::vector &shared_frames = frontend.frames(); + constexpr double TOL = 1e-5; + for (size_t i = 0; i < pts_in_world.size(); i++) { + const Frame &frame = *shared_frames[i]; + ROBOT_CHECK(frame.cam_in_world_interpolated_guess_); + ROBOT_CHECK(pts_in_world[i].isApprox(*frame.cam_in_world_interpolated_guess_, TOL), i, + pts_in_world[i], *frame.cam_in_world_interpolated_guess_); + } +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh new file mode 100644 index 000000000..52d900be1 --- /dev/null +++ b/experimental/learn_descriptors/gps_data.hh @@ -0,0 +1,79 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "Eigen/Core" + +namespace robot::experimental::learn_descriptors { +struct GPSData { + struct Uncertainty { + double sigma_latitude_m; + double sigma_longitude_m; + double sigma_altitude_m; + double orientation_deg; + double rms_range_error_m; + + // diagonal latitude, longitude, altitude covariance in meters squared + Eigen::Matrix3d to_LLA_covariance() const { + Eigen::Matrix3d cov = Eigen::Matrix3d::Zero(); + cov(0, 0) = sigma_latitude_m * sigma_latitude_m; + cov(1, 1) = sigma_longitude_m * sigma_longitude_m; + cov(2, 2) = sigma_altitude_m * sigma_altitude_m; + return cov; + } + + // Convert lat/lon covariance to ENU + Eigen::Matrix3d to_ENU_covariance() const { + static constexpr double DEG2RAD = M_PI / 180.0; + static constexpr double ORIENT_OFFSET_DEG = 90.0; // CW‑from‑North → CCW‑from‑East + + // 1) build the 2×2 variance ellipse in metres² + Eigen::Matrix2d D; + D << sigma_latitude_m * sigma_latitude_m, 0.0, 0.0, + sigma_longitude_m * sigma_longitude_m; + + // 2) rotate that into ENU axes + double theta = (ORIENT_OFFSET_DEG - orientation_deg) * DEG2RAD; + Eigen::Matrix2d R; + R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); + + Eigen::Matrix2d horz = R * D * R.transpose(); + + // 3) assemble full 3×3 ENU covariance + Eigen::Matrix3d enu = Eigen::Matrix3d::Zero(); + enu.topLeftCorner<2, 2>() = horz; + enu(2, 2) = sigma_altitude_m * sigma_altitude_m; + return enu; + } + + std::string to_string() const { + std::stringstream ss; + ss << "Uncertainty:"; + ss << "\n\tsigma_lat_deg: " << sigma_latitude_m; + ss << "\n\tsigma_lon_deg: " << sigma_longitude_m; + ss << "\n\tsigma_altitude_m: " << sigma_altitude_m; + ss << "\n\torientation_deg: " << orientation_deg; + ss << "\n\trms_range_error_m: " << rms_range_error_m; + return ss.str(); + } + }; + size_t seq; // time in nanoseconds, may differ from image capturetime + double latitude; + double longitude; + std::optional altitude; // meters above sea level + std::optional uncertainty; + std::string to_string() const { + std::stringstream ss; + ss << "GPS Data: "; + ss << "\n\tseq: " << seq; + ss << "\n\tlla: " << latitude << ", " << longitude << ", " + << (altitude ? std::to_string(*altitude) : std::string("N/A")); + ss << "\n" << (uncertainty ? uncertainty->to_string() : "Uncertainty: N/A"); + return ss.str(); + } +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc index 53ee67b49..019d0c7c4 100644 --- a/experimental/learn_descriptors/gtsam_test.cc +++ b/experimental/learn_descriptors/gtsam_test.cc @@ -1,13 +1,18 @@ + #include #include #include "Eigen/Core" #include "gtest/gtest.h" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" #include "gtsam/geometry/Point3.h" #include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" #include "gtsam/inference/Symbol.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" #include "gtsam/nonlinear/Values.h" +#include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/GeneralSFMFactor.h" #include "gtsam/slam/PriorFactor.h" #include "gtsam/slam/ProjectionFactor.h" @@ -87,7 +92,91 @@ TEST(GtsamTesting, gtsam_simple_cube) { gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); gtsam::Values result = optimizer.optimize(); - // result.print("Optimized Results:\n"); + result.print("Optimized Results:\n"); + + constexpr double TOL = 1e-3; + for (size_t i = 0; i < poses.size(); i++) { + gtsam::Pose3 result_pose = result.at(gtsam::Symbol('x', i)); + EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL); + EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL); + EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL); + EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL)); + } + for (size_t i = 0; i < cube_W.size(); i++) { + gtsam::Point3 result_point = result.at(gtsam::Symbol('L', i)); + EXPECT_NEAR(cube_W[i][0], result_point[0], TOL); + EXPECT_NEAR(cube_W[i][1], result_point[1], TOL); + EXPECT_NEAR(cube_W[i][2], result_point[2], TOL); + } +} +TEST(GtsamTesting, gtsam_simple_cube_2) { + std::vector cube_W; + float cube_size = 1.0f; + cube_W.push_back(gtsam::Point3(0, 0, 0)); + cube_W.push_back(gtsam::Point3(cube_size, 0, 0)); + cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0)); + cube_W.push_back(gtsam::Point3(0, cube_size, 0)); + cube_W.push_back(gtsam::Point3(0, 0, cube_size)); + cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size)); + cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size)); + cube_W.push_back(gtsam::Point3(0, cube_size, cube_size)); + + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + gtsam::Values initial; + gtsam::NonlinearFactorGraph graph; + + gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + initial.insert(gtsam::Symbol('K', 0), *K); + + auto poseNoise = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1)); + auto measurementNoise = gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + std::vector poses; + std::vector> cameras; + + Eigen::Matrix3d rotation0( + Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0)); + gtsam::PinholeCamera camera0(pose0, *K); + graph.emplace_shared>(gtsam::Symbol('x', 0), pose0, poseNoise); + poses.push_back(pose0); + cameras.push_back(camera0); + + gtsam::Pose3 pose1( + gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + rotation0), + gtsam::Point3(0, 4, 0)); + gtsam::PinholeCamera camera1(pose1, *K); + graph.emplace_shared>( + gtsam::Symbol('x', 0), gtsam::Symbol('x', 1), pose0.between(pose1), poseNoise); + poses.push_back(pose1); + cameras.push_back(camera1); + + for (size_t i = 0; i < poses.size(); i++) { + initial.insert(gtsam::Symbol('x', i), poses[i]); + } + for (size_t i = 0; i < cube_W.size(); i++) { + initial.insert(gtsam::Symbol('L', i), gtsam::Point3(0, 0, 0)); + for (size_t j = 0; j < poses.size(); j++) { + gtsam::Point2 pixel_p_cube_C = cameras[j].project(cube_W[i]); + if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C, img_width, img_height)) { + graph.emplace_shared>( + pixel_p_cube_C, measurementNoise, gtsam::Symbol('x', j), gtsam::Symbol('L', i), + gtsam::Symbol('K', 0)); + } + } + } + + gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); + gtsam::Values result = optimizer.optimize(); + + result.print("Optimized Results:\n"); constexpr double TOL = 1e-3; for (size_t i = 0; i < poses.size(); i++) { diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index c46708d17..6e9273eed 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -2,132 +2,72 @@ #include #include -#include +#include #include #include #include #include "Eigen/Core" #include "Eigen/Geometry" +#include "camera_calibration.hh" #include "common/liegroups/se3.hh" -#include "common/time/robot_time.hh" +#include "experimental/learn_descriptors/camera_calibration.hh" namespace robot::experimental::learn_descriptors { struct ImagePoint { - struct GPSData { - struct Uncertainty { - double sigma_lat_mitude; - double sigma_longitude; - double sigma_altitude; - double orientation_deg; - double rms_range_error_m; + virtual ~ImagePoint() = default; - // diagonal latitude, longitude, altitude covariance in meters squared - Eigen::Matrix3d to_LLA_covariance() const { - Eigen::Matrix3d cov = Eigen::Matrix3d::Zero(); - cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude; - cov(1, 1) = sigma_longitude * sigma_longitude; - cov(2, 2) = sigma_altitude * sigma_altitude; - return cov; - } - - // Convert lat/lon covariance to ENU at given latitude - Eigen::Matrix3d to_ENU_covariance(double lat_deg) const { - double lat_rad = lat_deg * M_PI / 180.0; - - // meters per degree scaling - double kLat = - 111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad); - double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad); - - Eigen::Matrix3d lla_cov = to_LLA_covariance(); - - // rotate lat/lon covariance (upper-left 2x2) using orientation - double theta = orientation_deg * M_PI / 180.0; - Eigen::Matrix2d R; - R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); - - Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>(); - Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose(); - - // apply linear scaling to convert degrees to meters in EN - Eigen::Matrix2d J; - J << kLat, 0, 0, kLon; - - Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose(); - - Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero(); - enu_cov.topLeftCorner<2, 2>() = en_cov; - enu_cov(2, 2) = lla_cov(2, 2); // alt variance unchanged - - return enu_cov; - } - }; - size_t seq; // time in nanoseconds, may differ from image capturetime - double latitude; - double longitude; - std::optional altitude; // meters above sea level - std::optional uncertainty; - }; size_t id; // idx for DB - size_t seq; // time in nanoseconds of image capture (also the name of image) - std::optional - AS_w_from_gnss_cam; // globally opimized arbitrary scale visual world (gnss + VIO + loop - // closure + etc.) from cam - std::optional AS_w_from_vio_cam; // arbitrary scale world from vio result cam - std::optional gps_gcs; // raw gps measurement in gcs (global cordinate system) - - const std::string to_string() const { - auto se3_to_str = [](const liegroups::SE3& se3) { - const Eigen::Vector3d& t = se3.translation(); - const Eigen::Quaterniond& r = se3.so3().unit_quaternion(); + size_t seq; // time in nanoseconds of image capture (also could be name of image; this is also + // time since unix epoch) + std::shared_ptr K; + void set_cam_in_world(const Eigen::Vector3d& cam_in_world) { cam_in_world_ = cam_in_world; }; + virtual std::optional world_from_cam_ground_truth() const { + return std::nullopt; + }; + virtual std::optional cam_in_world() const { return cam_in_world_; }; + virtual std::optional translation_covariance_in_cam() const { + return std::nullopt; + }; + virtual std::string to_string() const { + auto vec3d_to_str = [](const Eigen::Vector3d& vec3d) { std::stringstream ss; - ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n"; + ss << "[" << vec3d.x() << ", " << vec3d.y() << ", " << vec3d.z() << "]"; + return ss.str(); + }; + auto isom3d_to_str = [&vec3d_to_str](const Eigen::Isometry3d& isom3d) { + const Eigen::Vector3d& t = isom3d.translation(); + const Eigen::Quaterniond r(isom3d.rotation()); + std::stringstream ss; + ss << "\t\tt: " << vec3d_to_str(t) << "\n"; ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]"; return ss.str(); }; std::stringstream ss; ss << "Image Point " << id << ":\n"; - ss << "\tseq: " << seq << "\n"; - ss << "\tAS_w_from_gnss_cam: "; - if (AS_w_from_gnss_cam) { - ss << "\n" << se3_to_str(*AS_w_from_gnss_cam); + ss << "\tworld_from_cam_ground_truth: "; + if (world_from_cam_ground_truth()) { + ss << "\n" << isom3d_to_str(*world_from_cam_ground_truth()); } else { ss << "N/A"; } - ss << "\n\tAS_w_from_vio_cam: "; - if (AS_w_from_vio_cam) { - ss << "\n" << se3_to_str(*AS_w_from_vio_cam); + ss << "\n\tcam_in_world: "; + if (cam_in_world()) { + ss << vec3d_to_str(*cam_in_world()); } else { ss << "N/A"; } - ss << "\n\tgps_gcs: "; - if (gps_gcs) { - ss << "\n\t\tseq: " << gps_gcs->seq; - ss << "\n\t\t" << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t"; - if (gps_gcs->altitude) { - ss << *(gps_gcs->altitude); - } else { - ss << "alt N/A"; - } - ss << "\n\t\tsigma: "; - if (gps_gcs->uncertainty) { - ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t" - << gps_gcs->uncertainty->sigma_longitude << "\t" - << gps_gcs->uncertainty->sigma_altitude; - } else { - ss << "N/A"; - } + ss << "\n\ttranslation_covariance_in_cam: "; + if (translation_covariance_in_cam()) { + ss << *translation_covariance_in_cam(); } else { ss << "N/A"; } return ss.str(); } - cv::Mat load_image(const std::filesystem::path& img_dir) const { - const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png")); - return cv::imread(path); - } + private: + std::optional cam_in_world_; }; -typedef std::vector ImagePointVector; +using SharedImagePoint = std::shared_ptr; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc new file mode 100644 index 000000000..7c2829e94 --- /dev/null +++ b/experimental/learn_descriptors/image_point_four_seasons.cc @@ -0,0 +1,53 @@ +#include "experimental/learn_descriptors/image_point_four_seasons.hh" + +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/gps/frame_translation.hh" +#include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_transforms.hh" + +namespace robot::experimental::learn_descriptors { +ImagePointFourSeasons::~ImagePointFourSeasons() = default; + +std::optional ImagePointFourSeasons::world_from_cam_ground_truth() const { + if (!AS_w_from_gnss_cam) { + return std::nullopt; + } + Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); + scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = + shared_static_transforms->gnss_scale; + Eigen::Isometry3d world_from_cam_ground_truth( + (shared_static_transforms->S_from_AS.matrix() * scale_mat_reference * + AS_w_from_gnss_cam->matrix()) + .matrix()); + return world_from_cam_ground_truth; +} + +std::optional ImagePointFourSeasons::cam_in_world() const { + if (!gps_gcs) return std::nullopt; + const Eigen::Vector3d gps_in_ECEF(gps::ecef_from_lla( + Eigen::Vector3d(gps_gcs->latitude, gps_gcs->longitude, *gps_gcs->altitude))); + const Eigen::Vector3d gps_in_world(shared_static_transforms->w_from_gpsw * + shared_static_transforms->e_from_gpsw.inverse() * + gps_in_ECEF); + const Eigen::Isometry3d cam_from_gps( + (shared_static_transforms->cam_from_imu * shared_static_transforms->gps_from_imu.inverse()) + .matrix()); + const Eigen::Vector3d cam_in_world(gps_in_world + cam_from_gps.translation()); + return cam_in_world; +} + +std::optional ImagePointFourSeasons::translation_covariance_in_cam() const { + return gps_covariance_in_world(); +} + +std::optional ImagePointFourSeasons::gps_covariance_in_world() const { + if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt; + const Eigen::Matrix3d ENU_covariance(gps_gcs->uncertainty->to_ENU_covariance()); + const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix()); + const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose()); + return w_covariance; +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh new file mode 100644 index 000000000..95401d885 --- /dev/null +++ b/experimental/learn_descriptors/image_point_four_seasons.hh @@ -0,0 +1,85 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/check.hh" +#include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_transforms.hh" +#include "experimental/learn_descriptors/gps_data.hh" +#include "experimental/learn_descriptors/image_point.hh" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +class FourSeasonsParser; +struct ImagePointFourSeasons : ImagePoint { + virtual ~ImagePointFourSeasons(); + + // size_t id; // idx for DB + // size_t seq; // time in nanoseconds of image capture (also the name of image) + std::optional + AS_w_from_gnss_cam; // globally opimized arbitrary scale visual world (gnss + VIO + loop + // closure + etc.) from cam + std::optional AS_w_from_vio_cam; // arbitrary scale world from vio result cam + std::optional gps_gcs; // raw gps measurement in gcs (global cordinate system) + std::shared_ptr shared_static_transforms; + + std::optional velocity_to(const ImagePointFourSeasons& other_img_pt) const { + ROBOT_CHECK(other_img_pt.seq >= seq); + if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt; + double dt_seconds = static_cast(other_img_pt.seq - seq) * 1e-9; + return (*other_img_pt.cam_in_world() - *cam_in_world()) / dt_seconds; + }; + std::optional velocity_from(const ImagePointFourSeasons& other_img_pt) const { + ROBOT_CHECK(seq >= other_img_pt.seq); + if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt; + double dt_seconds = static_cast(seq - other_img_pt.seq) * 1e-9; + return (*cam_in_world() - *other_img_pt.cam_in_world()) / dt_seconds; + }; + + std::optional world_from_cam_ground_truth() const override; + std::optional cam_in_world() const override; + std::optional translation_covariance_in_cam() const override; + std::optional gps_covariance_in_world() const; + + std::string to_string() const override { + auto se3_to_str = [](const liegroups::SE3& se3) { + const Eigen::Vector3d& t = se3.translation(); + const Eigen::Quaterniond& r = se3.so3().unit_quaternion(); + std::stringstream ss; + ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n"; + ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]"; + return ss.str(); + }; + std::stringstream ss; + ss << "Image Point " << id << ":\n"; + ss << "\tseq: " << seq << "\n"; + ss << "\tAS_w_from_gnss_cam: "; + if (AS_w_from_gnss_cam) { + ss << "\n" << se3_to_str(*AS_w_from_gnss_cam); + } else { + ss << "N/A"; + } + ss << "\n\tAS_w_from_vio_cam: "; + if (AS_w_from_vio_cam) { + ss << "\n" << se3_to_str(*AS_w_from_vio_cam); + } else { + ss << "N/A"; + } + ss << "\n" << (gps_gcs ? gps_gcs->to_string() : "GPS Data: N/A"); + return ss.str(); + } + + cv::Mat load_image(const std::filesystem::path& img_dir) const { + const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png")); + return cv::imread(path); + } +}; +typedef std::vector ImagePointFourSeasonsVector; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/learn_descriptors.cc b/experimental/learn_descriptors/learn_descriptors.cc index 94ba9833a..f2938510f 100644 --- a/experimental/learn_descriptors/learn_descriptors.cc +++ b/experimental/learn_descriptors/learn_descriptors.cc @@ -1,6 +1,7 @@ #include "experimental/learn_descriptors/learn_descriptors.hh" #include +#include namespace robot::experimental::learn_descriptors { void hello_world(const std::string &in) { std::cout << in << std::endl; } diff --git a/experimental/learn_descriptors/spatial_test_scene.cc b/experimental/learn_descriptors/spatial_test_scene.cc new file mode 100644 index 000000000..93168e118 --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene.cc @@ -0,0 +1,147 @@ +#include "experimental/learn_descriptors/spatial_test_scene.hh" + +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/check.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" +#include "opencv2/opencv.hpp" + +Eigen::Isometry3d isometry_from_vector(const Eigen::VectorXd &pose_vector) { + ROBOT_CHECK(pose_vector.size() == 6); + Eigen::Isometry3d T = Eigen::Isometry3d::Identity(); + if (pose_vector.tail<3>().norm() > 1e-6) { + T.linear() = Eigen::AngleAxisd(pose_vector.tail<3>().norm(), pose_vector.tail<3>()) + .toRotationMatrix(); + } + T.translation() = pose_vector.head<3>(); + return T; +} + +namespace robot::experimental::learn_descriptors { +template +std::vector SpatialTestScene::get_projected_pixels( + const gtsam::PinholeCamera &camera, const cv::Size &img_size) { + std::vector pixels; + for (size_t i = 0; i < points_.size(); i++) { + const Eigen::Vector3d pt = points_[i]; + Eigen::Vector2d pixel = camera.project(pt); + if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) { + ProjectedPoint projected_pt(i, pixel); + pixels.push_back(projected_pt); + } + } + return pixels; +}; + +template +std::vector> +SpatialTestScene::get_corresponding_pixels( + const std::vector, cv::Size>> &cam_and_img_size) { + std::vector> corresponding_pixels; + for (size_t i = 0; i < points_.size(); i++) { + const Eigen::Vector3d pt = points_[i]; + Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt); + Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt); + if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width, + cam_and_img_size[0].second.height) && + ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width, + cam_and_img_size[1].second.height)) { + corresponding_pixels.emplace_back(ProjectedPoint(i, pixel_cam0), + ProjectedPoint(i, pixel_cam1)); + } + } + return corresponding_pixels; +}; + +void SpatialTestScene::add_point(Eigen::Vector3d point, std::optional point_noise) { + points_groundtruth_.push_back(point); + if (point_noise) { + ROBOT_CHECK(point_noise->dim() == 3); + points_.push_back(point + point_noise->sample()); + } else { + points_.push_back(point); + } +}; + +void SpatialTestScene::add_points(std::vector points, + std::optional point_noise) { + for (const Eigen::Vector3d &point : points) { + points_groundtruth_.push_back(point); + if (point_noise) { + ROBOT_CHECK(point_noise->dim() == 3); + points_.push_back(point + point_noise->sample()); + } else { + points_.push_back(point); + } + } +} + +void SpatialTestScene::add_camera(gtsam::PinholeCamera camera, + std::optional pose_noise) { + cameras_groundtruth_.push_back(camera); + if (pose_noise) { + cameras_.push_back(gtsam::PinholeCamera( + camera.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()), + camera.calibration())); + } else { + cameras_.push_back(camera); + } +} + +void SpatialTestScene::add_cameras(std::vector> cameras, + std::optional pose_noise) { + for (const gtsam::PinholeCamera &cam : cameras) { + cameras_groundtruth_.push_back(cam); + if (pose_noise) { + cameras_.push_back(gtsam::PinholeCamera( + cam.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()), + cam.calibration())); + } else { + cameras_.push_back(cam); + } + } +} + +void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_radius_origin, + double max_radius_origin, + std::optional pose_noise, + const gtsam::Cal3_S2 &K) { + std::random_device rd; + std::mt19937 gen(rd()); + + std::uniform_real_distribution dist_radius(min_radius_origin, max_radius_origin); + std::uniform_real_distribution dist_omega(0.0, 2 * M_PI); + + const Eigen::Isometry3d world_from_cam_default = + Eigen::Translation3d(1.0, 0, 0) * Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ()); + for (int i = 0; i < num_cameras; i++) { + double angle_z = dist_omega(gen); + Eigen::Isometry3d longitude_from_world = Eigen::Isometry3d::Identity(); + longitude_from_world.linear() = + Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix(); + double angle_x = dist_omega(gen); + Eigen::Isometry3d latitude_from_world = Eigen::Isometry3d::Identity(); + latitude_from_world.linear() = + Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix(); + + double radius = dist_radius(gen); + Eigen::Isometry3d world_from_cam(world_from_cam_default); + world_from_cam.translation() *= radius; + world_from_cam = longitude_from_world * latitude_from_world * world_from_cam; + cameras_groundtruth_.push_back( + gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K)); + if (pose_noise) { + cameras_.push_back(gtsam::PinholeCamera( + gtsam::Pose3( + (world_from_cam * isometry_from_vector(pose_noise->sample())).matrix()), + K)); + } else { + cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K)); + } + } +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_test_scene.hh b/experimental/learn_descriptors/spatial_test_scene.hh new file mode 100644 index 000000000..4f542223a --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene.hh @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "Eigen/Dense" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +class ProjectionHelper { + public: + static constexpr size_t img_width_default = 640; + static constexpr size_t img_height_default = 480; + static constexpr double fx_default = 500.0; + static constexpr double fy_default = fx_default; + static constexpr double cx_default = img_width_default / 2.0; + static constexpr double cy_default = img_height_default / 2.0; + static const gtsam::Cal3_S2::shared_ptr &K_default() { + static gtsam::Cal3_S2::shared_ptr K( + new gtsam::Cal3_S2(fx_default, fy_default, 0.0, cx_default, cy_default)); + return K; + } + static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { + return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height; + } +}; +class SpatialTestScene { + public: + struct Noise { + Noise(const std::vector &sigmas, std::optional seed = std::nullopt) + : sigmas_(sigmas) { + if (seed) { + gen_.seed(*seed); + } else { + std::random_device rd; + gen_.seed(rd()); + } + + for (double sigma : sigmas_) { + dists_.emplace_back(0.0, sigma); + } + } + + static Noise constrained(size_t dim = 6) { return Noise(std::vector(dim, 0.0)); } + + Eigen::VectorXd sample() { + Eigen::VectorXd result(sigmas_.size()); + for (size_t i = 0; i < sigmas_.size(); ++i) { + result(i) = dists_[i](gen_); + } + return result; + } + + size_t dim() const { return sigmas_.size(); } + + private: + std::vector sigmas_; + std::vector> dists_; + std::mt19937 gen_; + }; + struct ProjectedPoint { + ProjectedPoint(const size_t pt_idx, const Eigen::Vector2d &pixel) + : pt_idx(pt_idx), pixel(pixel){}; + size_t pt_idx; + Eigen::Vector2d pixel; + }; + SpatialTestScene() = default; + SpatialTestScene(std::vector points) : points_(points){}; + template + std::vector get_projected_pixels(const gtsam::PinholeCamera &camera, + const cv::Size &img_size); + template + std::vector> get_corresponding_pixels( + const std::vector, cv::Size>> + &cam_and_img_size); // make this parameter of size 2 + + void add_point(Eigen::Vector3d point, std::optional point_noise = std::nullopt); + void add_points(std::vector points, + std::optional point_noise = std::nullopt); + void add_camera(gtsam::PinholeCamera camera, + std::optional pose_noise = std::nullopt); + void add_cameras(std::vector> cameras, + std::optional pose_noise = std::nullopt); + void add_rand_cameras_face_origin(int num_cameras, double min_radius_origin, + double max_radius_origin, + std::optional pose_noise = std::nullopt, + const gtsam::Cal3_S2 &K = *ProjectionHelper::K_default()); + + const std::vector &points() { return points_; }; + const std::vector &points_groundtruth() { return points_groundtruth_; }; + const std::vector> &cameras() { return cameras_; }; + const std::vector> &cameras_groundtruth() { + return cameras_groundtruth_; + }; + + protected: + std::vector points_; + std::vector points_groundtruth_; + std::vector> cameras_; + std::vector> cameras_groundtruth_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh new file mode 100644 index 000000000..0d74262ac --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh @@ -0,0 +1,20 @@ +#pragma once + +#include "Eigen/Core" +#include "experimental/learn_descriptors/spatial_test_scene.hh" + +namespace robot::experimental::learn_descriptors { +class SpatialTestSceneCube : public SpatialTestScene { + public: + SpatialTestSceneCube(const float cube_size) : SpatialTestScene() { + add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2)); + add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2)); + }; +}; +} // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc new file mode 100644 index 000000000..5ba850db7 --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc @@ -0,0 +1,46 @@ +#include +#include + +#include "Eigen/Dense" +#include "common/check.hh" +#include "experimental/learn_descriptors/spatial_test_scene_cube.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" +#include "visualization/opencv/opencv_viz.hh" + +using namespace robot::experimental::learn_descriptors; + +int main() { + SpatialTestSceneCube test_scene(1.0f); + + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = static_cast(img_width) / 2.0; + const double cy = static_cast(img_height) / 2.0; + gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + + SpatialTestScene::Noise pose_noise({0.05, 0.05, 0.05, 0.15, 0.15, 0.15}); + test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, pose_noise, *K); + + std::vector> cameras = test_scene.cameras(); + std::vector> cameras_grndtrth = + test_scene.cameras_groundtruth(); + ROBOT_CHECK(cameras.size() == cameras_grndtrth.size()); + std::vector viz_cam_poses; + viz_cam_poses.reserve(cameras.size() + cameras_grndtrth.size()); + for (size_t i = 0; i < cameras.size(); i++) { + viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras[i].pose().matrix()), + "cam_" + std::to_string(i)); + viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras_grndtrth[i].pose().matrix()), + "cam_grnd_" + std::to_string(i)); + } + ROBOT_CHECK(test_scene.points().size() == test_scene.points_groundtruth().size()); + std::vector viz_points; + for (size_t i = 0; i < test_scene.points().size(); i++) { + viz_points.emplace_back(test_scene.points()[i], "pt_" + std::to_string(i)); + viz_points.emplace_back(test_scene.points_groundtruth()[i], "pt_grnd_" + std::to_string(i)); + } + robot::visualization::viz_scene(viz_cam_poses, viz_points); +} \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc new file mode 100644 index 000000000..ae46c2966 --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -0,0 +1,84 @@ +#include "experimental/learn_descriptors/structure_from_motion.hh" + +#include +#include +#include +#include +#include + +#include "common/check.hh" +#include "common/geometry/camera.hh" +#include "common/geometry/translate_types.hh" +#include "experimental/learn_descriptors/backend.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" +#include "gtsam/geometry/Point2.h" +#include "opencv2/opencv.hpp" +#include "opencv2/viz.hpp" +#include "visualization/opencv/opencv_viz.hh" + +namespace robot::experimental::learn_descriptors { +void StructureFromMotion::add_image_point(const cv::Mat &img_undistorted, + const SharedImagePoint &image_point) { + frontend_.add_image(ImageAndPoint{img_undistorted, image_point}); +} +void StructureFromMotion::add_image_points( + const std::vector &imgs_undistorted, + const std::vector &shared_image_points) { + ROBOT_CHECK(imgs_undistorted.size() == shared_image_points.size()); + std::vector img_and_pts; + img_and_pts.reserve(shared_image_points.size()); + for (size_t i = 0; i < shared_image_points.size(); i++) { + img_and_pts.emplace_back(imgs_undistorted[i], shared_image_points[i]); + } + frontend_.add_images(img_and_pts); +} + +void StructureFromMotion::solve_structure( + const int num_steps, std::optional iter_debug_func) { + // FRONTEND + frontend_.populate_frames(); + ROBOT_CHECK(frontend_.frames().size() > 2); + frontend_.match_frames_and_build_tracks(); + ROBOT_CHECK(frontend_.frames().front()->world_from_cam_groundtruth_, + "Assuming that our first frame has groundtruth for now.", + *frontend_.frames().front()->world_from_cam_groundtruth_); + // align rotation via the initial guess + frontend_.frames().front()->world_from_cam_initial_guess_ = + frontend_.frames().front()->world_from_cam_groundtruth_->rotation(); + + // BACKEND + backend_.clear(); + backend_.add_frames(frontend_.frames()); + backend_.calculate_initial_values(); + backend_.populate_graph(frontend_.feature_tracks()); + graph_values(backend_.current_initial_values(), "viz init values"); + backend_.solve_graph(10, iter_debug_func); +} + +void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name, + const cv::viz::Color color) { + std::vector final_poses; + std::vector final_lmks; + for (const auto &key_value : values) { + gtsam::Key key = key_value.key; + gtsam::Symbol symbol(key); + try { + const gtsam::Pose3 &pose = values.at(key); + final_poses.emplace_back(Eigen::Isometry3d(pose.matrix()), symbol.string()); + continue; + } catch (const gtsam::ValuesIncorrectType &) { + } + + try { + const gtsam::Point3 &pt = values.at(key); + final_lmks.emplace_back(pt, symbol.string()); + continue; + } catch (const gtsam::ValuesIncorrectType &) { + } + + std::cerr << "Key " << symbol << " is neither Pose3 nor Point3.\n"; + } + robot::visualization::viz_scene(final_poses, final_lmks, color, true, true, window_name); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh new file mode 100644 index 000000000..93ffe0ddb --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Eigen/Dense" +#include "common/geometry/camera.hh" +#include "experimental/learn_descriptors/backend.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" +#include "experimental/learn_descriptors/image_point.hh" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/inference/Symbol.h" +#include "gtsam/nonlinear/Values.h" +#include "opencv2/opencv.hpp" +#include "opencv2/viz.hpp" + +namespace robot::experimental::learn_descriptors { +class StructureFromMotion { + public: + StructureFromMotion(FrontendParams frontend_params) : frontend_(Frontend(frontend_params)){}; + + static void graph_values(const gtsam::Values &values, + const std::string &window_name = "Viz Structure", + const cv::viz::Color color = cv::viz::Color::brown()); + + void add_image_point(const cv::Mat &img_undistorted, const SharedImagePoint &image_point); + void add_image_points(const std::vector &imgs_undistorted, + const std::vector &image_points); + void solve_structure( + const int num_steps, + std::optional iter_debug_func = std::nullopt); + const gtsam::Values &result() { return backend_.result(); }; + + Frontend frontend() { return frontend_; }; + Backend backend() { return backend_; } + + private: + Frontend frontend_; + Backend backend_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc new file mode 100644 index 000000000..2dce0047e --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion_example.cc @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "cxxopts.hpp" +#include "experimental/learn_descriptors/backend.hh" +#include "experimental/learn_descriptors/four_seasons_parser.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/image_point_four_seasons.hh" +#include "experimental/learn_descriptors/structure_from_motion.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "opencv2/opencv.hpp" +#include "visualization/opencv/opencv_viz.hh" + +int main(int argc, const char **argv) { + using namespace robot::experimental::learn_descriptors; + // clang-format off + cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser"); + options.add_options() + ("data_dir", "Path to dataset root directory", cxxopts::value()) + ("calibration_dir", "Path to dataset calibration directory", cxxopts::value()) + ("help", "Print usage"); + // clang-format on + + auto args = options.parse(argc, argv); + + const auto check_required = [&](const std::string &opt) { + if (args.count(opt) == 0) { + std::cout << "Missing " << opt << " argument" << std::endl; + std::cout << options.help() << std::endl; + std::exit(1); + } + }; + + if (args.count("help")) { + std::cout << options.help() << std::endl; + return 0; + } + + check_required("data_dir"); + check_required("calibration_dir"); + + const std::filesystem::path path_data = args["data_dir"].as(); + const std::filesystem::path path_calibration = args["calibration_dir"].as(); + FourSeasonsParser parser(path_data, path_calibration); + + FrontendParams frontend_params{FrontendParams::ExtractorType::SIFT, + FrontendParams::MatcherType::KNN, true, false}; + StructureFromMotion sfm(frontend_params); + + // short sequences (maybe about a doezen points that have decent overlap and aren't spaced too + // far apart on the order of maybe a meter) perform ok. many improvements needed for the overall + // system + for (size_t i = 835; i < 855; i += 2) { + const ImagePointFourSeasons img_pt = parser.get_image_point(i); + sfm.add_image_point(parser.load_image(i), std::make_shared(img_pt)); + } + + using epoch = size_t; + Backend::graph_step_debug_func graph_itr_debug_func = [&](const gtsam::Values &vals, + const epoch iter) { + std::cout << "iteration " << iter << " complete!"; + std::string window_name = "Iteration_" + std::to_string(iter); + StructureFromMotion::graph_values(vals, window_name); + }; + // passing graph_itr_debug_func will execute the function after each optimization epoch + // sfm.solve_structure(10, graph_itr_debug_func); + sfm.solve_structure(10); + StructureFromMotion::graph_values(sfm.result()); +} \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion_types.hh b/experimental/learn_descriptors/structure_from_motion_types.hh new file mode 100644 index 000000000..e8d0595bc --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion_types.hh @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +#include "gtsam/geometry/Point3.h" +#include "opencv2/opencv.hpp" + +using Timestamp = size_t; + +using FrameId = size_t; // Frame id is used as the index of gtsam symbol + // (not as a gtsam key). +using LandmarkId = size_t; +using LandmarkIds = std::vector; +using Landmark = gtsam::Point3; +using Landmarks = std::vector>; + +using KeypointCV = cv::Point2f; +using KeypointsCV = std::vector; diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 5f93ab174..f06ebd116 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -5,11 +5,72 @@ #include "common/check.hh" namespace robot::experimental::learn_descriptors { +const Eigen::Vector3d DataParser::t_boat_cam(-0.430, 0, 0.255); + +const Eigen::Isometry3d DataParser::T_boat_gps( + (Eigen::Matrix4d() << 1, 0, 0, 0.080, 0, 1, 0, -0.1183, 0, 0, 1, 0.010, 0, 0, 0, 1).finished()); + +const Eigen::Isometry3d DataParser::T_boat_imu = []() { + Eigen::Isometry3d T_boat_imu; + T_boat_imu.translation() = Eigen::Vector3d(-0.494, 0, -0.089); + T_boat_imu.linear() = Eigen::AngleAxisd(M_PI, Eigen::Vector3d::UnitX()).matrix(); + return T_boat_imu; +}(); + DataParser::DataParser(const std::filesystem::path &image_root_dir, - const std::vector &survey_list) { + const std::vector &survey_list) + : image_root_dir_(image_root_dir), survey_list_(survey_list) { ROBOT_CHECK(std::filesystem::exists(image_root_dir), "Image root dir does not exist!", image_root_dir); surveys_.load(image_root_dir.string(), survey_list); } DataParser::~DataParser() {} + +const Eigen::Isometry3d DataParser::get_boat_from_camera( + const symphony_lake_dataset::ImagePoint &img_pt) { + return get_boat_from_camera(img_pt.pan, img_pt.tilt); +} +const Eigen::Isometry3d DataParser::get_boat_from_camera(double theta_pan, double theta_tilt) { + Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ()); // pan + Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY()); // tilt + Eigen::Matrix3d R_boat_gimbal(R_z * R_y); + + Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX()) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY())) + .matrix(); + Eigen::Matrix3d R_boat_camera(R_boat_gimbal * R_gimbal_camera); + + Eigen::Isometry3d T_boat_cam; + T_boat_cam.translation() = t_boat_cam; + T_boat_cam.linear() = R_boat_camera; + return T_boat_cam; +} + +const Eigen::Isometry3d DataParser::get_world_from_gps( + const symphony_lake_dataset::ImagePoint &img_pt) { + Eigen::Isometry3d world_from_gps; + // img_pt.theta is in the North East Down frame, not lattitude longitude coords. However, + // because of the alignment of the imu and gps, this rotation math holds. + world_from_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix(); + world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); + return world_from_gps; +} + +const Eigen::Isometry3d DataParser::get_world_from_boat( + const symphony_lake_dataset::ImagePoint &img_pt) { + // Eigen::Isometry3d world_from_gps; + + // Eigen::Matrix3d R_world_imu = get_R_world_imu(img_pt.theta); + // Eigen::Isometry3d T_imu_gps = T_boat_imu.inverse() * T_boat_gps; + // Eigen::Matrix3d R_world_gps = R_world_imu * T_imu_gps.linear(); + + // world_from_gps.linear() = R_world_gps; + // world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); + + // Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse(); + + Eigen::Isometry3d world_from_gps = get_world_from_gps(img_pt); + Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse(); + return world_from_boat; +} } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index 021ebd3f2..536771d06 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -1,21 +1,75 @@ #pragma once -#pragma once -#include - +#include #include +#include +#include +#include +#include "Eigen/Dense" +#include "symphony_lake_dataset/ImagePoint.h" #include "symphony_lake_dataset/SurveyVector.h" namespace robot::experimental::learn_descriptors { +class SymphonyLakeCamParams { + public: + constexpr static double fx = 759.308, fy = 690.44; + constexpr static double px = 370.915, py = 250.909; + + constexpr static double k1 = -0.302805, k2 = 0.171088, k3 = 0.0; + constexpr static double p1 = 0.001151, p2 = -0.00038; +}; class DataParser { public: + static const Eigen::Vector3d t_boat_cam; + static const Eigen::Isometry3d T_boat_gps; + static const Eigen::Isometry3d T_boat_imu; + DataParser(const std::filesystem::path &image_root_dir, const std::vector &survey_list); ~DataParser(); + // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, + // bool use_compass = false); + static const Eigen::Isometry3d get_boat_from_camera( + const symphony_lake_dataset::ImagePoint &img_pt); + static const Eigen::Isometry3d get_boat_from_camera(double theta_pan, double theta_tilt); + static const Eigen::Isometry3d get_world_from_gps( + const symphony_lake_dataset::ImagePoint &img_pt); + /// @brief get_world_from_boat assuming z_axis_boat dot z_axis_world ~ -1 + /// @param img_pt + /// @return world_from_boat + static const Eigen::Isometry3d get_world_from_boat( + const symphony_lake_dataset::ImagePoint &img_pt); + const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; }; private: + std::filesystem::path image_root_dir_; + std::vector survey_list_; symphony_lake_dataset::SurveyVector surveys_; }; +class SymphonyLakeDatasetTestHelper { + public: + static constexpr const char *test_image_root_dir = + "external/symphony_lake_snippet/symphony_lake"; + static constexpr std::array test_survey_list = {"140106_snippet"}; + static bool images_equal(cv::Mat img1, cv::Mat img2) { + if (img1.size() != img2.size() || img1.type() != img2.type()) { + return false; + } + cv::Mat diff; + cv::absdiff(img1, img2, diff); + diff = diff.reshape(1); + return cv::countNonZero(diff) == 0; + }; + static DataParser get_test_parser() { + return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); + }; + static std::filesystem::path get_test_iamge_root_dir() { + return std::filesystem::path(test_image_root_dir); + }; + static std::vector get_test_survey_list() { + return std::vector(test_survey_list.begin(), test_survey_list.end()); + }; +}; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index 93caa7a10..a8c309c4e 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -6,31 +6,17 @@ #include #include +#include "common/check.hh" #include "gtest/gtest.h" #include "opencv2/opencv.hpp" +#include "visualization/opencv/opencv_viz.hh" namespace robot::experimental::learn_descriptors { namespace { bool is_test() { return std::getenv("BAZEL_TEST") != nullptr; } } // namespace - -class SymphonyLakeDatasetTestHelper { - public: - static bool images_equal(cv::Mat img1, cv::Mat img2) { - if (img1.size() != img2.size() || img1.type() != img2.type()) { - return false; - } - cv::Mat diff; - cv::absdiff(img1, img2, diff); - diff = diff.reshape(1); - return cv::countNonZero(diff) == 0; - } -}; TEST(SymphonyLakeParserTest, snippet_140106) { - const std::filesystem::path image_root_dir = "external/symphony_lake_snippet/symphony_lake"; - const std::vector survey_list{"140106_snippet"}; - - DataParser data_parser = DataParser(image_root_dir, survey_list); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); @@ -43,6 +29,9 @@ TEST(SymphonyLakeParserTest, snippet_140106) { for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { const symphony_lake_dataset::Survey &survey = survey_vector.get(i); for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + // img_point contains all the csv data entries + const symphony_lake_dataset::ImagePoint img_point = survey.getImagePoint(j); + (void)img_point; // suppress unused variable image = survey.loadImageByImageIndex(j); // get the target image @@ -52,15 +41,87 @@ TEST(SymphonyLakeParserTest, snippet_140106) { std::string target_img_name_str = target_img_name.str(); target_img_name_str.replace(0, target_img_name_str.size() - target_img_name_length, ""); std::filesystem::path target_img_dir = - image_root_dir / survey_list[i] / "0027" / target_img_name_str; + SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() / + SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" / + target_img_name_str; target_img = cv::imread(target_img_dir.string()); EXPECT_TRUE(SymphonyLakeDatasetTestHelper::images_equal(image, target_img)); - if (!is_test()) { - cv::imshow("Symphony Dataset Image", image); - cv::waitKey(2); - } + // if (!is_test()) { + // cv::imshow("Symphony Dataset Image", image); + // cv::waitKey(2); + // } + // cv::imshow("Symphony Dataset Image", image); + // cv::waitKey(200); + } + } +} + +TEST(SymphonyLakeParserTest, test_cam_frames) { + const std::vector indices = []() { + std::vector tmp; + for (int i = 0; i < 200; i += 10) { + tmp.push_back(i); + } + return tmp; + }(); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint image_point_first = + survey.getImagePoint(indices.front()); + + std::vector cam_frames; + + // NOTE: the world in these images is east, north, up centered at boat0 translation + Eigen::Vector3d t_world_boat0 = + DataParser::get_world_from_boat(image_point_first).translation(); + + for (size_t i = 0; i < indices.size(); i++) { + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]); + Eigen::Isometry3d T_world_boatidx = DataParser::get_world_from_boat(img_pt); + Eigen::Isometry3d T_boatidx_camidx = + DataParser::get_boat_from_camera(img_pt); // current boat to current camera + + Eigen::Isometry3d T_world_camidx = T_world_boatidx * T_boatidx_camidx; + T_world_camidx.translation() -= t_world_boat0; + + cam_frames.push_back(T_world_camidx); + } + + // geometry::viz_scene(cam_frames, std::vector(), cv::viz::Color::black(), + // true, true, "test_cam_frames"); +} + +TEST(SymphonyLakeParserTest, test_gps_frames) { + const std::vector indices = []() { + std::vector tmp; + for (int i = 0; i < 200; i += 10) { + tmp.push_back(i); } + return tmp; + }(); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint image_point_first = + survey.getImagePoint(indices.front()); + + std::vector gps_frames; + + // NOTE: the world in these images is east, north, up centered at boat0 translation + Eigen::Vector3d t_world_boat0 = + DataParser::get_world_from_boat(image_point_first).translation(); + // Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0); + + for (size_t i = 0; i < indices.size(); i++) { + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]); + Eigen::Isometry3d T_world_gpsidx = DataParser::get_world_from_gps(img_pt); + T_world_gpsidx.translation() -= t_world_boat0; + gps_frames.push_back(T_world_gpsidx); } + + // geometry::viz_scene(gps_frames, std::vector(), cv::viz::Color::black(), + // true, true, "test_gps_frames"); } } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/visual_odometry.cc b/experimental/learn_descriptors/visual_odometry.cc deleted file mode 100644 index 360473208..000000000 --- a/experimental/learn_descriptors/visual_odometry.cc +++ /dev/null @@ -1,118 +0,0 @@ -#include "experimental/learn_descriptors/visual_odometry.hh" - -namespace robot::experimental::learn_descriptors { -VisualOdometry::VisualOdometry(Frontend::ExtractorType frontend_extractor, - Frontend::MatcherType frontend_matcher) { - frontend_ = Frontend(frontend_extractor, frontend_matcher); -} - -Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) { - extractor_type_ = frontend_algorithm; - matcher_type_ = frontend_matcher; - - switch (extractor_type_) { - case ExtractorType::SIFT: - feature_extractor_ = cv::SIFT::create(); - break; - case ExtractorType::ORB: - feature_extractor_ = cv::ORB::create(); - break; - default: - // Error handling needed? - break; - } - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: - descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); - break; - case MatcherType::KNN: - descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); - break; - case MatcherType::FLANN: - if (frontend_algorithm == ExtractorType::ORB) { - throw std::invalid_argument("FLANN can not be used with ORB."); - } - descriptor_matcher_ = cv::FlannBasedMatcher::create(); - break; - default: - // Error handling needed? - break; - } -} - -std::pair, cv::Mat> Frontend::get_keypoints_and_descriptors( - const cv::Mat &img) const { - std::vector keypoints; - cv::Mat descriptors; - switch (extractor_type_) { - default: // the opencv extractors have the same function signature - feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors); - break; - } - return std::pair, cv::Mat>(keypoints, descriptors); -} - -std::vector Frontend::get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const { - std::vector matches; - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: - get_brute_matches(descriptors1, descriptors2, matches); - break; - case MatcherType::KNN: - get_KNN_matches(descriptors1, descriptors2, matches); - break; - case MatcherType::FLANN: - get_FLANN_matches(descriptors1, descriptors2, matches); - break; - default: - break; - } - std::sort(matches.begin(), matches.end()); - return matches; -} - -bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::BRUTE_FORCE) { - return false; - } - matches_out.clear(); - descriptor_matcher_->match(descriptors1, descriptors2, matches_out); - return true; -} - -bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::KNN) { - return false; - } - std::vector> knn_matches; - descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); - const float ratio_thresh = 0.7f; - matches_out.clear(); - for (size_t i = 0; i < knn_matches.size(); i++) { - if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { - matches_out.push_back(knn_matches[i][0]); - } - } - return true; -} - -bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::FLANN) { - return false; - } - std::vector> knn_matches; - descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); - const float ratio_thresh = 0.7f; - matches_out.clear(); - for (size_t i = 0; i < knn_matches.size(); i++) { - if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { - matches_out.push_back(knn_matches[i][0]); - } - } - return true; -} -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/visual_odometry.hh b/experimental/learn_descriptors/visual_odometry.hh deleted file mode 100644 index b64434d2f..000000000 --- a/experimental/learn_descriptors/visual_odometry.hh +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include - -namespace robot::experimental::learn_descriptors { -class Frontend { - public: - enum class ExtractorType { SIFT, ORB }; - enum class MatcherType { BRUTE_FORCE, KNN, FLANN }; - - Frontend(){}; - Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher); - ~Frontend(){}; - - ExtractorType get_extractor_type() const { return extractor_type_; }; - MatcherType get_matcher_type() const { return matcher_type_; }; - - std::pair, cv::Mat> get_keypoints_and_descriptors( - const cv::Mat &img) const; - std::vector get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const; - - static void draw_keypoints(const cv::Mat &img, std::vector keypoints, - cv::Mat img_keypoints_out) { - cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), - cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); - } - static void draw_matches(const cv::Mat &img1, std::vector keypoints1, - const cv::Mat &img2, std::vector keypoints2, - std::vector matches, cv::Mat img_matches_out) { - cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); - } - - private: - bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - ExtractorType extractor_type_; - MatcherType matcher_type_; - - cv::Ptr feature_extractor_; - cv::Ptr descriptor_matcher_; -}; -class VisualOdometry { - public: - VisualOdometry(Frontend::ExtractorType frontend_extractor, - Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); - ~VisualOdometry(){}; - - private: - cv::Mat prev_image_; - - Frontend frontend_; -}; -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/visual_odometry_test.cc b/experimental/learn_descriptors/visual_odometry_test.cc deleted file mode 100644 index b1803317e..000000000 --- a/experimental/learn_descriptors/visual_odometry_test.cc +++ /dev/null @@ -1,112 +0,0 @@ -#include "experimental/learn_descriptors/visual_odometry.hh" - -#include -#include - -#include "gtest/gtest.h" - -namespace robot::experimental::learn_descriptors { -TEST(VIO_TEST, frontend_pipeline_sweep) { - const size_t width = 640; - const size_t height = 480; - - const size_t pixel_shift_x = 20; - const size_t PIXEL_COMP_TOL = 20; - - cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3); - cv::Mat image_2; - - cv::Mat translation_mat = (cv::Mat_(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0); - cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}}; - float com_x = 0.0f, com_y = 0.0f; - for (const cv::Point& rect_point : rect_points) { - com_x += rect_point.x; - com_y += rect_point.y; - } - com_x *= 0.25; - com_y *= 0.25; - cv::Point rotation_center(com_x, com_y); - cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0); - - const size_t line_spacing = 100; - for (size_t i = 0; i <= width / line_spacing; i++) { - size_t x = i * line_spacing + (width % line_spacing) / 2; - size_t b = i * 255.0 / (width / line_spacing); - size_t g = 255.0 - i * 255.0 / (width / line_spacing); - cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2); - } - for (size_t i = 0; i <= height / line_spacing; i++) { - size_t y = i * line_spacing + (height % line_spacing) / 2; - size_t b = i * 255.0 / (width / line_spacing); - size_t g = 255.0 - i * 255.0 / (width / line_spacing); - cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2); - } - - cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size()); - cv::warpAffine(image_1, image_2, translation_mat, image_1.size()); - - // cv::Mat img_test_disp; - // cv::hconcat(image_1, image_2, img_test_disp); - // cv::imshow("Test", img_test_disp); - // cv::waitKey(1000); - - Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT, - Frontend::ExtractorType::ORB}; - Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE, - Frontend::MatcherType::FLANN, - Frontend::MatcherType::KNN}; - - Frontend frontend; - std::pair, cv::Mat> keypoints_descriptors_pair_1; - std::pair, cv::Mat> keypoints_descriptors_pair_2; - std::vector matches; - cv::Mat img_keypoints_out_1(height, width, CV_8UC3), - img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3); - // cv::Mat img_display_test; - for (Frontend::ExtractorType extractor_type : extractor_types) { - for (Frontend::MatcherType matcher_type : matcher_types) { - printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), - static_cast(matcher_type)); - try { - frontend = Frontend(extractor_type, matcher_type); - } catch (const std::invalid_argument& e) { - assert(std::string(e.what()) == "FLANN can not be used with ORB."); // very jank... - continue; - } - keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1); - keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2); - matches = frontend.get_matches(keypoints_descriptors_pair_1.second, - keypoints_descriptors_pair_2.second); - frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first, - img_keypoints_out_1); - frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first, - img_keypoints_out_2); - frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2, - keypoints_descriptors_pair_2.first, matches, img_matches_out); - // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); - // cv::vconcat(img_display_test, img_matches_out, img_display_test); - // std::stringstream text; - // text << "Extractor " << static_cast(extractor_type) << ", matcher " - // << static_cast(matcher_type); - // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), - // cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); - // cv::imshow("Keypoints and Matches Output.", img_display_test); - // std::cout << "Press spacebar to pause." << std::endl; - // while (cv::waitKey(1000) == 32) { - // } - printf("completed frontend combination: (%d, %d)\n", static_cast(extractor_type), - static_cast(matcher_type)); - if (extractor_type != Frontend::ExtractorType::ORB) { // don't check ORB for now - for (const cv::DMatch match : matches) { - EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x - - keypoints_descriptors_pair_2.first[match.trainIdx].pt.x, - pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL); - EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y - - keypoints_descriptors_pair_1.first[match.queryIdx].pt.y, - 0, PIXEL_COMP_TOL); - } - } - } - } -} -} // namespace robot::experimental::learn_descriptors diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc index 406def4b2..06c46da1f 100644 --- a/visualization/opencv/opencv_viz.cc +++ b/visualization/opencv/opencv_viz.cc @@ -2,10 +2,13 @@ #include +#include "Eigen/Core" +#include "Eigen/Geometry" #include "common/geometry/translate_types.hh" #include "opencv2/viz.hpp" -namespace robot::geometry { +using namespace robot::geometry; +namespace robot::visualization { cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { // Ensure R is a valid rotation matrix CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001); @@ -109,4 +112,4 @@ void viz_scene(const std::vector &world_from_poses, window.spin(); } -} // namespace robot::geometry +} // namespace robot::visualization diff --git a/visualization/opencv/opencv_viz.hh b/visualization/opencv/opencv_viz.hh index 28dbc197b..5f5956b43 100644 --- a/visualization/opencv/opencv_viz.hh +++ b/visualization/opencv/opencv_viz.hh @@ -7,7 +7,7 @@ #include "Eigen/Geometry" #include "opencv2/viz.hpp" -namespace robot::geometry { +namespace robot::visualization { struct VizPose { const Eigen::Isometry3d world_from_pose; const std::optional label; @@ -27,4 +27,4 @@ void viz_scene(const std::vector &world_from_poses, const cv::viz::Color color_background = cv::viz::Color::black(), const bool show_grid = true, const bool show_origin = true, const std::string &window_name = "Viz Window", const double text_scale = 0.1); -} // namespace robot::geometry \ No newline at end of file +} // namespace robot::visualization \ No newline at end of file diff --git a/visualization/opencv/opencv_viz_test.cc b/visualization/opencv/opencv_viz_test.cc index 502f8e4f4..832ce2878 100644 --- a/visualization/opencv/opencv_viz_test.cc +++ b/visualization/opencv/opencv_viz_test.cc @@ -3,7 +3,7 @@ #include "gtest/gtest.h" #include "opencv2/viz.hpp" -namespace robot::geometry { +namespace robot::visualization { TEST(OpencvVizTest, demo) { cv::viz::Viz3d window("My Window"); @@ -111,4 +111,4 @@ TEST(OpencvVizTest, cube_test_labeled) { viz_scene(world_from_cams, cube_points_in_world); } -} // namespace robot::geometry \ No newline at end of file +} // namespace robot::visualization \ No newline at end of file